From 4dde1a79bb9003762bcded88db35987bf9ee9e97 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Thu, 18 Jan 2024 07:58:23 -0800 Subject: [PATCH 01/32] fixes to the snr-like statistics --- bin/all_sky_search/pycbc_sngls_findtrigs | 12 +------ pycbc/events/stat.py | 42 +++++++++--------------- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/bin/all_sky_search/pycbc_sngls_findtrigs b/bin/all_sky_search/pycbc_sngls_findtrigs index 066cec88d5f..9cf060725c5 100644 --- a/bin/all_sky_search/pycbc_sngls_findtrigs +++ b/bin/all_sky_search/pycbc_sngls_findtrigs @@ -131,16 +131,6 @@ if args.cluster_window is not None: logging.info('Clustering events over %s s window within each template', args.cluster_window) -extra_kwargs = {} -for inputstr in args.statistic_keywords: - try: - key, value = inputstr.split(':') - extra_kwargs[key] = value - except ValueError: - err_txt = "--statistic-keywords must take input in the " \ - "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \ - "Received {}".format(args.statistic_keywords) - raise ValueError(err_txt) loudest_keep_vals = args.loudest_keep_values.strip('[]').split(',') @@ -190,7 +180,7 @@ for i, tnum in enumerate(template_ids): sds = rank_method.single(trigs)[trigger_keep_ids] stat_t = rank_method.rank_stat_single((ifo, sds), **extra_kwargs) - trigger_times = sds['end_time'] + trigger_times = trigs['end_time'][:][trigger_keep_ids] if args.cluster_window is not None: cid = coinc.cluster_over_time(stat_t, trigger_times, args.cluster_window) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index e04ba732cad..1ea58542b83 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -87,9 +87,12 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.sngl_ranking = sngl_ranking self.sngl_ranking_kwargs = {} + self.kwargs = {} for key, value in kwargs.items(): if key.startswith('sngl_ranking_'): self.sngl_ranking_kwargs[key[13:]] = value + else: + self.kwargs = value def get_file_hashes(self): """ @@ -190,8 +193,7 @@ def single(self, trigs): # pylint:disable=unused-argument err_msg += "sub-classes. You shouldn't be seeing this error!" 
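The keyword handling added to `Stat.__init__` above routes any option prefixed `sngl_ranking_` to the single-detector ranking function and keeps the rest as statistic-level keywords; patch 01's `self.kwargs = value` overwrites the whole dict and is corrected to `self.kwargs[key] = value` in patch 02 below. A minimal standalone sketch of that routing (the helper name is hypothetical, not part of PyCBC):

```python
def split_stat_kwargs(**kwargs):
    """Split statistic keywords the way Stat.__init__ does after this patch."""
    sngl_ranking_kwargs = {}
    stat_kwargs = {}
    for key, value in kwargs.items():
        if key.startswith('sngl_ranking_'):
            # drop the 13-character 'sngl_ranking_' prefix
            sngl_ranking_kwargs[key[13:]] = value
        else:
            stat_kwargs[key] = value
    return sngl_ranking_kwargs, stat_kwargs

# split_stat_kwargs(sngl_ranking_constant=5.5, max_chirp_mass='40')
# -> ({'constant': 5.5}, {'max_chirp_mass': '40'})
```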
raise NotImplementedError(err_msg) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -275,8 +277,7 @@ def single(self, trigs): """ return self.get_sngl_ranking(trigs) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -778,8 +779,7 @@ def single(self, trigs): singles['snr'] = trigs['snr'][:] return numpy.array(singles, ndmin=1) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -1052,8 +1052,7 @@ def single(self, trigs): return self.lognoiserate(trigs) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -1188,8 +1187,7 @@ def single(self, trigs): stat = thresh - (logr_n / self.alpharef) return numpy.array(stat, ndmin=1, dtype=numpy.float32) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for single detector candidates @@ -1329,8 +1327,7 @@ def single(self, trigs): singles['snr'] = trigs['snr'][:] return numpy.array(singles, ndmin=1) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -1698,8 +1695,7 @@ def single(self, trigs): singles['benchmark_logvol'] = self.benchmark_logvol[self.curr_tnum] return numpy.array(singles, ndmin=1) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for single detector candidates @@ -1991,8 +1987,7 @@ def single(self, trigs): self.curr_mchirp = min(self.curr_mchirp, float(self.mcm)) return ExpFitFgBgNormStatistic.single(self, trigs) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -2012,8 +2007,7 @@ def rank_stat_single(self, single_info, """ rank_sngl = ExpFitFgBgNormStatistic.rank_stat_single( self, - single_info, - **kwargs) + single_info) rank_sngl += numpy.log((self.curr_mchirp / 20.) ** (11. 
/ 3.)) return rank_sngl @@ -2194,8 +2188,7 @@ def logsignalrate(self, stats, shift, to_shift): return logr_s - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -2212,8 +2205,7 @@ def rank_stat_single(self, single_info, """ rank_sngl = ExpFitFgBgNormStatistic.rank_stat_single( self, - single_info, - **kwargs) + single_info) rank_sngl += self.kde_ratio() return rank_sngl @@ -2553,15 +2545,13 @@ def logsignalrate(self, stats, shift, to_shift): return ExpFitFgBgKDEStatistic.logsignalrate(self, stats, shift, to_shift) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Inherited, see docstring for ExpFitFgBgKDEStatistic.rank_stat_single """ return ExpFitFgBgKDEStatistic.rank_stat_single( self, - single_info, - **kwargs) + single_info) def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): """ From ff7731c3bd5ad30ea0756983e865578cdd972ac6 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 24 Jan 2024 03:03:05 -0800 Subject: [PATCH 02/32] Move exp_fit statistics into a modular framework --- bin/all_sky_search/pycbc_sngls_findtrigs | 1 - pycbc/events/stat.py | 1839 ++++++---------------- 2 files changed, 472 insertions(+), 1368 deletions(-) diff --git a/bin/all_sky_search/pycbc_sngls_findtrigs b/bin/all_sky_search/pycbc_sngls_findtrigs index 9cf060725c5..304a00af785 100644 --- a/bin/all_sky_search/pycbc_sngls_findtrigs +++ b/bin/all_sky_search/pycbc_sngls_findtrigs @@ -131,7 +131,6 @@ if args.cluster_window is not None: logging.info('Clustering events over %s s window within each template', args.cluster_window) - loudest_keep_vals = args.loudest_keep_values.strip('[]').split(',') threshes = [] diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 1ea58542b83..3c610cfba1d 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -38,9 +38,18 @@ logger = logging.getLogger('pycbc.events.stat') +_allowed_statistic_features = [ + 'phasetd', + 'kde', + 'dq', + 'chirp_mass', + 'sensitive_volume', + 'alpha_constant_below_thresh', + 'normalize_fit_rate', +] class Stat(object): - """Base class which should be extended to provide a coincident statistic""" + """Base class which should be extended to provide a statistic""" def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): """ @@ -92,7 +101,7 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): if key.startswith('sngl_ranking_'): self.sngl_ranking_kwargs[key[13:]] = value else: - self.kwargs = value + self.kwargs[key] = value def get_file_hashes(self): """ @@ -609,7 +618,7 @@ def update_file(self, key): def logsignalrate(self, stats, shift, to_shift): """ - Calculate the normalized log rate density of signals via lookup + Calculate the normalized log rate density of coincident signals via lookup Parameters ---------- @@ -836,12 +845,14 @@ def coinc_lim_for_thresh(self, sngls_list, thresh, limifo, return s1 ** 0.5 -class ExpFitStatistic(QuadratureSumStatistic): +class ExpFitStatistic(PhaseTDStatistic): """ Detection statistic using an exponential falloff noise model. Statistic approximates the negative log noise coinc rate density per template over single-ifo newsnr values. 
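The new `_allowed_statistic_features` list above enumerates the optional behaviours the reworked `ExpFitStatistic` toggles through `self.kwargs` (e.g. `self.kwargs['sensitive_volume']`). The excerpt does not show how absent features acquire defaults, so the following is only an assumed sketch of filling those flags before use:

```python
# Assumed helper, not shown in the patch: default every known feature to False
_allowed_statistic_features = [
    'phasetd', 'kde', 'dq', 'chirp_mass', 'sensitive_volume',
    'alpha_constant_below_thresh', 'normalize_fit_rate',
]

def fill_feature_defaults(stat_kwargs):
    for feature in _allowed_statistic_features:
        stat_kwargs.setdefault(feature, False)
    return stat_kwargs

# fill_feature_defaults({'sensitive_volume': True})['dq'] -> False
```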
+ + Extra features can be added by supplying keyword arguments when initialising """ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): @@ -859,32 +870,174 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): ifos: list of strs, not used here The list of detector names + kwargs: values and features needed for the statistic """ if not files: - raise RuntimeError("Statistic files not specified") - QuadratureSumStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) + raise RuntimeError("Files not specified") + + PhaseTDStatistic.__init__(self, sngl_ranking, files=files, + ifos=ifos, **kwargs) + # Get the single-detector rates fit files # the stat file attributes are hard-coded as '%{ifo}-fit_coeffs' parsed_attrs = [f.split('-') for f in self.files.keys()] self.bg_ifos = [at[0] for at in parsed_attrs if - (len(at) == 2 and at[1] == 'fit_coeffs')] - + (len(at) == 2 and at[1] == 'fit_coeffs')] if not len(self.bg_ifos): raise RuntimeError("None of the statistic files has the required " "attribute called {ifo}-fit_coeffs !") + # Get the single-detector rates fit values self.fits_by_tid = {} - self.alphamax = {} for i in self.bg_ifos: self.fits_by_tid[i] = self.assign_fits(i) - self.get_ref_vals(i) + # These are important for the coinc_lim_for_thresh method + # Is the single threshold a < or > limit? self.single_increasing = False + # Things for calculating the best-case scenario for signal rate + self.max_sigmasq = -numpy.inf + self.min_snr = numpy.inf + + # Some modifiers for the statistic to get it into a nice range + self.benchmark_lograte= self.kwargs.get('benchmark_lograte', -14.6) + self.min_stat = self.kwargs.get('minimum_statistic_cutoff', -30.) + + # Go through the keywords and add class information as needed: + if self.kwargs['sensitive_volume']: + # Add network sensitivity beckmark + self.single_dtype.append(('benchmark_logvol', numpy.float32)) + # benchmark_logvol is a benchmark sensitivity array + # over template id + ref_ifos = self.kwargs.get('reference_ifos', 'H1,L1').split(',') + hl_net_med_sigma = numpy.amin( + [self.fits_by_tid[ifo]['median_sigma'] + for ifo in ref_ifos], + axis=0 + ) + self.benchmark_logvol = 3. 
* numpy.log(hl_net_med_sigma) + self.curr_tnum = None + + if self.kwargs['dq']: + # Reweight the noise rate by the dq reweighting factor + self.dq_val_by_time = {} + self.dq_bin_by_id = {} + for k in self.files.keys(): + parsed_attrs = k.split('-') + if len(parsed_attrs) < 3: + continue + if parsed_attrs[2] == 'dq_ts_reference': + ifo = parsed_attrs[0] + dq_type = parsed_attrs[1] + dq_vals = self.assign_dq_val(k) + dq_bins = self.assign_bin_id(k) + if ifo not in self.dq_val_by_time: + self.dq_val_by_time[ifo] = {} + self.dq_bin_by_id[ifo] = {} + self.dq_val_by_time[ifo][dq_type] = dq_vals + self.dq_bin_by_id[ifo][dq_type] = dq_bins + + if self.kwargs['chirp_mass']: + # Reweight the signal rate by the chirp mass of the template + # This may be stored as a float, so cast just in case + self.mcm = float(self.kwargs.get('max_chirp_mass', numpy.inf)) + self.curr_mchirp = None + + if self.kwargs['kde']: + # Reweight the signal rate by a weighting factor from the KDE of + # a signal population normalised by expected relative rate of noise + # triggers for a template + self.kde_names = [] + self.find_kdes() + self.kde_by_tid = {} + for kname in self.kde_names: + self.assign_kdes(kname) + + def assign_bin_id(self, key): + """ + Assign bin ID values for DQ reweighting + + Assign each template id to a bin name based on a + referenced statistic file. + + Parameters + ---------- + key: str + statistic file key string + + Returns + --------- + bin_dict: dict of strs + Dictionary containing the bin name for each template id + """ + ifo = key.split('-')[0] + with h5py.File(self.files[key], 'r') as dq_file: + bin_names = dq_file.attrs['names'][:] + locs = [] + names = [] + for bin_name in bin_names: + bin_locs = dq_file[ifo + '/locs/' + bin_name][:] + locs = list(locs) + list(bin_locs.astype(int)) + names = list(names) + list([bin_name] * len(bin_locs)) + + bin_dict = dict(zip(locs, names)) + return bin_dict + + def assign_dq_val(self, key): + """ + Assign dq values to each time for every bin based on a + referenced statistic file. + + Parameters + ---------- + key: str + statistic file key string + + Returns + --------- + dq_dict: dict of {time: dq_value} dicts for each bin + Dictionary containing the mapping between the time + and the dq value for each individual bin. 
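The `dq` feature initialisation above builds nested lookup tables, and `assign_bin_id` / `assign_dq_val` (plus `find_dq_val` further down) walk them as template id → bin name → integer GPS time → dq value. An illustrative sketch with invented numbers:

```python
# Invented example data in the shape assign_dq_val / assign_bin_id produce
dq_val_by_time = {'bin_0': {1264000000: 0.0, 1264000001: 1.3},
                  'bin_1': {1264000000: 0.2, 1264000001: 0.0}}
dq_bin_by_id = {0: 'bin_0', 1: 'bin_1'}

def dq_val_for(template_id, end_time):
    """Look up the dq log-rate correction for a single trigger."""
    bin_name = dq_bin_by_id[template_id]
    return dq_val_by_time[bin_name][int(end_time)]

assert dq_val_for(0, 1264000001.2) == 1.3
```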
+ + """ + ifo = key.split('-')[0] + with h5py.File(self.files[key], 'r') as dq_file: + times = dq_file[ifo + '/times'][:] + bin_names = dq_file.attrs['names'][:] + dq_dict = {} + for bin_name in bin_names: + dq_vals = dq_file[ifo + '/dq_vals/' + bin_name][:] + dq_dict[bin_name] = dict(zip(times, dq_vals)) + + return dq_dict + + def find_dq_val(self, trigs): + """Get dq values for a specific ifo and times""" + time = trigs['end_time'].astype(int) + try: + tnum = trigs.template_num + ifo = trigs.ifo + except AttributeError: + tnum = trigs['template_id'] + assert len(self.ifos) == 1 + # Should be exactly one ifo provided + ifo = self.ifos[0] + dq_val = numpy.zeros(len(time)) + if ifo in self.dq_val_by_time: + for (i, t) in enumerate(time): + for k in self.dq_val_by_time[ifo].keys(): + if isinstance(tnum, numpy.ndarray): + bin_name = self.dq_bin_by_id[ifo][k][tnum[i]] + else: + bin_name = self.dq_bin_by_id[ifo][k][tnum] + val = self.dq_val_by_time[ifo][k][bin_name][int(t)] + dq_val[i] = max(dq_val[i], val) + return dq_val + def assign_fits(self, ifo): """ - Extract fits from fit files + Extract fits from single-detector rate fit files Parameters ----------- @@ -903,13 +1056,19 @@ def assign_fits(self, ifo): # create new arrays in template_id order for easier recall tid_sort = numpy.argsort(template_id) + analysis_time = float(coeff_file.attrs['analysis_time']) if \ + self.kwargs['normalize_fit_rate'] else 1 + fits_by_tid_dict = {} fits_by_tid_dict['smoothed_fit_coeff'] = \ coeff_file['fit_coeff'][:][tid_sort] fits_by_tid_dict['smoothed_rate_above_thresh'] = \ - coeff_file['count_above_thresh'][:][tid_sort].astype(float) + coeff_file['count_above_thresh'][:][tid_sort].astype(float) / analysis_time fits_by_tid_dict['smoothed_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) + coeff_file['count_in_template'][:][tid_sort].astype(float) / analysis_time + if self.kwargs['sensitive_volume']: + fits_by_tid_dict['median_sigma'] = \ + coeff_file['median_sigma'][:][tid_sort].astype(float) # The by-template fits may have been stored in the smoothed fits file if 'fit_by_template' in coeff_file: @@ -917,9 +1076,9 @@ def assign_fits(self, ifo): fits_by_tid_dict['fit_by_fit_coeff'] = \ coeff_fbt['fit_coeff'][:][tid_sort] fits_by_tid_dict['fit_by_rate_above_thresh'] = \ - coeff_fbt['count_above_thresh'][:][tid_sort].astype(float) + coeff_fbt['count_above_thresh'][:][tid_sort].astype(float) / analysis_time fits_by_tid_dict['fit_by_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) + coeff_file['count_in_template'][:][tid_sort].astype(float) / analysis_time # Keep the fit threshold in fits_by_tid fits_by_tid_dict['thresh'] = coeff_file.attrs['stat_threshold'] @@ -1006,11 +1165,46 @@ def find_fits(self, trigs): return alphai, ratei, thresh + def find_kdes(self): + """ + Find which associated files are for the KDE reweighting + """ + # The stat file attributes are hard-coded as 'signal-kde_file' + # and 'template-kde_file' + parsed_attrs = [f.split('-') for f in self.files.keys()] + self.kde_names = [at[0] for at in parsed_attrs if + (len(at) == 2 and at[1] == 'kde_file')] + assert sorted(self.kde_names) == ['signal', 'template'], \ + "Two stat files are required, they should have stat attr " \ + "'signal-kde_file' and 'template-kde_file' respectively" + + def assign_kdes(self, kname): + """ + Extract values from KDE files + + Parameters + ----------- + kname: str + Used to label the kde files. 
+ """ + with h5py.File(self.files[kname + '-kde_file'], 'r') as kde_file: + self.kde_by_tid[kname + '_kdevals'] = kde_file['data_kde'][:].astype(numpy.float32) + + def kde_ratio(self): + """ + Calculate the weighting factor according to the ratio of the + signal and template KDE lookup tables + """ + signal_kde = self.kde_by_tid["signal_kdevals"][self.curr_tnum] + template_kde = self.kde_by_tid["template_kdevals"][self.curr_tnum] + + return numpy.log(signal_kde / template_kde) + def lognoiserate(self, trigs): """ Calculate the log noise rate density over single-ifo ranking - Read in single trigger information, compute the ranking + Read in single trigger information, make the sngl_stat and rescale by the fitted coefficients alpha and rate Parameters @@ -1025,12 +1219,21 @@ def lognoiserate(self, trigs): """ alphai, ratei, thresh = self.find_fits(trigs) sngl_stat = self.get_sngl_ranking(trigs) - # alphai is constant of proportionality between single-ifo newsnr and - # negative log noise likelihood in given template - # ratei is rate of trigs in given template compared to average - # thresh is stat threshold used in given ifo lognoisel = - alphai * (sngl_stat - thresh) + numpy.log(alphai) + \ - numpy.log(ratei) + numpy.log(ratei) + if self.kwargs['alpha_constant_below_thresh']: + # Above the threshold we use the usual fit coefficient (alpha) + # below threshold use specified alphabelow + alphabelow = self.kwargs.get('alpha_below_thresh', 6) + bt = sngl_stat < thresh + lognoiselbt = - alphabelow * (sngl_stat - thresh) + \ + numpy.log(alphabelow) + numpy.log(ratei) + lognoisel[bt] = lognoiselbt[bt] + + if self.kwargs['dq']: + # Reweight the lognoiserate things by the dq reweighting factor + lognoisel += self.find_dq_val(trigs) + return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32) def single(self, trigs): @@ -1038,6 +1241,7 @@ def single(self, trigs): Calculate the necessary single detector information In this case the ranking rescaled (see the lognoiserate method here). + with the phase, end time, SNR values added in. 
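The reworked `lognoiserate` above evaluates the exponential-fit noise model, ln r = -α(ρ − ρ_thresh) + ln α + ln(rate), and, when `alpha_constant_below_thresh` is set, swaps in a fixed slope (default 6) below the fit threshold. A standalone numerical sketch of that calculation with invented fit values:

```python
import numpy

def log_noise_rate(sngl_stat, alpha, rate, thresh, alpha_below=6.):
    """Exponential-fit log noise rate with a constant slope below threshold."""
    sngl_stat = numpy.asarray(sngl_stat, dtype=float)
    lognoisel = (- alpha * (sngl_stat - thresh)
                 + numpy.log(alpha) + numpy.log(rate))
    below = sngl_stat < thresh
    lognoisel[below] = (- alpha_below * (sngl_stat[below] - thresh)
                        + numpy.log(alpha_below) + numpy.log(rate))
    return lognoisel

# invented fit coefficients for one template
print(log_noise_rate([5.5, 6.0, 8.0], alpha=4.0, rate=0.01, thresh=6.0))
```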
Parameters ---------- @@ -1050,11 +1254,86 @@ def single(self, trigs): The array of single detector values """ - return self.lognoiserate(trigs) + # single-ifo stat = log of noise rate + sngl_stat = self.lognoiserate(trigs) + # populate other fields to calculate phase/time/amp consistency + singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) + singles['snglstat'] = sngl_stat + singles['coa_phase'] = trigs['coa_phase'][:] + singles['end_time'] = trigs['end_time'][:] + singles['snr'] = trigs['snr'][:] + # Save info about best-case scenario for use later + self.min_snr = min(singles['snr'].min(), self.min_snr) + + if self.kwargs['sensitive_volume']: + # populate fields to allow sensitive volume factor calculation + singles['sigmasq'] = trigs['sigmasq'][:] + try: + # exists if accessed via coinc_findtrigs + self.curr_tnum = trigs.template_num + except AttributeError: + # exists for SingleDetTriggers + self.curr_tnum = trigs['template_id'] + # Should only be one ifo fit file provided + assert len(self.ifos) == 1 + # Store benchmark log volume as single-ifo information since + # the ranking methods do not have access to template id + singles['benchmark_logvol'] = \ + self.benchmark_logvol[self.curr_tnum] + + # Save info about the best-case scenario for use later: + max_sigsq = numpy.max(singles['sigmasq']) + self.max_sigmasq = max(max_sigsq, self.max_sigmasq) + + if self.kwargs['chirp_mass']: + from pycbc.conversions import mchirp_from_mass1_mass2 + self.curr_mchirp = mchirp_from_mass1_mass2(trigs.param['mass1'], + trigs.param['mass2']) + return numpy.array(singles, ndmin=1) + + def sensitive_volume_factor(self, sngls): + # Network sensitivity for a given coinc type is approximately + # determined by the least sensitive ifo + network_sigmasq = numpy.amin( + [sngl[1]['sigmasq'] for sngl in sngls], + axis=0 + ) + # Volume \propto sigma^3 or sigmasq^1.5 + network_logvol = 1.5 * numpy.log(network_sigmasq) + # Get benchmark log volume as single-ifo information : + # benchmark_logvol for a given template is not ifo-dependent, so + # choose the first ifo for convenience + benchmark_logvol = sngls[0][1]['benchmark_logvol'] + network_logvol -= benchmark_logvol + + return network_logvol + + def logsignalrate_shared(self, sngls_info): + """ + Calculate the parts of the log signal rate which are shared by + both the single and coinc ranking statistics + """ + # Other features affecting the signal rate + sr_factor = 0 + if self.kwargs['sensitive_volume']: + # Sensitive volume - this is proportional to signal rate + # assuming a homogeneous universe + sr_factor += self.sensitive_volume_factor(sngls_info) + + if self.kwargs['chirp_mass']: + # chirp mass reweighting + mchirp = min(self.curr_mchirp, self.mcm) + sr_factor += numpy.log((mchirp / 20.) ** (11. / 3.)) + + if self.kwargs['kde']: + # KDE reweighting + sr_factor += self.kde_ratio() + + return sr_factor def rank_stat_single(self, single_info): """ - Calculate the statistic for a single detector candidate + Calculate the statistic for single detector candidates Parameters ---------- @@ -1067,61 +1346,136 @@ def rank_stat_single(self, single_info): numpy.ndarray The array of single detector statistics """ - err_msg = "Sorry! No-one has implemented this method yet! 
" - raise NotImplementedError(err_msg) + sngls = single_info[1] + + # Basic noise rate: the exp fit rate from the single statistic + ln_noise_rate = sngls['snglstat'] + ln_noise_rate -= self.benchmark_lograte + + # Basic signal rate: snr ** -4 + ln_s = -4 * numpy.log(sngls['snr'] / self.ref_snr) + # Add in the feature-dependent terms to the signal rate + ln_s += self.logsignalrate_shared((single_info,)) + + # Combine the signal and noise rates + loglr = ln_s - ln_noise_rate + + # cut off underflowing and very small values + loglr[loglr < self.min_stat] = self.min_stat + return loglr def rank_stat_coinc(self, s, slide, step, to_shift, **kwargs): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) + # ranking statistic is -ln(expected rate density of noise triggers) + # plus normalization constant + sngl_dict = {sngl[0]: sngl[1]['snglstat'] for sngl in s} + + # Basic noise rate: sum of noise rates multiplied by the + # window they can form coincidences in + ln_noise_rate = coinc_rate.combination_noise_lograte( + sngl_dict, kwargs['time_addition']) + ln_noise_rate -= self.benchmark_lograte + + # Basic option is not to have any signal-based assumptions, + # so this is zero to begin with + ln_s = 0 + + if self.kwargs['phasetd']: + # Find total volume of phase-time-amplitude space occupied by + # noise coincs, so that the logsignalrate function is properly + # normalized + # Extent of time-difference space occupied + noise_twindow = coinc_rate.multiifo_noise_coincident_area( + self.hist_ifos, + kwargs['time_addition'] + ) + # Volume is the allowed time difference window, multiplied by 2pi for + # each phase difference dimension and by allowed range of SNR ratio + # for each SNR ratio dimension : there are (n_ifos - 1) dimensions + # for both phase and SNR + n_ifos = len(self.hist_ifos) + hist_vol = noise_twindow * \ + (2. * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \ + (n_ifos - 1) + # Noise PDF is 1/volume, assuming a uniform distribution of noise + # coincs + ln_noise_rate -= numpy.log(hist_vol) + + # What is the signal pdf? + stat = {ifo: st for ifo, st in s} + ln_s += self.logsignalrate(stat, slide * step, to_shift) + + # Add in the feature-dependent terms to the signal rate + ln_s += self.logsignalrate_shared(s) + + # Combine the signal and noise rates + loglr = ln_s - ln_noise_rate + + # cut off underflowing and very small values + loglr[loglr < self.min_stat] = self.min_stat + + return loglr def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) - # Keeping this here to help write the new coinc method. - def coinc_OLD(self, s0, s1, slide, step): # pylint:disable=unused-argument - """Calculate the final coinc ranking statistic""" - - # Approximate log likelihood ratio by summing single-ifo negative - # log noise likelihoods - loglr = - s0 - s1 - # add squares of threshold stat values via idealized Gaussian formula - threshes = [self.fits_by_tid[i]['thresh'] for i in self.bg_ifos] - loglr += sum([t ** 2. / 2. for t in threshes]) - # convert back to a coinc-SNR-like statistic - # via log likelihood ratio \propto rho_c^2 / 2 - return (2. 
* loglr) ** 0.5 - - # Keeping this here to help write the new coinc_lim method - def coinc_lim_for_thresh_OLD(self, s0, thresh): - """Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. + We are trying to get rid of as many events here at the point where + we can be confident that they will not meet ranking statistic + thresholds. - Parameters - ---------- - s0: numpy.ndarray - Single detector ranking statistic for the first detector. - thresh: float - The threshold on the coincident statistic. + The calculation here is "What is the minimum required snglstat in + the pivot IFO which could possibly pass the threshold?" - Returns - ------- - numpy.ndarray - Array of limits on the second detector single statistic to - exceed thresh. + There are a couple of points to be wary of here, e.g. in the signal + rate calculation, we take the best-case scenario. By using the + best-case for signal rate in this calculation, some events are kept + at this point which are hopeless. """ - s1 = - (thresh ** 2.) / 2. - s0 - threshes = [self.fits_by_tid[i]['thresh'] for i in self.bg_ifos] - s1 += sum([t ** 2. / 2. for t in threshes]) - return s1 + # Safety against subclassing and not rethinking this + allowed_names = ['ExpFitStatistic'] + self._check_coinc_lim_subclass(allowed_names) + + if thresh <= self.min_stat: + return numpy.ones(len(s[0][1]['snglstat'])) * numpy.inf + + # Modify the sngls so that the pivot ifo snglstats are zero + sngl_dict = {sngl[0]: sngl[1]['snglstat'] for sngl in s} + sngl_dict[limifo] = numpy.zeros(len(s[0][1])) + + # Noise rate calculated as normal. Because of the modification + # above, this is the rank_stat_coinc noise rate calculation + # minus the pivot ifo's snglstat + ln_noise_rate = coinc_rate.combination_noise_lograte( + sngl_dict, + kwargs['time_addition'] + ) + ln_noise_rate -= self.benchmark_lograte + + # Basic option is not to have any signal-based assumptions, + # so this is zero to begin with + ln_s = 0 + + if self.kwargs['phasetd']: + if not self.has_hist: + self.get_hist() + # Assume best-case scenario and use maximum signal rate + ln_s = numpy.log(self.hist_max + * (self.min_snr / self.ref_snr) ** -4.) + + # Shared info is the same as in the coinc calculation + ln_s += self.logsignalrate_shared(s) + + # Combine the signal and noise rates + loglr = ln_s - ln_noise_rate + + # From this combined rate, what is the minimum snglstat value + # in the pivot IFO needed to reach the threshold? + return loglr - thresh class ExpFitCombinedSNR(ExpFitStatistic): @@ -1262,7 +1616,7 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, return thresh * ((len(s) + 1) ** 0.5) - sum(sngl[1] for sngl in s) -class PhaseTDExpFitStatistic(PhaseTDStatistic, ExpFitCombinedSNR): +class PhaseTDExpFitStatistic(ExpFitStatistic): """ Statistic combining exponential noise model with signal histogram PDF """ @@ -1300,1314 +1654,45 @@ def update_file(self, key): uf_phasetd = PhaseTDStatistic.update_file(self, key) return uf_exp_fit or uf_phasetd - def single(self, trigs): - """ - Calculate the necessary single detector information +statistic_dict = { + 'quadsum': QuadratureSumStatistic, + 'single_ranking_only': QuadratureSumStatistic, + 'phasetd': PhaseTDStatistic, + 'exp_fit_csnr': ExpFitCombinedSNR, + 'exp_fit': ExpFitStatistic, +} - In this case the ranking rescaled (see the lognoiserate method here) - with the phase, end time, sigma and SNR values added in. 
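The `statistic_dict` / `get_statistic` pair collected here is the lookup that turns a command-line statistic name into a class. A hedged usage sketch, assuming PyCBC with this patch applied is importable; the constructor arguments are only indicative:

```python
from pycbc.events import stat

rank_cls = stat.get_statistic('exp_fit')   # -> ExpFitStatistic in this patch
# rank_method = rank_cls('newsnr', files=[...], ifos=['H1', 'L1'],
#                        sensitive_volume=True, phasetd=True)

try:
    stat.get_statistic('not_a_statistic')
except RuntimeError as exc:
    print(exc)  # "not_a_statistic is not an available detection statistic"
```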
- Parameters - ---------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. +def get_statistic(stat): + """ + Error-handling sugar around dict lookup for coincident statistics - Returns - ------- - numpy.ndarray - The array of single detector values - """ - # same single-ifo stat as ExpFitCombinedSNR - sngl_stat = ExpFitCombinedSNR.single(self, trigs) - singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles['snglstat'] = sngl_stat - singles['coa_phase'] = trigs['coa_phase'][:] - singles['end_time'] = trigs['end_time'][:] - singles['sigmasq'] = trigs['sigmasq'][:] - singles['snr'] = trigs['snr'][:] - return numpy.array(singles, ndmin=1) + Parameters + ---------- + stat : string + Name of the coincident statistic - def rank_stat_single(self, single_info): - """ - Calculate the statistic for a single detector candidate + Returns + ------- + class + Subclass of Stat base class - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. + Raises + ------ + RuntimeError + If the string is not recognized as corresponding to a Stat subclass + """ + try: + return statistic_dict[stat] + except KeyError: + raise RuntimeError('%s is not an available detection statistic' % stat) - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) +def insert_statistic_option_group(parser, default_ranking_statistic=None): + """ + Add ranking statistic options to the optparser object. - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) - - # Keeping the old statistic code here for now to help with reimplementing - def coinc_OLD(self, s0, s1, slide, step): - # logsignalrate function inherited from PhaseTDStatistic - logr_s = self.logsignalrate(s0, s1, slide * step) - # rescale by ExpFitCombinedSNR reference slope as for sngl stat - cstat = s0['snglstat'] + s1['snglstat'] + logr_s / self.alpharef - # cut off underflowing and very small values - cstat[cstat < 8.] = 8. - # scale to resemble network SNR - return cstat / (2. ** 0.5) - - def coinc_lim_for_thresh_OLD(self, s0, thresh): - # if the threshold is below this value all triggers will - # pass because of rounding in the coinc method - if thresh <= (8. / (2. ** 0.5)): - return -1. * numpy.ones(len(s0['snglstat'])) * numpy.inf - if not self.has_hist: - self.get_hist() - # Assume best case scenario and use maximum signal rate - logr_s = self.hist_max - s1 = (2 ** 0.5) * thresh - s0['snglstat'] - logr_s / self.alpharef - return s1 - - -class ExpFitBgRateStatistic(ExpFitStatistic): - """ - Detection statistic using an exponential falloff noise model. - - Statistic calculates the log noise coinc rate for each - template over single-ifo newsnr values. 
- """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - benchmark_lograte=-14.6, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - benchmark_lograte: float, default=-14.6 - benchmark_lograte is log of a representative noise trigger rate. - The default comes from H1L1 (O2) and is 4.5e-7 Hz. - """ - - super(ExpFitBgRateStatistic, self).__init__(sngl_ranking, - files=files, ifos=ifos, - **kwargs) - self.benchmark_lograte = benchmark_lograte - - # Reassign the rate to be number per time rather than an arbitrarily - # normalised number - for ifo in self.bg_ifos: - self.reassign_rate(ifo) - - def reassign_rate(self, ifo): - """ - Reassign the rate to be number per time rather - - Reassign the rate to be number per time rather than an arbitrarily - normalised number. - - Parameters - ----------- - ifo: str - The ifo to consider. - """ - with h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') as coeff_file: - analysis_time = float(coeff_file.attrs['analysis_time']) - fbt = 'fit_by_template' in coeff_file - - self.fits_by_tid[ifo]['smoothed_rate_above_thresh'] /= analysis_time - self.fits_by_tid[ifo]['smoothed_rate_in_template'] /= analysis_time - # The by-template fits may have been stored in the smoothed fits file - if fbt: - self.fits_by_tid[ifo]['fit_by_rate_above_thresh'] /= analysis_time - self.fits_by_tid[ifo]['fit_by_rate_in_template'] /= analysis_time - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. this statistic is inherited), they will - need updated separately - """ - # Check if the file to update is an ExpFit file - uf_expfit = ExpFitStatistic.update_file(self, key) - # If this has been updated we must do the reassign_rate step here - # on top of the file update from earlier - if uf_expfit: - # This is a fit coeff file which needs updating - # Which ifo is it? - ifo = key[:2] - self.reassign_rate(ifo) - return True - return False - - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. - - Parameters - ---------- - sngls_list: list - List of (ifo, single detector statistic) tuples - slide: (unused in this statistic) - step: (unused in this statistic) - to_shift: list - List of integers indicating what multiples of the time shift will - be applied (unused in this statistic) - - Returns - ------- - numpy.ndarray - Array of coincident ranking statistic values - """ - # ranking statistic is -ln(expected rate density of noise triggers) - # plus normalization constant - sngl_dict = {sngl[0]: sngl[1] for sngl in s} - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_dict, kwargs['time_addition']) - loglr = - ln_noise_rate + self.benchmark_lograte - return loglr - - def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. 
- - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. - """ - # Safety against subclassing and not rethinking this - allowed_names = ['ExpFitBgRateStatistic'] - self._check_coinc_lim_subclass(allowed_names) - - sngl_dict = {sngl[0]: sngl[1] for sngl in s} - sngl_dict[limifo] = numpy.zeros(len(s[0][1])) - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_dict, kwargs['time_addition']) - loglr = - thresh - ln_noise_rate + self.benchmark_lograte - return loglr - - -class ExpFitFgBgNormStatistic(PhaseTDStatistic, - ExpFitBgRateStatistic): - """ - Statistic combining PhaseTD, ExpFitBg and additional foreground info. - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - reference_ifos='H1,L1', **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs - The list of detector names - reference_ifos: string of comma separated ifo prefixes - Detectors to be used as the reference network for network - sensitivity comparisons. Each must be in fits_by_tid - """ - # read in background fit info and store it - ExpFitBgRateStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - # if ifos not already set, determine via background fit info - self.ifos = self.ifos or self.bg_ifos - # PhaseTD statistic single_dtype plus network sensitivity benchmark - PhaseTDStatistic.__init__(self, sngl_ranking, files=files, - ifos=self.ifos, **kwargs) - self.single_dtype.append(('benchmark_logvol', numpy.float32)) - - for ifo in self.bg_ifos: - self.assign_median_sigma(ifo) - - self.ref_ifos = reference_ifos.split(',') - self.benchmark_logvol = None - self.assign_benchmark_logvol() - self.single_increasing = False - # Initialize variable to hold event template id(s) - self.curr_tnum = None - - def assign_median_sigma(self, ifo): - """ - Read and sort the median_sigma values from input files. - - Parameters - ---------- - ifo: str - The ifo to consider. - """ - - with h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') as coeff_file: - template_id = coeff_file['template_id'][:] - tid_sort = numpy.argsort(template_id) - self.fits_by_tid[ifo]['median_sigma'] = \ - coeff_file['median_sigma'][:][tid_sort] - - def assign_benchmark_logvol(self): - """ - Assign the benchmark log-volume used by the statistic. - This is the sensitive log-volume of each template in the - network of reference IFOs - """ - # benchmark_logvol is a benchmark sensitivity array over template id - bench_net_med_sigma = numpy.amin( - [self.fits_by_tid[ifo]['median_sigma'] for ifo in self.ref_ifos], - axis=0, - ) - self.benchmark_logvol = 3. * numpy.log(bench_net_med_sigma) - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. 
this statistic is inherited), they will - need updated separately - """ - # Here we inherit the PhaseTD file checks - uf_phasetd = PhaseTDStatistic.update_file(self, key) - uf_exp_fit = ExpFitBgRateStatistic.update_file(self, key) - if uf_phasetd: - # The key to update refers to a PhaseTDStatistic file - return True - if uf_exp_fit: - # The key to update refers to a ExpFitBgRateStatistic file - # In this case we must reload some statistic information - # Which ifo is it? - ifo = key[:2] - self.assign_median_sigma(ifo) - self.assign_benchmark_logvol() - return True - return False - - def lognoiserate(self, trigs, alphabelow=6): - """ - Calculate the log noise rate density over single-ifo ranking - - Read in single trigger information, make the newsnr statistic - and rescale by the fitted coefficients alpha and rate - - Parameters - ----------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - alphabelow: float, default=6 - Use this slope to fit the noise triggers below the point at which - fits are present in the input files. - - Returns - --------- - lognoisel: numpy.array - Array of log noise rate density for each input trigger. - """ - alphai, ratei, thresh = self.find_fits(trigs) - newsnr = self.get_sngl_ranking(trigs) - # Above the threshold we use the usual fit coefficient (alpha) - # below threshold use specified alphabelow - bt = newsnr < thresh - lognoisel = - alphai * (newsnr - thresh) + numpy.log(alphai) + \ - numpy.log(ratei) - lognoiselbt = - alphabelow * (newsnr - thresh) + \ - numpy.log(alphabelow) + numpy.log(ratei) - lognoisel[bt] = lognoiselbt[bt] - return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32) - - def single(self, trigs): - """ - Calculate the necessary single detector information - - In this case the ranking rescaled (see the lognoiserate method here) - with the phase, end time, sigma, SNR, template_id and the - benchmark_logvol values added in. - - Parameters - ---------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - Returns - ------- - numpy.ndarray - The array of single detector values - """ - try: - # exists if accessed via coinc_findtrigs - self.curr_tnum = trigs.template_num - except AttributeError: - # exists for SingleDetTriggers & pycbc_live get_coinc - self.curr_tnum = trigs['template_id'] - - # single-ifo stat = log of noise rate - sngl_stat = self.lognoiserate(trigs) - # populate other fields to calculate phase/time/amp consistency - # and sigma comparison - singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles['snglstat'] = sngl_stat - singles['coa_phase'] = trigs['coa_phase'][:] - singles['end_time'] = trigs['end_time'][:] - singles['sigmasq'] = trigs['sigmasq'][:] - singles['snr'] = trigs['snr'][:] - - # Store benchmark log volume as single-ifo information since the coinc - # method does not have access to template id - singles['benchmark_logvol'] = self.benchmark_logvol[self.curr_tnum] - return numpy.array(singles, ndmin=1) - - def rank_stat_single(self, single_info): - """ - Calculate the statistic for single detector candidates - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. 
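The `median_sigma`, `benchmark_logvol` and `sigmasq` bookkeeping being removed from this class reappears in the modular `sensitive_volume` feature: the network volume term is 1.5·ln(min σ²) across the participating detectors, quoted against the reference-network benchmark 3·ln(min median σ). A worked sketch with invented numbers:

```python
import numpy

sigmasq = {'H1': 4.0e12, 'L1': 2.5e12}          # per-trigger sigma**2 (invented)
median_sigma_ref = {'H1': 1.8e6, 'L1': 1.5e6}   # per-template medians (invented)

# Network sensitivity is set by the least sensitive detector; volume ~ sigma**3
network_logvol = 1.5 * numpy.log(min(sigmasq.values()))
benchmark_logvol = 3. * numpy.log(min(median_sigma_ref.values()))
print(network_logvol - benchmark_logvol)        # ~ 0.16 added to the log rate
```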
- - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - sngls = single_info[1] - - ln_noise_rate = sngls['snglstat'] - ln_noise_rate -= self.benchmark_lograte - network_sigmasq = sngls['sigmasq'] - network_logvol = 1.5 * numpy.log(network_sigmasq) - benchmark_logvol = sngls['benchmark_logvol'] - network_logvol -= benchmark_logvol - ln_s = -4 * numpy.log(sngls['snr'] / self.ref_snr) - loglr = network_logvol - ln_noise_rate + ln_s - # cut off underflowing and very small values - loglr[loglr < -30.] = -30. - return loglr - - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. - - Parameters - ---------- - sngls_list: list - List of (ifo, single detector statistic) tuples - slide: (unused in this statistic) - step: (unused in this statistic) - to_shift: list - List of integers indicating what multiples of the time shift will - be applied (unused in this statistic) - - Returns - ------- - numpy.ndarray - Array of coincident ranking statistic values - """ - - sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s} - # Find total volume of phase-time-amplitude space occupied by - # noise coincs - if 'dets' in kwargs: - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_rates, kwargs['time_addition'], - kwargs['dets']) - # Extent of time-difference space occupied - noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, kwargs['time_addition'], - kwargs['dets']) - else: - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_rates, kwargs['time_addition']) - noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, kwargs['time_addition']) - - ln_noise_rate -= self.benchmark_lograte - - # Network sensitivity for a given coinc type is approximately - # determined by the least sensitive ifo - network_sigmasq = numpy.amin([sngl[1]['sigmasq'] for sngl in s], - axis=0) - # Volume \propto sigma^3 or sigmasq^1.5 - network_logvol = 1.5 * numpy.log(network_sigmasq) - # Get benchmark log volume as single-ifo information : - # benchmark_logvol for a given template is not ifo-dependent, so - # choose the first ifo for convenience - benchmark_logvol = s[0][1]['benchmark_logvol'] - network_logvol -= benchmark_logvol - - # Use prior histogram to get Bayes factor for signal vs noise - # given the time, phase and SNR differences between IFOs - - # First get signal PDF logr_s - stat = {ifo: st for ifo, st in s} - logr_s = self.logsignalrate(stat, slide * step, to_shift) - - # Volume is the allowed time difference window, multiplied by 2pi for - # each phase difference dimension and by allowed range of SNR ratio - # for each SNR ratio dimension : there are (n_ifos - 1) dimensions - # for both phase and SNR - n_ifos = len(self.hist_ifos) - hist_vol = noise_twindow * \ - (2. * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \ - (n_ifos - 1) - # Noise PDF is 1/volume, assuming a uniform distribution of noise - # coincs - logr_n = - numpy.log(hist_vol) - - # Combine to get final statistic: log of - # ((rate of signals / rate of noise) * PTA Bayes factor) - loglr = network_logvol - ln_noise_rate + logr_s - logr_n - - # cut off underflowing and very small values - loglr[loglr < -30.] = -30. 
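The phase-time-amplitude normalisation in the removed `rank_stat_coinc` above (and in the new `phasetd` branch) divides the signal histogram by the volume available to noise coincidences: the time-difference window times (2π × SNR-ratio range × bin width) for each extra detector. A numerical sketch with invented window and binning values:

```python
import numpy

n_ifos = 2
noise_twindow = 0.012                    # allowed time-difference window (s), invented
srbmin, srbmax, swidth = -10, 10, 0.2    # SNR-ratio bin limits and width, invented

hist_vol = noise_twindow * \
    (2. * numpy.pi * (srbmax - srbmin) * swidth) ** (n_ifos - 1)
logr_n = -numpy.log(hist_vol)            # uniform noise PDF is 1 / volume
print(logr_n)
```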
- return loglr - - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. - """ - - # Safety against subclassing and not rethinking this - allowed_names = ['ExpFitFgBgNormStatistic', - 'ExpFitFgBgNormBBHStatistic', - 'DQExpFitFgBgNormStatistic', - 'DQExpFitFgBgKDEStatistic', - 'ExpFitFgBgKDEStatistic'] - self._check_coinc_lim_subclass(allowed_names) - - if not self.has_hist: - self.get_hist() - # if the threshold is below this value all triggers will - # pass because of rounding in the coinc method - if thresh <= -30.: - return numpy.ones(len(s[0][1]['snglstat'])) * numpy.inf - sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s} - # Add limifo to singles dict so that overlap time is calculated correctly - sngl_rates[limifo] = numpy.zeros(len(s[0][1])) - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_rates, kwargs['time_addition']) - ln_noise_rate -= self.benchmark_lograte - - # Assume best case and use the maximum sigma squared from all triggers - network_sigmasq = numpy.ones(len(s[0][1])) * kwargs['max_sigmasq'] - # Volume \propto sigma^3 or sigmasq^1.5 - network_logvol = 1.5 * numpy.log(network_sigmasq) - # Get benchmark log volume as single-ifo information : - # benchmark_logvol for a given template is not ifo-dependent, so - # choose the first ifo for convenience - benchmark_logvol = s[0][1]['benchmark_logvol'] - network_logvol -= benchmark_logvol - - # Assume best case scenario and use maximum signal rate - logr_s = numpy.log(self.hist_max - * (kwargs['min_snr'] / self.ref_snr) ** -4.) - - # Find total volume of phase-time-amplitude space occupied by noise - # coincs - # Extent of time-difference space occupied - noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, kwargs['time_addition']) - # Volume is the allowed time difference window, multiplied by 2pi for - # each phase difference dimension and by allowed range of SNR ratio - # for each SNR ratio dimension : there are (n_ifos - 1) dimensions - # for both phase and SNR - n_ifos = len(self.hist_ifos) - hist_vol = noise_twindow * \ - (2. * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \ - (n_ifos - 1) - # Noise PDF is 1/volume, assuming a uniform distribution of noise - # coincs - logr_n = - numpy.log(hist_vol) - - loglr = - thresh + network_logvol - ln_noise_rate + logr_s - logr_n - return loglr - - -class ExpFitFgBgNormBBHStatistic(ExpFitFgBgNormStatistic): - """ - The ExpFitFgBgNormStatistic with a mass weighting factor. - - This is the same as the ExpFitFgBgNormStatistic except the likelihood - is multiplied by a signal rate prior modelled as uniform over chirp mass. - As templates are distributed roughly according to mchirp^(-11/3) we - weight by the inverse of this. This ensures that quiet signals at high - mass where template density is sparse are not swamped by events at lower - masses where template density is high. 
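The chirp-mass weighting this docstring describes (now applied through `logsignalrate_shared` when the `chirp_mass` feature is enabled) is (11/3)·ln(M_chirp/20), with the chirp mass capped at `max_chirp_mass` when that is supplied. For example:

```python
import numpy

def mchirp_logweight(mchirp, max_chirp_mass=numpy.inf):
    """Signal-rate log weight countering the mchirp**(-11/3) template density."""
    mchirp = min(mchirp, float(max_chirp_mass))
    return numpy.log((mchirp / 20.) ** (11. / 3.))

print(mchirp_logweight(10.))        # ~ -2.54: low-mass templates are dense
print(mchirp_logweight(80., 40.))   # capped at 40 -> ~ +2.54
```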
- """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - max_chirp_mass=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - max_chirp_mass: float, default=None - If given, if a template's chirp mass is above this value it will - be reweighted as if it had this chirp mass. This is to avoid the - problem where the distribution fails to be accurate at high mass - and we can have a case where a single highest-mass template might - produce *all* the loudest background (and foreground) events. - """ - ExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.mcm = max_chirp_mass - self.curr_mchirp = None - - def logsignalrate(self, stats, shift, to_shift): - """ - Calculate the normalized log rate density of signals via lookup - - This calls back to the Parent class and then applies the chirp mass - weighting factor. - - Parameters - ---------- - stats: list of dicts giving single-ifo quantities, ordered as - self.ifos - shift: numpy array of float, size of the time shift vector for each - coinc to be ranked - to_shift: list of int, multiple of the time shift to apply ordered - as self.ifos - - Returns - ------- - value: log of coinc signal rate density for the given single-ifo - triggers and time shifts - """ - # Model signal rate as uniform over chirp mass, background rate is - # proportional to mchirp^(-11/3) due to density of templates - logr_s = ExpFitFgBgNormStatistic.logsignalrate( - self, - stats, - shift, - to_shift - ) - logr_s += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.)) - return logr_s - - def single(self, trigs): - """ - Calculate the necessary single detector information - - In this case the ranking rescaled (see the lognoiserate method here) - with the phase, end time, sigma, SNR, template_id and the - benchmark_logvol values added in. This also stored the current chirp - mass for use when computing the coinc statistic values. - - Parameters - ---------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - - Returns - ------- - numpy.ndarray - The array of single detector values - """ - from pycbc.conversions import mchirp_from_mass1_mass2 - try: - mass1 = trigs.param['mass1'] - mass2 = trigs.param['mass2'] - except AttributeError: - mass1 = trigs['mass1'] - mass2 = trigs['mass2'] - self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2) - - if self.mcm is not None: - # Careful - input might be a str, so cast to float - self.curr_mchirp = min(self.curr_mchirp, float(self.mcm)) - return ExpFitFgBgNormStatistic.single(self, trigs) - - def rank_stat_single(self, single_info): - """ - Calculate the statistic for a single detector candidate - - This calls back to the Parent class and then applies the chirp mass - weighting factor. - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. 
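The `single` method above obtains the template chirp mass from `pycbc.conversions.mchirp_from_mass1_mass2`; for reference, the quantity it returns is M_chirp = (m1·m2)^(3/5) / (m1+m2)^(1/5):

```python
m1, m2 = 30.0, 25.0
mchirp = (m1 * m2) ** 0.6 / (m1 + m2) ** 0.2
print(mchirp)   # ~ 23.8 solar masses
```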
- - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - rank_sngl = ExpFitFgBgNormStatistic.rank_stat_single( - self, - single_info) - rank_sngl += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.)) - return rank_sngl - - def rank_stat_coinc(self, sngls_list, slide, step, to_shift, **kwargs): - """ - Calculate the coincident detection statistic. - - Parameters - ---------- - sngls_list: list - List of (ifo, single detector statistic) tuples - slide: (unused in this statistic) - step: (unused in this statistic) - to_shift: list - List of integers indicating what multiples of the time shift will - be applied (unused in this statistic) - - Returns - ------- - numpy.ndarray - Array of coincident ranking statistic values - """ - - if 'mchirp' in kwargs: - self.curr_mchirp = kwargs['mchirp'] - - return ExpFitFgBgNormStatistic.rank_stat_coinc(self, - sngls_list, - slide, - step, - to_shift, - **kwargs) - - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. - """ - loglr = ExpFitFgBgNormStatistic.coinc_lim_for_thresh( - self, s, thresh, limifo, **kwargs) - loglr += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.)) - return loglr - - -class ExpFitFgBgKDEStatistic(ExpFitFgBgNormStatistic): - """ - The ExpFitFgBgNormStatistic with an additional mass and spin weighting - factor determined by KDE statistic files. - - This is the same as the ExpFitFgBgNormStatistic except the likelihood - ratio is multiplied by the ratio of signal KDE to template KDE over some - parameters covering the bank. - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - """ - ExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.kde_names = [] - self.find_kdes() - self.kde_by_tid = {} - for kname in self.kde_names: - self.assign_kdes(kname) - - def find_kdes(self): - """ - Find which associated files are for the KDE reweighting - """ - # The stat file attributes are hard-coded as 'signal-kde_file' - # and 'template-kde_file' - parsed_attrs = [f.split('-') for f in self.files.keys()] - self.kde_names = [at[0] for at in parsed_attrs if - (len(at) == 2 and at[1] == 'kde_file')] - assert sorted(self.kde_names) == ['signal', 'template'], \ - "Two stat files are required, they should have stat attr " \ - "'signal-kde_file' and 'template-kde_file' respectively" - - def assign_kdes(self, kname): - """ - Extract values from KDE files - - Parameters - ----------- - kname: str - Used to label the kde files. 
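The KDE machinery being removed here (`find_kdes`, `assign_kdes`, `kde_ratio`) is what the modular `kde` feature re-uses: the weight is ln(signal KDE / template KDE) looked up at the current template id. A sketch with invented KDE values:

```python
import numpy

kde_by_tid = {
    'signal_kdevals': numpy.array([1.0e-3, 4.0e-3]),     # invented
    'template_kdevals': numpy.array([2.0e-3, 1.0e-3]),   # invented
}
curr_tnum = 1
kde_logratio = numpy.log(kde_by_tid['signal_kdevals'][curr_tnum]
                         / kde_by_tid['template_kdevals'][curr_tnum])
print(kde_logratio)   # ln(4) ~ +1.39
```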
- """ - with h5py.File(self.files[kname + '-kde_file'], 'r') as kde_file: - self.kde_by_tid[kname + '_kdevals'] = kde_file['data_kde'][:] - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. this statistic is inherited), they will - need updated separately - """ - # Inherit from ExpFitFgBgNormStatistic - uf_expfit = ExpFitFgBgNormStatistic.update_file(self, key) - if uf_expfit: - # The key to update refers to a ExpFitFgBgNormStatistic file - return True - # Is the key a KDE statistic file that we update here? - if key.endswith('kde_file'): - logger.info( - "Updating %s statistic %s file", - ''.join(self.ifos), - key - ) - kde_style = key.split('-')[0] - self.assign_kdes(kde_style) - return True - return False - - def kde_ratio(self): - """ - Calculate the weighting factor according to the ratio of the - signal and template KDE lookup tables - """ - signal_kde = self.kde_by_tid["signal_kdevals"][self.curr_tnum] - template_kde = self.kde_by_tid["template_kdevals"][self.curr_tnum] - - return numpy.log(signal_kde / template_kde) - - def logsignalrate(self, stats, shift, to_shift): - """ - Calculate the normalized log rate density of signals via lookup. - - This calls back to the parent class and then applies the ratio_kde - weighting factor. - - Parameters - ---------- - stats: list of dicts giving single-ifo quantities, ordered as - self.ifos - shift: numpy array of float, size of the time shift vector for each - coinc to be ranked - to_shift: list of int, multiple of the time shift to apply ordered - as self.ifos - - Returns - ------- - value: log of coinc signal rate density for the given single-ifo - triggers and time shifts - """ - logr_s = ExpFitFgBgNormStatistic.logsignalrate(self, stats, shift, - to_shift) - logr_s += self.kde_ratio() - - return logr_s - - def rank_stat_single(self, single_info): - """ - Calculate the statistic for a single detector candidate - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. - - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - rank_sngl = ExpFitFgBgNormStatistic.rank_stat_single( - self, - single_info) - rank_sngl += self.kde_ratio() - return rank_sngl - - def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed the - threshold for each of the input trigers. - - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. - """ - loglr = ExpFitFgBgNormStatistic.coinc_lim_for_thresh( - self, s, thresh, limifo, **kwargs) - signal_kde = self.kde_by_tid["signal_kdevals"][self.curr_tnum] - template_kde = self.kde_by_tid["template_kdevals"][self.curr_tnum] - loglr += numpy.log(signal_kde / template_kde) - return loglr - - -class DQExpFitFgBgNormStatistic(ExpFitFgBgNormStatistic): - """ - The ExpFitFgBgNormStatistic with DQ-based reranking. - - This is the same as the ExpFitFgBgNormStatistic except the likelihood - ratio is corrected via estimating relative noise trigger rates based on - the DQ time series. 
- """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - """ - ExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.dq_rates_by_state = {} - self.dq_bin_by_tid = {} - self.dq_state_segments = None - self.low_latency = False - self.single_dtype.append(('dq_state', int)) - - for ifo in self.ifos: - key = f'{ifo}-dq_stat_info' - if key in self.files.keys(): - self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) - self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) - self.check_low_latency(key) - if not self.low_latency: - if self.dq_state_segments is None: - self.dq_state_segments = {} - self.dq_state_segments[ifo] = self.setup_segments(key) - - def check_low_latency(self, key): - """ - Check if the statistic file indicates low latency mode. - Parameters - ---------- - key: str - Statistic file key string. - Returns - ------- - None - """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - ifo_grp = dq_file[ifo] - if 'dq_segments' not in ifo_grp.keys(): - # if segs are not in file, we must be in LL - if self.dq_state_segments is not None: - raise ValueError( - 'Either all dq stat files must have segments or none' - ) - self.low_latency = True - elif self.low_latency: - raise ValueError( - 'Either all dq stat files must have segments or none' - ) - - def assign_template_bins(self, key): - """ - Assign bin ID values - Assign each template id to a bin name based on a - referenced statistic file. - - Parameters - ---------- - key: str - statistic file key string - - Returns - --------- - bin_dict: dict of strs - Dictionary containing the bin name for each template id - """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - tids = [] - bin_nums = [] - bin_grp = dq_file[f'{ifo}/bins'] - for bin_name in bin_grp.keys(): - bin_tids = bin_grp[f'{bin_name}/tids'][:] - tids = list(tids) + list(bin_tids.astype(int)) - bin_nums = list(bin_nums) + list([bin_name] * len(bin_tids)) - - bin_dict = dict(zip(tids, bin_nums)) - return bin_dict - - def assign_dq_rates(self, key): - """ - Assign dq values to each time for every bin based on a - referenced statistic file. - - Parameters - ---------- - key: str - statistic file key string - - Returns - --------- - dq_dict: dict of {time: dq_value} dicts for each bin - Dictionary containing the mapping between the time - and the dq value for each individual bin. 
- - """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - bin_grp = dq_file[f'{ifo}/bins'] - dq_dict = {} - for bin_name in bin_grp.keys(): - dq_dict[bin_name] = bin_grp[f'{bin_name}/dq_rates'][:] - - return dq_dict - - def setup_segments(self, key): - """ - Store segments from stat file - """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - ifo_grp = dq_file[ifo] - dq_state_segs_dict = {} - for k in ifo_grp['dq_segments'].keys(): - seg_dict = {} - seg_dict['start'] = \ - ifo_grp[f'dq_segments/{k}/segment_starts'][:] - seg_dict['end'] = \ - ifo_grp[f'dq_segments/{k}/segment_ends'][:] - dq_state_segs_dict[k] = seg_dict - - return dq_state_segs_dict - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. this statistic is inherited), they will - need updated separately - """ - # Inherit from ExpFitFgBgNormStatistic - uf_expfit = ExpFitFgBgNormStatistic.update_file(self, key) - if uf_expfit: - # We have updated a ExpFitFgBgNormStatistic file already - return True - # We also need to check if the DQ files have updated - if key.endswith('dq_stat_info'): - ifo = key.split('-')[0] - logger.info( - "Updating %s statistic %s file", - ifo, - key - ) - self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) - self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) - return True - return False - - def find_dq_noise_rate(self, trigs): - """Get dq values for a specific ifo and dq states""" - - try: - ifo = trigs.ifo - except AttributeError: - ifo = trigs.get('ifo', None) - if ifo is None: - ifo = self.ifos[0] - assert ifo in self.ifos - - dq_state = trigs['dq_state'] - dq_val = numpy.ones(len(dq_state)) - - tnum = self.curr_tnum - if ifo in self.dq_rates_by_state: - for (i, st) in enumerate(dq_state): - if isinstance(tnum, numpy.ndarray): - bin_name = self.dq_bin_by_tid[ifo][tnum[i]] - else: - bin_name = self.dq_bin_by_tid[ifo][tnum] - dq_val[i] = self.dq_rates_by_state[ifo][bin_name][st] - return dq_val - - def find_dq_state_by_time(self, ifo, times): - """Get the dq state for an ifo at times""" - dq_state = numpy.zeros(len(times), dtype=numpy.uint8) - if ifo in self.dq_state_segments: - from pycbc.events.veto import indices_within_times - for k in self.dq_state_segments[ifo]: - starts = self.dq_state_segments[ifo][k]['start'] - ends = self.dq_state_segments[ifo][k]['end'] - inds = indices_within_times(times, starts, ends) - # states are named in file as 'dq_state_N', need to extract N - dq_state[inds] = int(k[9:]) - return dq_state - - def lognoiserate(self, trigs): - """ - Calculate the log noise rate density over single-ifo ranking - - Read in single trigger information, compute the ranking - and rescale by the fitted coefficients alpha and rate - - Parameters - ----------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - - Returns - --------- - lognoiserate: numpy.array - Array of log noise rate density for each input trigger. 
- """ - - dq_rate = self.find_dq_noise_rate(trigs) - dq_rate = numpy.maximum(dq_rate, 1) - - logr_n = ExpFitFgBgNormStatistic.lognoiserate( - self, trigs) - logr_n += numpy.log(dq_rate) - return logr_n - - def single(self, trigs): - # make sure every trig has a dq state - try: - ifo = trigs.ifo - except AttributeError: - ifo = trigs.get('ifo', None) - if ifo is None: - ifo = self.ifos[0] - assert ifo in self.ifos - - singles = ExpFitFgBgNormStatistic.single(self, trigs) - - if self.low_latency: - # trigs should already have a dq state assigned - singles['dq_state'] = trigs['dq_state'][:] - else: - singles['dq_state'] = self.find_dq_state_by_time( - ifo, trigs['end_time'][:] - ) - return singles - - -class DQExpFitFgBgKDEStatistic(DQExpFitFgBgNormStatistic): - """ - The ExpFitFgBgKDEStatistic with DQ-based reranking. - - This is the same as the DQExpFitFgBgNormStatistic except the signal - rate is adjusted according to the KDE statistic files - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - """ - DQExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.kde_names = [] - ExpFitFgBgKDEStatistic.find_kdes(self) - self.kde_by_tid = {} - for kname in self.kde_names: - ExpFitFgBgKDEStatistic.assign_kdes(self, kname) - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. 
this statistic is inherited), they will - need updated separately - """ - # Inherit from DQExpFitFgBgNormStatistic and ExpFitFgBgKDEStatistic - uf_dq = DQExpFitFgBgNormStatistic.update_file(self, key) - uf_kde = ExpFitFgBgKDEStatistic.update_file(self, key) - return uf_dq or uf_kde - - def kde_ratio(self): - """ - Inherited, see docstring for ExpFitFgBgKDEStatistic.kde_signalrate - """ - return ExpFitFgBgKDEStatistic.kde_ratio(self) - - def logsignalrate(self, stats, shift, to_shift): - """ - Inherited, see docstring for ExpFitFgBgKDEStatistic.logsignalrate - """ - return ExpFitFgBgKDEStatistic.logsignalrate(self, stats, shift, - to_shift) - - def rank_stat_single(self, single_info): - """ - Inherited, see docstring for ExpFitFgBgKDEStatistic.rank_stat_single - """ - return ExpFitFgBgKDEStatistic.rank_stat_single( - self, - single_info) - - def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): - """ - Inherited, see docstring for - ExpFitFgBgKDEStatistic.coinc_lim_for_thresh - """ - return ExpFitFgBgKDEStatistic.coinc_lim_for_thresh( - self, s, thresh, limifo, **kwargs) - - -statistic_dict = { - 'quadsum': QuadratureSumStatistic, - 'single_ranking_only': QuadratureSumStatistic, - 'phasetd': PhaseTDStatistic, - 'exp_fit_stat': ExpFitStatistic, - 'exp_fit_csnr': ExpFitCombinedSNR, - 'phasetd_exp_fit_stat': PhaseTDExpFitStatistic, - 'dq_phasetd_exp_fit_fgbg_norm': DQExpFitFgBgNormStatistic, - 'exp_fit_bg_rate': ExpFitBgRateStatistic, - 'phasetd_exp_fit_fgbg_norm': ExpFitFgBgNormStatistic, - 'phasetd_exp_fit_fgbg_bbh_norm': ExpFitFgBgNormBBHStatistic, - 'phasetd_exp_fit_fgbg_kde': ExpFitFgBgKDEStatistic, - 'dq_phasetd_exp_fit_fgbg_kde': DQExpFitFgBgKDEStatistic, -} - - -def get_statistic(stat): - """ - Error-handling sugar around dict lookup for coincident statistics - - Parameters - ---------- - stat : string - Name of the coincident statistic - - Returns - ------- - class - Subclass of Stat base class - - Raises - ------ - RuntimeError - If the string is not recognized as corresponding to a Stat subclass - """ - try: - return statistic_dict[stat] - except KeyError: - raise RuntimeError('%s is not an available detection statistic' % stat) - - -def insert_statistic_option_group(parser, default_ranking_statistic=None): - """ - Add ranking statistic options to the optparser object. - - Adds the options used to initialize a PyCBC Stat class. + Adds the options used to initialize a PyCBC Stat class. Parameters ----------- @@ -2660,10 +1745,19 @@ def insert_statistic_option_group(parser, default_ranking_statistic=None): "KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ..." ) + statistic_opt_group.add_argument( + "--statistic-features", + nargs='*', + default=[], + choices=_allowed_statistic_features, + help="Provide additional arguments to include particular " + "features in the ranking statistic." + ) + return statistic_opt_group -def parse_statistic_keywords_opt(stat_kwarg_list): +def parse_statistic_feature_options(opts): """ Parse the list of statistic keywords into an appropriate dictionary. 
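For orientation, a minimal sketch (not part of the patch) of how the new ``--statistic-features`` option added above might be exercised alongside the other options in this option group; the option values are illustrative and it assumes no further required options are missing:

    # Sketch only: assumes insert_statistic_option_group provides the options
    # used below, as in this patch series.
    import argparse
    from pycbc.events import stat as pycbcstat

    parser = argparse.ArgumentParser()
    pycbcstat.insert_statistic_option_group(parser)
    opts = parser.parse_args([
        '--ranking-statistic', 'exp_fit',
        '--sngl-ranking', 'newsnr_sgveto_psdvar',
        '--statistic-features', 'phasetd', 'sensitive_volume',
        '--statistic-keywords', 'alpha_below_thresh:6',
    ])
    # opts can then be handed to get_statistic_from_opts(opts, ifos) to
    # instantiate the requested Stat subclass.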
@@ -2681,6 +1775,7 @@ def parse_statistic_keywords_opt(stat_kwarg_list): Statistic keywords in dict format """ stat_kwarg_dict = {} + stat_kwarg_list = opts.statistic_keywords for inputstr in stat_kwarg_list: try: key, value = inputstr.split(':') @@ -2691,6 +1786,16 @@ def parse_statistic_keywords_opt(stat_kwarg_list): "Received {}".format(' '.join(stat_kwarg_list)) raise ValueError(err_txt) + # Check that the statistic keywords are allowed + for feature in opts.statistic_features: + if feature not in _allowed_statistic_features: + err_msg = f"--statistic-feature {feature} not recognised" + raise NotImplementedError(err_msg) + + # Set values for each feature key to a boolean of whether we want them + for feature in _allowed_statistic_features: + stat_kwarg_dict[feature] = feature in opts.statistic_features + return stat_kwarg_dict @@ -2726,7 +1831,7 @@ def get_statistic_from_opts(opts, ifos): isinstance(opts.statistic_files[0], list): opts.statistic_files = sum(opts.statistic_files, []) - extra_kwargs = parse_statistic_keywords_opt(opts.statistic_keywords) + extra_kwargs = parse_statistic_feature_options(opts) stat_class = get_statistic(opts.ranking_statistic)( opts.sngl_ranking, From 8cd3c21bbf10ee275c5a7537a550ecd6969dcf62 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 24 Jan 2024 03:04:02 -0800 Subject: [PATCH 03/32] remove unused statistic --- pycbc/events/stat.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 3c610cfba1d..0d7ba8959d3 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -1615,45 +1615,6 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, return thresh * ((len(s) + 1) ** 0.5) - sum(sngl[1] for sngl in s) - -class PhaseTDExpFitStatistic(ExpFitStatistic): - """ - Statistic combining exponential noise model with signal histogram PDF - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, needed here - The list of detector names - """ - # read in both foreground PDF and background fit info - ExpFitCombinedSNR.__init__(self, sngl_ranking, files=files, ifos=ifos, - **kwargs) - # need the self.single_dtype value from PhaseTDStatistic - PhaseTDStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. 
this statistic is inherited), they will - need updated separately - """ - # Here we inherit the PhaseTD and ExpFit file checks, - # nothing else needs doing - uf_exp_fit = ExpFitCombinedSNR.update_file(self, key) - uf_phasetd = PhaseTDStatistic.update_file(self, key) - return uf_exp_fit or uf_phasetd - statistic_dict = { 'quadsum': QuadratureSumStatistic, 'single_ranking_only': QuadratureSumStatistic, From 7173b4876db88e86c452084dfbcf0f2e5c016400 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 24 Jan 2024 03:14:40 -0800 Subject: [PATCH 04/32] use keyword:value rather than feature for alpha below --- pycbc/events/stat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 0d7ba8959d3..1507c4a60ba 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -44,7 +44,6 @@ 'dq', 'chirp_mass', 'sensitive_volume', - 'alpha_constant_below_thresh', 'normalize_fit_rate', ] @@ -1221,10 +1220,11 @@ def lognoiserate(self, trigs): sngl_stat = self.get_sngl_ranking(trigs) lognoisel = - alphai * (sngl_stat - thresh) + numpy.log(alphai) + \ numpy.log(ratei) - if self.kwargs['alpha_constant_below_thresh']: - # Above the threshold we use the usual fit coefficient (alpha) + + if 'alpha_below_thresh' in self.kwargs: + # Above the threshold we use the usual fit coefficient (alphai) # below threshold use specified alphabelow - alphabelow = self.kwargs.get('alpha_below_thresh', 6) + alphabelow = float(self.kwargs['alpha_below_thresh']) bt = sngl_stat < thresh lognoiselbt = - alphabelow * (sngl_stat - thresh) + \ numpy.log(alphabelow) + numpy.log(ratei) From 1d2058f02bc14f7ae79d41d9c5c3dfe583fd783c Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 24 Jan 2024 04:22:32 -0800 Subject: [PATCH 05/32] Codeclimate complaints --- pycbc/events/stat.py | 62 +++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 1507c4a60ba..c91ac6e2fe7 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -47,6 +47,7 @@ 'normalize_fit_rate', ] + class Stat(object): """Base class which should be extended to provide a statistic""" @@ -201,7 +202,7 @@ def single(self, trigs): # pylint:disable=unused-argument err_msg += "sub-classes. You shouldn't be seeing this error!" raise NotImplementedError(err_msg) - def rank_stat_single(self, single_info): + def rank_stat_single(self, single_info): # pylint:disable=unused-argument """ Calculate the statistic for a single detector candidate @@ -617,7 +618,8 @@ def update_file(self, key): def logsignalrate(self, stats, shift, to_shift): """ - Calculate the normalized log rate density of coincident signals via lookup + Calculate the normalized log rate density of coincident signals + via lookup Parameters ---------- @@ -851,7 +853,8 @@ class ExpFitStatistic(PhaseTDStatistic): Statistic approximates the negative log noise coinc rate density per template over single-ifo newsnr values. - Extra features can be added by supplying keyword arguments when initialising + Extra features can be added by supplying keyword arguments when + initialising. 
""" def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): @@ -900,7 +903,7 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.min_snr = numpy.inf # Some modifiers for the statistic to get it into a nice range - self.benchmark_lograte= self.kwargs.get('benchmark_lograte', -14.6) + self.benchmark_lograte = self.kwargs.get('benchmark_lograte', -14.6) self.min_stat = self.kwargs.get('minimum_statistic_cutoff', -30.) # Go through the keywords and add class information as needed: @@ -1055,16 +1058,13 @@ def assign_fits(self, ifo): # create new arrays in template_id order for easier recall tid_sort = numpy.argsort(template_id) - analysis_time = float(coeff_file.attrs['analysis_time']) if \ - self.kwargs['normalize_fit_rate'] else 1 - fits_by_tid_dict = {} fits_by_tid_dict['smoothed_fit_coeff'] = \ coeff_file['fit_coeff'][:][tid_sort] fits_by_tid_dict['smoothed_rate_above_thresh'] = \ - coeff_file['count_above_thresh'][:][tid_sort].astype(float) / analysis_time + coeff_file['count_above_thresh'][:][tid_sort].astype(float) fits_by_tid_dict['smoothed_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) / analysis_time + coeff_file['count_in_template'][:][tid_sort].astype(float) if self.kwargs['sensitive_volume']: fits_by_tid_dict['median_sigma'] = \ coeff_file['median_sigma'][:][tid_sort].astype(float) @@ -1075,9 +1075,16 @@ def assign_fits(self, ifo): fits_by_tid_dict['fit_by_fit_coeff'] = \ coeff_fbt['fit_coeff'][:][tid_sort] fits_by_tid_dict['fit_by_rate_above_thresh'] = \ - coeff_fbt['count_above_thresh'][:][tid_sort].astype(float) / analysis_time + coeff_fbt['count_above_thresh'][:][tid_sort].astype(float) fits_by_tid_dict['fit_by_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) / analysis_time + coeff_file['count_in_template'][:][tid_sort].astype(float) + + if self.kwargs['normalize_fit_rate']: + analysis_time = float(coeff_file.attrs['analysis_time']) + fits_by_tid_dict['smoothed_rate_above_thresh'] /= analysis_time + fits_by_tid_dict['smoothed_rate_in_template'] /= analysis_time + fits_by_tid_dict['fit_by_rate_above_thresh'] /= analysis_time + fits_by_tid_dict['fit_by_rate_in_template'] /= analysis_time # Keep the fit threshold in fits_by_tid fits_by_tid_dict['thresh'] = coeff_file.attrs['stat_threshold'] @@ -1292,6 +1299,16 @@ def single(self, trigs): return numpy.array(singles, ndmin=1) def sensitive_volume_factor(self, sngls): + """ + Determine the factor for different network sensitivities + + Assuming a homogeneous distribution of sources, the signal rate + should be given by the volume of the sphere to which we are + sensitive. + + This is then the cube of the sensitive distance (sigma), divided + by a benchmark volume. 
+ """ # Network sensitivity for a given coinc type is approximately # determined by the least sensitive ifo network_sigmasq = numpy.amin( @@ -1392,14 +1409,14 @@ def rank_stat_coinc(self, s, slide, step, to_shift, self.hist_ifos, kwargs['time_addition'] ) - # Volume is the allowed time difference window, multiplied by 2pi for - # each phase difference dimension and by allowed range of SNR ratio - # for each SNR ratio dimension : there are (n_ifos - 1) dimensions - # for both phase and SNR + # Volume is the allowed time difference window, multiplied by 2pi + # for each phase difference dimension and by allowed range of SNR + # ratio for each SNR ratio dimension : there are (n_ifos - 1) + # dimensions for both phase and SNR n_ifos = len(self.hist_ifos) + snr_range = (self.srbmax - self.srbmin) * self.swidth hist_vol = noise_twindow * \ - (2. * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \ - (n_ifos - 1) + (2. * numpy.pi * snr_range) ** (n_ifos - 1) # Noise PDF is 1/volume, assuming a uniform distribution of noise # coincs ln_noise_rate -= numpy.log(hist_vol) @@ -1509,16 +1526,6 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.single_increasing = True self.single_dtype = numpy.float32 - def use_alphamax(self): - """ - Compute the reference alpha from the fit files. - - Use the harmonic mean of the maximum individual ifo slopes as the - reference value of alpha. - """ - inv_alphas = [1. / self.alphamax[i] for i in self.bg_ifos] - self.alpharef = 1. / (sum(inv_alphas) / len(inv_alphas)) - def single(self, trigs): """ Calculate the necessary single detector information @@ -1615,6 +1622,7 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, return thresh * ((len(s) + 1) ** 0.5) - sum(sngl[1] for sngl in s) + statistic_dict = { 'quadsum': QuadratureSumStatistic, 'single_ranking_only': QuadratureSumStatistic, From 666f5feabab1539eeef59faba285188f2545d21c Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 24 Jan 2024 04:24:31 -0800 Subject: [PATCH 06/32] use new-style statistic in CI --- examples/search/analysis.ini | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/search/analysis.ini b/examples/search/analysis.ini index 4361ef1eb2c..eb1e0d0202d 100644 --- a/examples/search/analysis.ini +++ b/examples/search/analysis.ini @@ -171,7 +171,9 @@ smoothing-width = 0.4 analyze = [coinc&sngls] -ranking-statistic = phasetd_exp_fit_fgbg_norm +ranking-statistic = exp_fit +statistic-features = phasetd sensitive_volume normalize_fit_rate +statistic-keywords = alpha_below_thresh:6 sngl-ranking = newsnr_sgveto_psdvar randomize-template-order = statistic-files = ${resolve:./statHL.hdf} ${resolve:./statLV.hdf} ${resolve:./statHV.hdf} ${resolve:./statHLV.hdf} From 175e43dca55a9d88b6a5c85b8f90c6275b8f23f7 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Thu, 25 Jan 2024 03:37:48 -0800 Subject: [PATCH 07/32] fix in case teh fit_by_templte is not stored in the fit_over file --- pycbc/events/stat.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index c91ac6e2fe7..a44ee0001fd 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -1080,11 +1080,12 @@ def assign_fits(self, ifo): coeff_file['count_in_template'][:][tid_sort].astype(float) if self.kwargs['normalize_fit_rate']: - analysis_time = float(coeff_file.attrs['analysis_time']) - fits_by_tid_dict['smoothed_rate_above_thresh'] /= analysis_time - fits_by_tid_dict['smoothed_rate_in_template'] /= 
analysis_time - fits_by_tid_dict['fit_by_rate_above_thresh'] /= analysis_time - fits_by_tid_dict['fit_by_rate_in_template'] /= analysis_time + analysis_time = float(coeff_file.attrs['analysis_time']) + fits_by_tid_dict['smoothed_rate_above_thresh'] /= analysis_time + fits_by_tid_dict['smoothed_rate_in_template'] /= analysis_time + if 'fit_by_template' in coeff_file: + fits_by_tid_dict['fit_by_rate_above_thresh'] /= analysis_time + fits_by_tid_dict['fit_by_rate_in_template'] /= analysis_time # Keep the fit threshold in fits_by_tid fits_by_tid_dict['thresh'] = coeff_file.attrs['stat_threshold'] From 3ad042d5dd57493fd06432d1f66b72e0741a73a5 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Fri, 26 Jan 2024 02:29:05 -0800 Subject: [PATCH 08/32] remove testing change --- pycbc/events/stat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index a44ee0001fd..a9b0645efe6 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -1195,7 +1195,7 @@ def assign_kdes(self, kname): Used to label the kde files. """ with h5py.File(self.files[kname + '-kde_file'], 'r') as kde_file: - self.kde_by_tid[kname + '_kdevals'] = kde_file['data_kde'][:].astype(numpy.float32) + self.kde_by_tid[kname + '_kdevals'] = kde_file['data_kde'][:] def kde_ratio(self): """ From 47a21452d40d6f55c5b3f0e7f8cd5f96a5c60980 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Fri, 26 Jan 2024 02:37:49 -0800 Subject: [PATCH 09/32] fix usage of parse_statistic_feature_options in test --- pycbc/events/coinc.py | 6 +++++- pycbc/events/stat.py | 29 ++++++++++++++++------------- test/test_live_coinc_compare.py | 1 + 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/pycbc/events/coinc.py b/pycbc/events/coinc.py index ee729a97bcc..4d686941e11 100644 --- a/pycbc/events/coinc.py +++ b/pycbc/events/coinc.py @@ -973,12 +973,16 @@ def from_cli(cls, args, num_templates, analysis_chunk, ifos): # Allow None inputs stat_files = args.statistic_files or [] + stat_features = args.statistic_features or [] stat_keywords = args.statistic_keywords or [] # flatten the list of lists of filenames to a single list (may be empty) stat_files = sum(stat_files, []) - kwargs = pycbcstat.parse_statistic_keywords_opt(stat_keywords) + kwargs = pycbcstat.parse_statistic_feature_options( + stat_features, + stat_keywords, + ) return cls(num_templates, analysis_chunk, args.ranking_statistic, diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index a9b0645efe6..26d92cf39cb 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -1727,7 +1727,7 @@ def insert_statistic_option_group(parser, default_ranking_statistic=None): return statistic_opt_group -def parse_statistic_feature_options(opts): +def parse_statistic_feature_options(stat_features, stat_kwarg_list): """ Parse the list of statistic keywords into an appropriate dictionary. 
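As a quick illustration of the reworked interface (a sketch, not part of the patch; the function body follows in the next hunk), the features become boolean flags while plain keywords keep their string values:

    # Sketch only: the feature and keyword values are examples taken from
    # elsewhere in this patch series.
    from pycbc.events import stat as pycbcstat

    extra_kwargs = pycbcstat.parse_statistic_feature_options(
        ['phasetd', 'sensitive_volume', 'normalize_fit_rate'],  # --statistic-features
        ['alpha_below_thresh:6'],                                # --statistic-keywords
    )
    # Every entry of _allowed_statistic_features gets a boolean flag, e.g.
    # extra_kwargs['phasetd'] is True and extra_kwargs['dq'] is False, while
    # keyword values remain strings: extra_kwargs['alpha_below_thresh'] == '6'.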
@@ -1745,7 +1745,17 @@ def parse_statistic_feature_options(opts): Statistic keywords in dict format """ stat_kwarg_dict = {} - stat_kwarg_list = opts.statistic_keywords + + # Check that the statistic keywords are allowed + for feature in stat_features: + if feature not in _allowed_statistic_features: + err_msg = f"--statistic-feature {feature} not recognised" + raise NotImplementedError(err_msg) + + # Set values for each feature key to a boolean of whether we want them + for feature in _allowed_statistic_features: + stat_kwarg_dict[feature] = feature in stat_features + for inputstr in stat_kwarg_list: try: key, value = inputstr.split(':') @@ -1756,16 +1766,6 @@ def parse_statistic_feature_options(opts): "Received {}".format(' '.join(stat_kwarg_list)) raise ValueError(err_txt) - # Check that the statistic keywords are allowed - for feature in opts.statistic_features: - if feature not in _allowed_statistic_features: - err_msg = f"--statistic-feature {feature} not recognised" - raise NotImplementedError(err_msg) - - # Set values for each feature key to a boolean of whether we want them - for feature in _allowed_statistic_features: - stat_kwarg_dict[feature] = feature in opts.statistic_features - return stat_kwarg_dict @@ -1801,7 +1801,10 @@ def get_statistic_from_opts(opts, ifos): isinstance(opts.statistic_files[0], list): opts.statistic_files = sum(opts.statistic_files, []) - extra_kwargs = parse_statistic_feature_options(opts) + extra_kwargs = parse_statistic_feature_options( + opts.statistic_features, + opts.statistic_keywords, + ) stat_class = get_statistic(opts.ranking_statistic)( opts.sngl_ranking, diff --git a/test/test_live_coinc_compare.py b/test/test_live_coinc_compare.py index 531bb8d16f0..4039541fcbb 100644 --- a/test/test_live_coinc_compare.py +++ b/test/test_live_coinc_compare.py @@ -73,6 +73,7 @@ def setUp(self, *args): ranking_statistic="phasetd", statistic_files=[stat_file_paths], statistic_keywords=None, + statistic_features=None, timeslide_interval=0.1, background_ifar_limit=100, store_background=True, From 7b95712870f9b08084ecf2abfa72ec3a1b7e0cfa Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Mon, 5 Feb 2024 08:03:50 -0800 Subject: [PATCH 10/32] Docstrings for various functions --- pycbc/events/stat.py | 81 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 26d92cf39cb..86c7d8f759e 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -1015,7 +1015,19 @@ def assign_dq_val(self, key): return dq_dict def find_dq_val(self, trigs): - """Get dq values for a specific ifo and times""" + """Get dq values for a specific ifo and times + + Parameters + ---------- + trigs: ReadByTempate or SingleDetTriggers object + Object containing information about the triggers to be + checked + + Returns + ------- + dq_val: numpy array + The value of the dq reweighting factor for each trigger + """ time = trigs['end_time'].astype(int) try: tnum = trigs.template_num @@ -1307,8 +1319,18 @@ def sensitive_volume_factor(self, sngls): should be given by the volume of the sphere to which we are sensitive. - This is then the cube of the sensitive distance (sigma), divided - by a benchmark volume. + Parameters + ---------- + sngls: tuple + Single-detector information, tuples of the (ifo, sngl_object), + where sngl_object is a ReadByTemplate object or similar. + + Returns + ------- + network_logvol: numpy.array + Array of values for the network log-volume factor. 
This is the + log of the cube of the sensitive distance (sigma), divided by + a benchmark volume. """ # Network sensitivity for a given coinc type is approximately # determined by the least sensitive ifo @@ -1330,6 +1352,18 @@ def logsignalrate_shared(self, sngls_info): """ Calculate the parts of the log signal rate which are shared by both the single and coinc ranking statistics + + Parameters + ---------- + sngls_info: tuple + Single-detector information, tuples of the (ifo, sngl_object), + where sngl_object is a ReadByTemplate object or similar. + + Returns + ------- + sr_factor: numpy.ndarray + Array of values to be added to the ranking statistic when + taking various signal rate factors into account """ # Other features affecting the signal rate sr_factor = 0 @@ -1386,6 +1420,28 @@ def rank_stat_coinc(self, s, slide, step, to_shift, **kwargs): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. + + Parameters + ---------- + s: list + List of (ifo, single detector statistic) tuples + slide: numpy array + The number of steps by which to shift each trigger + step: float + The size of each step, to be multiplied by to_shift + to_shift: list + List of integers indicating what multiples of the time shift will + be applied + kwargs: various + Key-word arguments to define different features and tunable + values in the statistic. Only time_addition is used here + + Returns + ------- + loglr: numpy.ndarray + The ranking statistic for each trigger based on the supplied + triggers, requested features and keyword arguments + """ # ranking statistic is -ln(expected rate density of noise triggers) # plus normalization constant @@ -1453,6 +1509,25 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, rate calculation, we take the best-case scenario. By using the best-case for signal rate in this calculation, some events are kept at this point which are hopeless. + + Parameters + ---------- + s: list + List of (ifo, single detector statistic) tuples + thresh: float + The threshold value to be checked against + limifo: string + The pivot detector, i.e. the one in which the sngl stat must + reach the value output by ths function + kwargs: various + Key-word arguments to define different features and tunable + values in the statistic. 
Only time_addition is used here + + Returns + ------- + numpy.array + The minimum snglstat values required in the 'pivot' detector + in order to reach the threshold specified """ # Safety against subclassing and not rethinking this allowed_names = ['ExpFitStatistic'] From 1e0bd5c73a60d421755f8947db60ce681db97177 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Mon, 5 Feb 2024 08:27:50 -0800 Subject: [PATCH 11/32] Add back in the changes from #4603 --- pycbc/events/stat.py | 150 +++++++++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 61 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 86c7d8f759e..42e091b05b5 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -923,22 +923,16 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): if self.kwargs['dq']: # Reweight the noise rate by the dq reweighting factor - self.dq_val_by_time = {} - self.dq_bin_by_id = {} - for k in self.files.keys(): - parsed_attrs = k.split('-') - if len(parsed_attrs) < 3: - continue - if parsed_attrs[2] == 'dq_ts_reference': - ifo = parsed_attrs[0] - dq_type = parsed_attrs[1] - dq_vals = self.assign_dq_val(k) - dq_bins = self.assign_bin_id(k) - if ifo not in self.dq_val_by_time: - self.dq_val_by_time[ifo] = {} - self.dq_bin_by_id[ifo] = {} - self.dq_val_by_time[ifo][dq_type] = dq_vals - self.dq_bin_by_id[ifo][dq_type] = dq_bins + self.dq_rates_by_state = {} + self.dq_bin_by_tid = {} + self.dq_state_segments = {} + + for ifo in self.ifos: + key = f'{ifo}-dq_stat_info' + if key in self.files.keys(): + self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) + self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) + self.dq_state_segments[ifo] = self.setup_segments(key) if self.kwargs['chirp_mass']: # Reweight the signal rate by the chirp mass of the template @@ -956,10 +950,9 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): for kname in self.kde_names: self.assign_kdes(kname) - def assign_bin_id(self, key): + def assign_template_bins(self, key): """ - Assign bin ID values for DQ reweighting - + Assign bin ID values Assign each template id to a bin name based on a referenced statistic file. @@ -975,18 +968,18 @@ def assign_bin_id(self, key): """ ifo = key.split('-')[0] with h5py.File(self.files[key], 'r') as dq_file: - bin_names = dq_file.attrs['names'][:] - locs = [] - names = [] - for bin_name in bin_names: - bin_locs = dq_file[ifo + '/locs/' + bin_name][:] - locs = list(locs) + list(bin_locs.astype(int)) - names = list(names) + list([bin_name] * len(bin_locs)) - - bin_dict = dict(zip(locs, names)) + tids = [] + bin_nums = [] + bin_grp = dq_file[f'{ifo}/bins'] + for bin_name in bin_grp.keys(): + bin_tids = bin_grp[f'{bin_name}/tids'][:] + tids = list(tids) + list(bin_tids.astype(int)) + bin_nums = list(bin_nums) + list([bin_name] * len(bin_tids)) + + bin_dict = dict(zip(tids, bin_nums)) return bin_dict - def assign_dq_val(self, key): + def assign_dq_rates(self, key): """ Assign dq values to each time for every bin based on a referenced statistic file. 
@@ -1005,50 +998,73 @@ def assign_dq_val(self, key): """ ifo = key.split('-')[0] with h5py.File(self.files[key], 'r') as dq_file: - times = dq_file[ifo + '/times'][:] - bin_names = dq_file.attrs['names'][:] + bin_grp = dq_file[f'{ifo}/bins'] dq_dict = {} - for bin_name in bin_names: - dq_vals = dq_file[ifo + '/dq_vals/' + bin_name][:] - dq_dict[bin_name] = dict(zip(times, dq_vals)) + for bin_name in bin_grp.keys(): + dq_dict[bin_name] = bin_grp[f'{bin_name}/dq_rates'][:] return dq_dict - def find_dq_val(self, trigs): - """Get dq values for a specific ifo and times + def setup_segments(self, key): + """ + Check if segments definitions are in stat file + If they are, we are running offline and need to store them + If they aren't, we are running online + """ + ifo = key.split('-')[0] + with h5py.File(self.files[key], 'r') as dq_file: + ifo_grp = dq_file[ifo] + dq_state_segs_dict = {} + for k in ifo_grp['dq_segments'].keys(): + seg_dict = {} + seg_dict['start'] = \ + ifo_grp[f'dq_segments/{k}/segment_starts'][:] + seg_dict['end'] = \ + ifo_grp[f'dq_segments/{k}/segment_ends'][:] + dq_state_segs_dict[k] = seg_dict - Parameters - ---------- - trigs: ReadByTempate or SingleDetTriggers object - Object containing information about the triggers to be - checked + return dq_state_segs_dict + + def find_dq_noise_rate(self, trigs, dq_state): + """Get dq values for a specific ifo and dq states""" - Returns - ------- - dq_val: numpy array - The value of the dq reweighting factor for each trigger - """ - time = trigs['end_time'].astype(int) try: tnum = trigs.template_num - ifo = trigs.ifo except AttributeError: tnum = trigs['template_id'] - assert len(self.ifos) == 1 + + try: + ifo = trigs.ifo + except AttributeError: + ifo = trigs['ifo'] + assert len(numpy.unique(ifo)) == 1 # Should be exactly one ifo provided - ifo = self.ifos[0] - dq_val = numpy.zeros(len(time)) - if ifo in self.dq_val_by_time: - for (i, t) in enumerate(time): - for k in self.dq_val_by_time[ifo].keys(): - if isinstance(tnum, numpy.ndarray): - bin_name = self.dq_bin_by_id[ifo][k][tnum[i]] - else: - bin_name = self.dq_bin_by_id[ifo][k][tnum] - val = self.dq_val_by_time[ifo][k][bin_name][int(t)] - dq_val[i] = max(dq_val[i], val) + ifo = ifo[0] + + dq_val = numpy.zeros(len(dq_state)) + + if ifo in self.dq_rates_by_state: + for (i, st) in enumerate(dq_state): + if isinstance(tnum, numpy.ndarray): + bin_name = self.dq_bin_by_tid[ifo][tnum[i]] + else: + bin_name = self.dq_bin_by_tid[ifo][tnum] + dq_val[i] = self.dq_rates_by_state[ifo][bin_name][st] return dq_val + def find_dq_state_by_time(self, ifo, times): + """Get the dq state for an ifo at times""" + dq_state = numpy.zeros(len(times), dtype=numpy.uint8) + if ifo in self.dq_state_segments: + from pycbc.events.veto import indices_within_times + for k in self.dq_state_segments[ifo]: + starts = self.dq_state_segments[ifo][k]['start'] + ends = self.dq_state_segments[ifo][k]['end'] + inds = indices_within_times(times, starts, ends) + # states are named in file as 'dq_state_N', need to extract N + dq_state[inds] = int(k[9:]) + return dq_state + def assign_fits(self, ifo): """ Extract fits from single-detector rate fit files @@ -1252,7 +1268,19 @@ def lognoiserate(self, trigs): if self.kwargs['dq']: # Reweight the lognoiserate things by the dq reweighting factor - lognoisel += self.find_dq_val(trigs) + # make sure every trig has a dq state + try: + ifo = trigs.ifo + except AttributeError: + ifo = trigs['ifo'] + assert len(numpy.unique(ifo)) == 1 + # Should be exactly one ifo provided + ifo = 
ifo[0] + + dq_state = self.find_dq_state_by_time(ifo, trigs['end_time'][:]) + dq_rate = self.find_dq_noise_rate(trigs, dq_state) + dq_rate = numpy.maximum(dq_rate, 1) + lognoisel += numpy.log(dq_rate) return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32) From 9e3c0f56f41e7e72500c0170badcb36fe62a44ea Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 6 Feb 2024 05:23:21 -0800 Subject: [PATCH 12/32] Add description of the statistics to the documentation --- .../pycbc_make_offline_search_workflow.rst | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst b/docs/workflow/pycbc_make_offline_search_workflow.rst index 7addf015000..4a5eebd95d7 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -306,9 +306,72 @@ Specify the name of the channel you want to run the inspiral analysis over for H [coinc] coinc-threshold = 0.000 + ranking-statistic = exp_fit + sngl-ranking = newsnr_sgveto_psdvar + statistic-features = sensitive_volume normalize_fit_rate phasetd + statistic-keywords = alpha_below_thresh:6 sngl_ranking_min_expected_psdvar:0.7 Here we are doing exact match coincidence. So we take the light travel time between detectors and look for triggers which are coincident within this time window. The threshold defines if you want to extend the window. +How triggers are ranked is defined by the ranking-statistic, sngl-ranking, statistic-features and statistic-keywords options. + - ``sngl-ranking`` = The ranking used for single-detector triggers, this is generally a re-weighting of the SNR. + - ``ranking-statistic`` = How the triggers from a set of detectors are ranked in order to calculate significance. This will take the form of an snr-like combination (``quadsum``, ``phasetd``, ``exp_fit_csnr``), or a log-rates-like statistic, ``exp_fit``. See Statistics table below for the options. + - ``statistic-features`` = If using ranking-statistic ``exp_fit``, then these are the features to add or subtract from the ranking statistic. These are described in the Features table below. + - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the Keywords table below. + - ``statistic-files`` = Files to be used in the statistic calculation, of particular note here are the files needed for DQ and KDE reranking. + +.. list-table:: Statistics: ``ranking-statistic`` + :widths: 25 75 + :header-rows: 1 + * - Statistic name + - Description + * - ``quadsum`` + - The quadrature sum of triggers in each detector in the triggered network. ``sngl_ranking_only`` can also be given and is exactly equivalent. + * - ``phasetd`` + - The same as ``quadsum``, but reweighted by the coincident parameters. + * - ``exp_fit_csnr`` + - This is a reworking of the exponential fit designed to resemble network SNR. Uses a monotonic function of the negative log noise rate density which approximates combined sngl-ranking for coincs with similar newsnr in each ifo + * - ``exp_fit`` + - The ratio of signal-to-noise rates in the triggered network of detectors. The trigger density at a given sngl-ranking is approximated for each template, and this is combined for the triggered network. + +.. 
list-table:: Features for use with the ``exp_fit``: ``statistic-features`` + :widths: 25 75 + :header-rows: 1 + + * - Feature name + - Description + * - ``phasetd`` + - Use a histogram of expected phase and time differences, and amplitude ratio, for signals to determine a factor to be added for the signal rate. + * - ``sensitive_volume`` + - Signal rate is expected to be proportional to the cube of the sensitive distance. This feature adds a factor of :math:`log(\sigma^3)` minus a benchmark value, to make this zero in many cases. + * - ``normalize_fit_rate`` + - Normalise the exponential fits to use a rate rather than an absolute count of triggers. This means that statistics should be comparable over differently-sized analyses. + * - ``dq`` + - Apply a reweighting factor according to the rates of triggers during data-quality flags vs the rate outside this. Must supply a reranking file using ``statistic-files`` for each detector, with stat attribute '{detector}-dq_stat_info' + * - ``kde`` + - Use a file to re-rank according to the signal and density rates calculated using a KDE approach. Must supply two reranking files using ``statistic-files`` with stat attributes 'signal-kde_file' and 'template-kde_file' respectively. + * - ``chirp_mass`` + - Apply a factor of :math:`log((M_c / 20) ^{11 / 3})` to the statistic. This makes the signal rate uniform over chirp mass, as this factor cancels out the power of -11 / 3 caused by differences in the density of template placement. + +.. list-table:: Keywords: ``statistic-keywords`` + :widths: 25 75 + :header-rows: 1 + + * - Keyword + - Description + * - ``benchmark_lograte`` + - This is a numerical factor to be subtracted from the log rate ratio in order to alter the dynamic range. Default -14.6. + * - ``minimum_statistic_cutoff`` + - Cutoff for the statistic in order to avoid underflowing and very small statistic values. Default -30. + * - ``alpha_below_thresh`` + - If given, the fit coefficient (alpha) below the fit threshold (defined in the fit_by_template jobs) will be replaced by a standard value. This is as below this threshold, Gaussian noise can dominate over the glitch response that dominates above the threshold, and rates will be underestimated, boosting quiet things in noisy templates.. For Gaussian noise, this will be approximately 6. + * - ``reference_ifos`` + - If using the ``sensitive_volume`` feature, these are the detectors used to determine the benchmark value by which the sensitive volume is compared. We use the median sensitive volume in the network of detectors supplied. Default H1,L1. + * - ``max_chirp_mass`` + - If using the ``chirp_mass`` feature, this chirp mass defines a maximum weighting which can be applied to the statistic. + * - ``sngl_ranking_*`` + - This is used to provide the keyword arguments to functions in :xref:`the events.ranking module`. For example, to use a different psdvar threshold in the newsnr_sgveto_psdvar_threshold function, we would use ``sngl_ranking_psd_var_val_threshold:10``. 
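To make the ``chirp_mass`` and ``max_chirp_mass`` entries in the table above concrete, a short worked example of the quoted factor (the chirp-mass values are illustrative only)::

    # Worked example of log((M_c / 20) ** (11 / 3)); not part of the patch.
    import numpy

    mchirp = 1.2   # a BNS-like template chirp mass in solar masses
    factor = numpy.log((mchirp / 20.) ** (11. / 3.))
    # factor is about -10.3 here; it is 0 for mchirp == 20 and about +1.5 for
    # mchirp == 30, where any cap set via max_chirp_mass would limit the boost.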
+ :: [coinc-full] From 3a4bce4b27aa6432bf99c372892b4ce1eb0dc793 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 6 Feb 2024 05:48:14 -0800 Subject: [PATCH 13/32] fix error if passing keywords which need to be floats, rework the alpha_below_thresh keyword --- .../pycbc_make_offline_search_workflow.rst | 2 +- pycbc/events/stat.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst b/docs/workflow/pycbc_make_offline_search_workflow.rst index 4a5eebd95d7..f3d4d0e27d9 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -364,7 +364,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``minimum_statistic_cutoff`` - Cutoff for the statistic in order to avoid underflowing and very small statistic values. Default -30. * - ``alpha_below_thresh`` - - If given, the fit coefficient (alpha) below the fit threshold (defined in the fit_by_template jobs) will be replaced by a standard value. This is as below this threshold, Gaussian noise can dominate over the glitch response that dominates above the threshold, and rates will be underestimated, boosting quiet things in noisy templates.. For Gaussian noise, this will be approximately 6. + - The fit coefficient (alpha) below the fit threshold (defined in the fit_by_template jobs) will be replaced by a standard value. This is as below this threshold, Gaussian noise can dominate over the glitch response that dominates above the threshold, and rates will be underestimated, boosting quiet things in noisy templates. For Gaussian noise, this will be approximately 6 (the default). To use whatever the fit value is, supply alpha_below_thresh:None. * - ``reference_ifos`` - If using the ``sensitive_volume``feature, these are the detectors used to determine the benchmark value by which the sensitive volume is compared. We use the median sensitive volume in the network of detectors supplied. Default H1,L1. * - ``max_chirp_mass`` diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 42e091b05b5..2f314a62176 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -903,8 +903,10 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.min_snr = numpy.inf # Some modifiers for the statistic to get it into a nice range - self.benchmark_lograte = self.kwargs.get('benchmark_lograte', -14.6) - self.min_stat = self.kwargs.get('minimum_statistic_cutoff', -30.) + self.benchmark_lograte = \ + float(self.kwargs.get('benchmark_lograte', -14.6)) + self.min_stat = \ + float(self.kwargs.get('minimum_statistic_cutoff', -30.)) # Go through the keywords and add class information as needed: if self.kwargs['sensitive_volume']: @@ -1257,14 +1259,19 @@ def lognoiserate(self, trigs): lognoisel = - alphai * (sngl_stat - thresh) + numpy.log(alphai) + \ numpy.log(ratei) - if 'alpha_below_thresh' in self.kwargs: + alphabelow = self.kwargs.get('alpha_below_thresh', 6.) + try: + alphabelow = float(alphabelow) # Above the threshold we use the usual fit coefficient (alphai) # below threshold use specified alphabelow - alphabelow = float(self.kwargs['alpha_below_thresh']) bt = sngl_stat < thresh lognoiselbt = - alphabelow * (sngl_stat - thresh) + \ numpy.log(alphabelow) + numpy.log(ratei) lognoisel[bt] = lognoiselbt[bt] + except ValueError: + # The float conversion will raise this if passed e.g. 
'None' + # In that case we simply use the fit value everywhere + pass if self.kwargs['dq']: # Reweight the lognoiserate things by the dq reweighting factor From 102203e43a510d48e755b7172428ecb3d0e896a0 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 6 Feb 2024 06:39:25 -0800 Subject: [PATCH 14/32] Allow sngl_ranking keywords to actually be used --- pycbc/events/ranking.py | 55 ++++++++++++++++++++++------------------- pycbc/events/stat.py | 5 +++- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index 0368bb05371..2612ba1298c 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -7,7 +7,7 @@ logger = logging.getLogger('pycbc.events.ranking') -def effsnr(snr, reduced_x2, fac=250.): +def effsnr(snr, reduced_x2, fac=250., **kwargs): # pylint:disable=unused-argument """Calculate the effective SNR statistic. See (S5y1 paper) for definition. """ snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) @@ -21,7 +21,7 @@ def effsnr(snr, reduced_x2, fac=250.): return esnr[0] -def newsnr(snr, reduced_x2, q=6., n=2.): +def newsnr(snr, reduced_x2, q=6., n=2., **kwargs): # pylint:disable=unused-argument """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. Previous implementation in glue/ligolw/lsctables.py @@ -40,9 +40,9 @@ def newsnr(snr, reduced_x2, q=6., n=2.): return nsnr[0] -def newsnr_sgveto(snr, brchisq, sgchisq): +def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" - nsnr = numpy.array(newsnr(snr, brchisq), ndmin=1) + nsnr = numpy.array(newsnr(snr, brchisq, **kwargs), ndmin=1) sgchisq = numpy.array(sgchisq, ndmin=1) t = numpy.array(sgchisq > 4, ndmin=1) if len(t): @@ -56,7 +56,7 @@ def newsnr_sgveto(snr, brchisq, sgchisq): def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65): + min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and PSD variation statistic""" # If PSD var is lower than the 'minimum usually expected value' stop this @@ -66,7 +66,7 @@ def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, psd_var_val[psd_var_val < min_expected_psdvar] = 1. scaled_snr = snr * (psd_var_val ** -0.5) scaled_brchisq = brchisq * (psd_var_val ** -1.) - nsnr = newsnr_sgveto(scaled_snr, scaled_brchisq, sgchisq) + nsnr = newsnr_sgveto(scaled_snr, scaled_brchisq, sgchisq, **kwargs) # If snr input is float, return a float. Otherwise return numpy array. if hasattr(snr, '__len__'): @@ -78,14 +78,16 @@ def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, min_expected_psdvar=0.65, brchisq_threshold=10.0, - psd_var_val_threshold=10.0): + psd_var_val_threshold=10.0, + **kwargs): # pylint:disable=unused-argument """ newsnr_sgveto_psdvar with thresholds applied. This is the newsnr_sgveto_psdvar statistic with additional options to threshold on chi-squared or PSD variation. """ nsnr = newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=min_expected_psdvar) + min_expected_psdvar=min_expected_psdvar, + **kwargs) nsnr = numpy.array(nsnr, ndmin=1) nsnr[brchisq > brchisq_threshold] = 1. nsnr[psd_var_val > psd_var_val_threshold] = 1. 
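To show what "Allow sngl_ranking keywords to actually be used" means in practice, a minimal sketch (not part of the patch) of a keyword flowing through to the ranking functions once the ``sngl_ranking_`` prefix has been stripped and the value converted to a float, as done in the Stat constructor later in this commit; the numerical inputs are illustrative only:

    # Sketch only: calls newsnr_sgveto_psdvar with the signature shown in the
    # hunk above.
    from pycbc.events import ranking

    # '--statistic-keywords sngl_ranking_min_expected_psdvar:0.7' becomes
    sngl_ranking_kwargs = {'min_expected_psdvar': 0.7}

    reweighted_snr = ranking.newsnr_sgveto_psdvar(
        snr=10.0, brchisq=1.2, sgchisq=1.0, psd_var_val=0.9,
        **sngl_ranking_kwargs,
    )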
@@ -98,10 +100,10 @@ def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, - scaling=0.33, min_expected_psdvar=0.65): + scaling=0.33, min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD variation statistic. """ - nsnr = numpy.array(newsnr_sgveto(snr, brchisq, sgchisq), ndmin=1) + nsnr = numpy.array(newsnr_sgveto(snr, brchisq, sgchisq, **kwargs), ndmin=1) psd_var_val = numpy.array(psd_var_val, ndmin=1, copy=True) psd_var_val[psd_var_val < min_expected_psdvar] = 1. @@ -116,11 +118,11 @@ def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, - threshold=2.0): + threshold=2.0, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and scaled psd variation. """ - nsnr = newsnr_sgveto_psdvar_scaled(snr, bchisq, sgchisq, psd_var_val) + nsnr = newsnr_sgveto_psdvar_scaled(snr, bchisq, sgchisq, psd_var_val, **kwargs) nsnr = numpy.array(nsnr, ndmin=1) nsnr[bchisq > threshold] = 1. @@ -131,7 +133,7 @@ def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, return nsnr[0] -def get_snr(trigs): +def get_snr(trigs, **kwargs): # pylint:disable=unused-argument """ Return SNR from a trigs/dictionary object @@ -149,7 +151,7 @@ def get_snr(trigs): return numpy.array(trigs['snr'][:], ndmin=1, dtype=numpy.float32) -def get_newsnr(trigs): +def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr ('reweighted SNR') for a trigs/dictionary object @@ -165,11 +167,11 @@ def get_newsnr(trigs): Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr = newsnr(trigs['snr'][:], trigs['chisq'][:] / dof) + nsnr = newsnr(trigs['snr'][:], trigs['chisq'][:] / dof, **kwargs) return numpy.array(nsnr, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto(trigs): +def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weigthed by the sine-gaussian veto @@ -187,11 +189,11 @@ def get_newsnr_sgveto(trigs): dof = 2. * trigs['chisq_dof'][:] - 2. 
nsnr_sg = newsnr_sgveto(trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:]) + trigs['sg_chisq'][:], **kwargs) return numpy.array(nsnr_sg, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar(trigs): +def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate snr re-weighted by Allen chisq, sine-gaussian veto and psd variation statistic @@ -211,11 +213,11 @@ def get_newsnr_sgveto_psdvar(trigs): nsnr_sg_psd = \ newsnr_sgveto_psdvar(trigs['snr'][:], trigs['chisq'][:] / dof, trigs['sg_chisq'][:], - trigs['psd_var_val'][:]) + trigs['psd_var_val'][:], **kwargs) return numpy.array(nsnr_sg_psd, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_threshold(trigs): +def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -235,12 +237,13 @@ def get_newsnr_sgveto_psdvar_threshold(trigs): nsnr_sg_psdt = newsnr_sgveto_psdvar_threshold( trigs['snr'][:], trigs['chisq'][:] / dof, trigs['sg_chisq'][:], - trigs['psd_var_val'][:] + trigs['psd_var_val'][:], + **kwargs ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled(trigs): +def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -261,11 +264,12 @@ def get_newsnr_sgveto_psdvar_scaled(trigs): newsnr_sgveto_psdvar_scaled( trigs['snr'][:], trigs['chisq'][:] / dof, trigs['sg_chisq'][:], - trigs['psd_var_val'][:]) + trigs['psd_var_val'][:], + **kwargs) return numpy.array(nsnr_sg_psdscale, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled_threshold(trigs): +def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic. 
A further threshold is applied to the @@ -287,7 +291,8 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs): newsnr_sgveto_psdvar_scaled_threshold( trigs['snr'][:], trigs['chisq'][:] / dof, trigs['sg_chisq'][:], - trigs['psd_var_val'][:]) + trigs['psd_var_val'][:], + **kwargs) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 2f314a62176..dfdd17c8a1b 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -99,7 +99,10 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.kwargs = {} for key, value in kwargs.items(): if key.startswith('sngl_ranking_'): - self.sngl_ranking_kwargs[key[13:]] = value + # Note that all the sngl_ranking keywords are floats, + # so this conversion is safe - if that changes in the future, + # we may need something more sophisticated + self.sngl_ranking_kwargs[key[13:]] = float(value) else: self.kwargs[key] = value From eb69bd806019ce091c5cdc66ce6013f51c18cfee Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 6 Feb 2024 06:55:03 -0800 Subject: [PATCH 15/32] CC --- pycbc/events/ranking.py | 122 ++++++++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 41 deletions(-) diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index 2612ba1298c..a23e8c97875 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -7,7 +7,7 @@ logger = logging.getLogger('pycbc.events.ranking') -def effsnr(snr, reduced_x2, fac=250., **kwargs): # pylint:disable=unused-argument +def effsnr(snr, reduced_x2, fac=250., **kwargs): # pylint:disable=unused-argument """Calculate the effective SNR statistic. See (S5y1 paper) for definition. """ snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) @@ -21,7 +21,7 @@ def effsnr(snr, reduced_x2, fac=250., **kwargs): # pylint:disable=unused-argumen return esnr[0] -def newsnr(snr, reduced_x2, q=6., n=2., **kwargs): # pylint:disable=unused-argument +def newsnr(snr, reduced_x2, q=6., n=2., **kwargs): # pylint:disable=unused-argument """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. Previous implementation in glue/ligolw/lsctables.py @@ -40,9 +40,14 @@ def newsnr(snr, reduced_x2, q=6., n=2., **kwargs): # pylint:disable=unused-argum return nsnr[0] -def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-argument +def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" - nsnr = numpy.array(newsnr(snr, brchisq, **kwargs), ndmin=1) + nsnr = numpy.array( + newsnr( + snr, + brchisq, + **kwargs), + ndmin=1) sgchisq = numpy.array(sgchisq, ndmin=1) t = numpy.array(sgchisq > 4, ndmin=1) if len(t): @@ -56,7 +61,7 @@ def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-argu def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument + min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and PSD variation statistic""" # If PSD var is lower than the 'minimum usually expected value' stop this @@ -66,7 +71,12 @@ def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, psd_var_val[psd_var_val < min_expected_psdvar] = 1. scaled_snr = snr * (psd_var_val ** -0.5) scaled_brchisq = brchisq * (psd_var_val ** -1.) 
- nsnr = newsnr_sgveto(scaled_snr, scaled_brchisq, sgchisq, **kwargs) + nsnr = newsnr_sgveto( + scaled_snr, + scaled_brchisq, + sgchisq, + **kwargs + ) # If snr input is float, return a float. Otherwise return numpy array. if hasattr(snr, '__len__'): @@ -79,15 +89,20 @@ def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, min_expected_psdvar=0.65, brchisq_threshold=10.0, psd_var_val_threshold=10.0, - **kwargs): # pylint:disable=unused-argument + **kwargs): # pylint:disable=unused-argument """ newsnr_sgveto_psdvar with thresholds applied. This is the newsnr_sgveto_psdvar statistic with additional options to threshold on chi-squared or PSD variation. """ - nsnr = newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=min_expected_psdvar, - **kwargs) + nsnr = newsnr_sgveto_psdvar( + snr, + brchisq, + sgchisq, + psd_var_val, + min_expected_psdvar=min_expected_psdvar, + **kwargs + ) nsnr = numpy.array(nsnr, ndmin=1) nsnr[brchisq > brchisq_threshold] = 1. nsnr[psd_var_val > psd_var_val_threshold] = 1. @@ -100,10 +115,17 @@ def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, - scaling=0.33, min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument + scaling=0.33, min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD variation statistic. """ - nsnr = numpy.array(newsnr_sgveto(snr, brchisq, sgchisq, **kwargs), ndmin=1) + nsnr = numpy.array( + newsnr_sgveto( + snr, + brchisq, + sgchisq, + **kwargs), + ndmin=1) psd_var_val = numpy.array(psd_var_val, ndmin=1, copy=True) psd_var_val[psd_var_val < min_expected_psdvar] = 1. @@ -118,11 +140,17 @@ def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, - threshold=2.0, **kwargs): # pylint:disable=unused-argument + threshold=2.0, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and scaled psd variation. """ - nsnr = newsnr_sgveto_psdvar_scaled(snr, bchisq, sgchisq, psd_var_val, **kwargs) + nsnr = newsnr_sgveto_psdvar_scaled( + snr, + bchisq, + sgchisq, + psd_var_val, + **kwargs + ) nsnr = numpy.array(nsnr, ndmin=1) nsnr[bchisq > threshold] = 1. @@ -133,7 +161,7 @@ def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, return nsnr[0] -def get_snr(trigs, **kwargs): # pylint:disable=unused-argument +def get_snr(trigs, **kwargs): # pylint:disable=unused-argument """ Return SNR from a trigs/dictionary object @@ -151,7 +179,7 @@ def get_snr(trigs, **kwargs): # pylint:disable=unused-argument return numpy.array(trigs['snr'][:], ndmin=1, dtype=numpy.float32) -def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr ('reweighted SNR') for a trigs/dictionary object @@ -167,11 +195,15 @@ def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. 
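    # A clarifying note on the line above: the stored 'chisq_dof' counts
    # chi-squared bins, so 2 * chisq_dof - 2 is the actual number of degrees
    # of freedom, and trigs['chisq'] / dof is the reduced chi-squared that
    # newsnr receives as reduced_x2.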
- nsnr = newsnr(trigs['snr'][:], trigs['chisq'][:] / dof, **kwargs) + nsnr = newsnr( + trigs['snr'][:], + trigs['chisq'][:] / dof, + **kwargs + ) return numpy.array(nsnr, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weigthed by the sine-gaussian veto @@ -187,13 +219,16 @@ def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr_sg = newsnr_sgveto(trigs['snr'][:], - trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], **kwargs) + nsnr_sg = newsnr_sgveto( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + **kwargs + ) return numpy.array(nsnr_sg, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate snr re-weighted by Allen chisq, sine-gaussian veto and psd variation statistic @@ -210,14 +245,17 @@ def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr_sg_psd = \ - newsnr_sgveto_psdvar(trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:], **kwargs) + nsnr_sg_psd = newsnr_sgveto_psdvar( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], + **kwargs + ) return numpy.array(nsnr_sg_psd, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -243,7 +281,7 @@ def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -260,16 +298,17 @@ def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-ar Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr_sg_psdscale = \ - newsnr_sgveto_psdvar_scaled( - trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:], - **kwargs) + nsnr_sg_psdscale = newsnr_sgveto_psdvar_scaled( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], + **kwargs + ) return numpy.array(nsnr_sg_psdscale, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic. A further threshold is applied to the @@ -287,12 +326,13 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. 
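    # Illustrative sketch with hypothetical keyword values: because every
    # ranking function now takes **kwargs and ignores entries it does not
    # use, a single keyword dictionary can be forwarded to any of the get_*
    # wrappers, e.g.
    #     kwargs = {'q': 5.0, 'psd_var_val_threshold': 10.0}
    #     get_newsnr(trigs, **kwargs)                           # uses q only
    #     get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs)   # uses both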
- nsnr_sg_psdt = \ - newsnr_sgveto_psdvar_scaled_threshold( - trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:], - **kwargs) + nsnr_sg_psdt = newsnr_sgveto_psdvar_scaled_threshold( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], + **kwargs + ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) From d715d307dfcb524e7c7428282fb62c4767418f70 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 6 Feb 2024 06:57:40 -0800 Subject: [PATCH 16/32] try this --- docs/workflow/pycbc_make_offline_search_workflow.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst b/docs/workflow/pycbc_make_offline_search_workflow.rst index f3d4d0e27d9..da445dead5e 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -320,7 +320,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the Keywords table below. - ``statistic-files`` = Files to be used in the statistic calculation, of particular note here are the files needed for DQ and KDE reranking. -.. list-table:: Statistics: ``ranking-statistic`` +.. list-table:: Statistics - ``ranking-statistic`` :widths: 25 75 :header-rows: 1 * - Statistic name @@ -334,7 +334,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``exp_fit`` - The ratio of signal-to-noise rates in the triggered network of detectors. The trigger density at a given sngl-ranking is approximated for each template, and this is combined for the triggered network. -.. list-table:: Features for use with the ``exp_fit``: ``statistic-features` +.. list-table:: Features for use with the ``exp_fit`` - ``statistic-features` :widths: 25 75 :header-rows: 1 @@ -353,7 +353,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``chirp_mass`` - Apply a factor of :math:`log((M_c / 20) ^{11 / 3})` to the statistic. This makes the signal rate uniform over chirp mass, as this factor cancels out the power of -11 / 3 caused by differences in the density of template placement. -.. list-table:: Keywords: ``statistic-keywords`` +.. list-table:: Keywords - ``statistic-keywords`` :widths: 25 75 :header-rows: 1 From 36ee4ab100ff4d630667b87273e4ee20518428bc Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 6 Feb 2024 08:57:46 -0800 Subject: [PATCH 17/32] maybe --- docs/workflow/pycbc_make_offline_search_workflow.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst b/docs/workflow/pycbc_make_offline_search_workflow.rst index da445dead5e..8eb7e85a88b 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -320,7 +320,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the Keywords table below. 
- ``statistic-files`` = Files to be used in the statistic calculation, of particular note here are the files needed for DQ and KDE reranking. -.. list-table:: Statistics - ``ranking-statistic`` +.. list-table:: Statistics - ranking-statistic :widths: 25 75 :header-rows: 1 * - Statistic name @@ -334,7 +334,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``exp_fit`` - The ratio of signal-to-noise rates in the triggered network of detectors. The trigger density at a given sngl-ranking is approximated for each template, and this is combined for the triggered network. -.. list-table:: Features for use with the ``exp_fit`` - ``statistic-features` +.. list-table:: Features for use with exp_fit - statistic-features :widths: 25 75 :header-rows: 1 @@ -353,7 +353,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``chirp_mass`` - Apply a factor of :math:`log((M_c / 20) ^{11 / 3})` to the statistic. This makes the signal rate uniform over chirp mass, as this factor cancels out the power of -11 / 3 caused by differences in the density of template placement. -.. list-table:: Keywords - ``statistic-keywords`` +.. list-table:: Keywords - statistic-keywords :widths: 25 75 :header-rows: 1 From 9e821844d0fe9d58bdd5afc22ec395db48f8d962 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Fri, 16 Feb 2024 06:55:52 -0800 Subject: [PATCH 18/32] single-word titles --- docs/workflow/pycbc_make_offline_search_workflow.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst b/docs/workflow/pycbc_make_offline_search_workflow.rst index 8eb7e85a88b..6ce930b9c5c 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -315,12 +315,12 @@ Here we are doing exact match coincidence. So we take the light travel time betw How triggers are ranked is defined by the ranking-statistic, sngl-ranking, statistic-features and statistic-keywords options. - ``sngl-ranking`` = The ranking used for single-detector triggers, this is generally a re-weighting of the SNR. - - ``ranking-statistic`` = How the triggers from a set of detectors are ranked in order to calculate significance. This will take the form of an snr-like combination (``quadsum``, ``phasetd``, ``exp_fit_csnr``), or a log-rates-like statistic, ``exp_fit``. See Statistics table below for the options. - - ``statistic-features`` = If using ranking-statistic ``exp_fit``, then these are the features to add or subtract from the ranking statistic. These are described in the Features table below. - - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the Keywords table below. + - ``ranking-statistic`` = How the triggers from a set of detectors are ranked in order to calculate significance. This will take the form of an snr-like combination (``quadsum``, ``phasetd``, ``exp_fit_csnr``), or a log-rates-like statistic, ``exp_fit``. See ranking-statistic table below for the options. + - ``statistic-features`` = If using ranking-statistic ``exp_fit``, then these are the features to add or subtract from the ranking statistic. These are described in the statistic-features table below. 
+ - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the statistic-keywords table below. - ``statistic-files`` = Files to be used in the statistic calculation, of particular note here are the files needed for DQ and KDE reranking. -.. list-table:: Statistics - ranking-statistic +.. list-table:: ranking-statistic :widths: 25 75 :header-rows: 1 * - Statistic name @@ -334,7 +334,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``exp_fit`` - The ratio of signal-to-noise rates in the triggered network of detectors. The trigger density at a given sngl-ranking is approximated for each template, and this is combined for the triggered network. -.. list-table:: Features for use with exp_fit - statistic-features +.. list-table:: statistic-features :widths: 25 75 :header-rows: 1 @@ -353,7 +353,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``chirp_mass`` - Apply a factor of :math:`log((M_c / 20) ^{11 / 3})` to the statistic. This makes the signal rate uniform over chirp mass, as this factor cancels out the power of -11 / 3 caused by differences in the density of template placement. -.. list-table:: Keywords - statistic-keywords +.. list-table:: statistic-keywords :widths: 25 75 :header-rows: 1 From 674a1967f99af1bce541b10ff06551a3bb3bc8c5 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Fri, 16 Feb 2024 07:49:25 -0800 Subject: [PATCH 19/32] Fix a bunch of line-too-long errors --- pycbc/events/ranking.py | 64 ++++++++++++++++++++++------------------- pycbc/io/hdf.py | 2 +- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index a23e8c97875..aad4a76caf5 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -7,7 +7,8 @@ logger = logging.getLogger('pycbc.events.ranking') -def effsnr(snr, reduced_x2, fac=250., **kwargs): # pylint:disable=unused-argument +def effsnr(snr, reduced_x2, fac=250., + **kwargs): # pylint:disable=unused-argument """Calculate the effective SNR statistic. See (S5y1 paper) for definition. """ snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) @@ -21,7 +22,8 @@ def effsnr(snr, reduced_x2, fac=250., **kwargs): # pylint:disable=unused-argume return esnr[0] -def newsnr(snr, reduced_x2, q=6., n=2., **kwargs): # pylint:disable=unused-argument +def newsnr(snr, reduced_x2, q=6., n=2., + **kwargs): # pylint:disable=unused-argument """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. 
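# For orientation, a hedged sketch of the reweighting newsnr applies with its
# default arguments (q=6, n=2), following the definition cited in the
# docstring above:
#     rho_hat = rho                                        if reduced_x2 <= 1
#     rho_hat = rho * ((1 + reduced_x2**3) / 2) ** (-1/6)   if reduced_x2 > 1
# The q and n arguments generalise these exponents.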
Previous implementation in glue/ligolw/lsctables.py @@ -40,7 +42,8 @@ def newsnr(snr, reduced_x2, q=6., n=2., **kwargs): # pylint:disable=unused-argu return nsnr[0] -def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-argument +def newsnr_sgveto(snr, brchisq, sgchisq, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" nsnr = numpy.array( newsnr( @@ -61,7 +64,7 @@ def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-arg def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument + min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and PSD variation statistic""" # If PSD var is lower than the 'minimum usually expected value' stop this @@ -86,10 +89,9 @@ def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65, - brchisq_threshold=10.0, - psd_var_val_threshold=10.0, - **kwargs): # pylint:disable=unused-argument + min_expected_psdvar=0.65, brchisq_threshold=10.0, + psd_var_val_threshold=10.0, + **kwargs): # pylint:disable=unused-argument """ newsnr_sgveto_psdvar with thresholds applied. This is the newsnr_sgveto_psdvar statistic with additional options @@ -115,8 +117,8 @@ def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, - scaling=0.33, min_expected_psdvar=0.65, - **kwargs): # pylint:disable=unused-argument + scaling=0.33, min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD variation statistic. """ nsnr = numpy.array( @@ -140,7 +142,7 @@ def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, - threshold=2.0, **kwargs): # pylint:disable=unused-argument + threshold=2.0, **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and scaled psd variation. 
""" @@ -228,7 +230,7 @@ def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument return numpy.array(nsnr_sg, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate snr re-weighted by Allen chisq, sine-gaussian veto and psd variation statistic @@ -255,7 +257,8 @@ def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument return numpy.array(nsnr_sg_psd, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_threshold(trigs, + **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -281,7 +284,8 @@ def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unuse return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled(trigs, + **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -308,7 +312,8 @@ def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-a return numpy.array(nsnr_sg_psdscale, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, + **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic. A further threshold is applied to the @@ -344,23 +349,24 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disabl 'newsnr_sgveto_psdvar': get_newsnr_sgveto_psdvar, 'newsnr_sgveto_psdvar_threshold': get_newsnr_sgveto_psdvar_threshold, 'newsnr_sgveto_psdvar_scaled': get_newsnr_sgveto_psdvar_scaled, - 'newsnr_sgveto_psdvar_scaled_threshold': get_newsnr_sgveto_psdvar_scaled_threshold, + 'newsnr_sgveto_psdvar_scaled_threshold': \ + get_newsnr_sgveto_psdvar_scaled_threshold, } # Lists of datasets required in the trigs object for each function -required_datasets = {} -required_datasets['snr'] = ['snr'] -required_datasets['newsnr'] = required_datasets['snr'] + ['chisq', 'chisq_dof'] -required_datasets['new_snr'] = required_datasets['newsnr'] -required_datasets['newsnr_sgveto'] = required_datasets['newsnr'] + ['sg_chisq'] -required_datasets['newsnr_sgveto_psdvar'] = \ - required_datasets['newsnr_sgveto'] + ['psd_var_val'] -required_datasets['newsnr_sgveto_psdvar_threshold'] = \ - required_datasets['newsnr_sgveto_psdvar'] -required_datasets['newsnr_sgveto_psdvar_scaled'] = \ - required_datasets['newsnr_sgveto_psdvar'] -required_datasets['newsnr_sgveto_psdvar_scaled_threshold'] = \ - required_datasets['newsnr_sgveto_psdvar'] +reqd_datasets = {} +reqd_datasets['snr'] = ['snr'] +reqd_datasets['newsnr'] = reqd_datasets['snr'] + ['chisq', 'chisq_dof'] +reqd_datasets['new_snr'] = reqd_datasets['newsnr'] +reqd_datasets['newsnr_sgveto'] = reqd_datasets['newsnr'] + ['sg_chisq'] +reqd_datasets['newsnr_sgveto_psdvar'] = \ + reqd_datasets['newsnr_sgveto'] + ['psd_var_val'] +reqd_datasets['newsnr_sgveto_psdvar_threshold'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] +reqd_datasets['newsnr_sgveto_psdvar_scaled'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] 
+reqd_datasets['newsnr_sgveto_psdvar_scaled_threshold'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py index e5d61afe4a1..c98e09fcafa 100644 --- a/pycbc/io/hdf.py +++ b/pycbc/io/hdf.py @@ -576,7 +576,7 @@ def __init__(self, trig_file, detector, bank_file=None, veto_file=None, logger.info("Applying threshold of %.3f on %s", filter_threshold, filter_rank) fcn_dsets = (ranking.sngls_ranking_function_dict[filter_rank], - ranking.required_datasets[filter_rank]) + ranking.reqd_datasets[filter_rank]) idx, _ = self.trigs_f.select( lambda rank: rank > filter_threshold, filter_rank, From 94add88483585be8a0d2aab9c19647354404668e Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Mon, 11 Mar 2024 06:26:44 -0700 Subject: [PATCH 20/32] lines-too-long --- pycbc/events/ranking.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index aad4a76caf5..4d746de1fb7 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -8,7 +8,7 @@ def effsnr(snr, reduced_x2, fac=250., - **kwargs): # pylint:disable=unused-argument + **kwargs): # pylint:disable=unused-argument """Calculate the effective SNR statistic. See (S5y1 paper) for definition. """ snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) @@ -23,7 +23,7 @@ def effsnr(snr, reduced_x2, fac=250., def newsnr(snr, reduced_x2, q=6., n=2., - **kwargs): # pylint:disable=unused-argument + **kwargs): # pylint:disable=unused-argument """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. Previous implementation in glue/ligolw/lsctables.py @@ -43,7 +43,7 @@ def newsnr(snr, reduced_x2, q=6., n=2., def newsnr_sgveto(snr, brchisq, sgchisq, - **kwargs): # pylint:disable=unused-argument + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" nsnr = numpy.array( newsnr( @@ -64,7 +64,8 @@ def newsnr_sgveto(snr, brchisq, sgchisq, def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65, **kwargs): # pylint:disable=unused-argument + min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and PSD variation statistic""" # If PSD var is lower than the 'minimum usually expected value' stop this @@ -89,9 +90,10 @@ def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65, brchisq_threshold=10.0, - psd_var_val_threshold=10.0, - **kwargs): # pylint:disable=unused-argument + min_expected_psdvar=0.65, + brchisq_threshold=10.0, + psd_var_val_threshold=10.0, + **kwargs): # pylint:disable=unused-argument """ newsnr_sgveto_psdvar with thresholds applied. This is the newsnr_sgveto_psdvar statistic with additional options @@ -117,8 +119,8 @@ def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, - scaling=0.33, min_expected_psdvar=0.65, - **kwargs): # pylint:disable=unused-argument + scaling=0.33, min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD variation statistic. 
""" nsnr = numpy.array( @@ -142,7 +144,8 @@ def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, - threshold=2.0, **kwargs): # pylint:disable=unused-argument + threshold=2.0, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and scaled psd variation. """ @@ -349,8 +352,8 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, 'newsnr_sgveto_psdvar': get_newsnr_sgveto_psdvar, 'newsnr_sgveto_psdvar_threshold': get_newsnr_sgveto_psdvar_threshold, 'newsnr_sgveto_psdvar_scaled': get_newsnr_sgveto_psdvar_scaled, - 'newsnr_sgveto_psdvar_scaled_threshold': \ - get_newsnr_sgveto_psdvar_scaled_threshold, + 'newsnr_sgveto_psdvar_scaled_threshold': + get_newsnr_sgveto_psdvar_scaled_threshold, } # Lists of datasets required in the trigs object for each function From 022997df20f01bc0e1822138cf6f65dd84b725fe Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Mon, 11 Mar 2024 08:14:44 -0700 Subject: [PATCH 21/32] These tables are annoying me --- docs/workflow/pycbc_make_offline_search_workflow.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst b/docs/workflow/pycbc_make_offline_search_workflow.rst index 6ce930b9c5c..1b8e17ede9d 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -315,12 +315,12 @@ Here we are doing exact match coincidence. So we take the light travel time betw How triggers are ranked is defined by the ranking-statistic, sngl-ranking, statistic-features and statistic-keywords options. - ``sngl-ranking`` = The ranking used for single-detector triggers, this is generally a re-weighting of the SNR. - - ``ranking-statistic`` = How the triggers from a set of detectors are ranked in order to calculate significance. This will take the form of an snr-like combination (``quadsum``, ``phasetd``, ``exp_fit_csnr``), or a log-rates-like statistic, ``exp_fit``. See ranking-statistic table below for the options. - - ``statistic-features`` = If using ranking-statistic ``exp_fit``, then these are the features to add or subtract from the ranking statistic. These are described in the statistic-features table below. - - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the statistic-keywords table below. + - ``ranking-statistic`` = How the triggers from a set of detectors are ranked in order to calculate significance. This will take the form of an snr-like combination (``quadsum``, ``phasetd``, ``exp_fit_csnr``), or a log-rates-like statistic, ``exp_fit``. See Ranking Statistic table below for the options. + - ``statistic-features`` = If using ranking-statistic ``exp_fit``, then these are the features to add or subtract from the ranking statistic. These are described in the Statistic Features table below. + - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the Statistic Keywords table below. 
- ``statistic-files`` = Files to be used in the statistic calculation, of particular note here are the files needed for DQ and KDE reranking. -.. list-table:: ranking-statistic +.. list-table:: Ranking Statistic :widths: 25 75 :header-rows: 1 * - Statistic name @@ -334,7 +334,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``exp_fit`` - The ratio of signal-to-noise rates in the triggered network of detectors. The trigger density at a given sngl-ranking is approximated for each template, and this is combined for the triggered network. -.. list-table:: statistic-features +.. list-table:: Statistic Features :widths: 25 75 :header-rows: 1 @@ -353,7 +353,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati * - ``chirp_mass`` - Apply a factor of :math:`log((M_c / 20) ^{11 / 3})` to the statistic. This makes the signal rate uniform over chirp mass, as this factor cancels out the power of -11 / 3 caused by differences in the density of template placement. -.. list-table:: statistic-keywords +.. list-table:: Statistic Keywords :widths: 25 75 :header-rows: 1 From 6800377d09e4abcb52616794f102c1dbe2d8d60b Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Mon, 11 Mar 2024 08:27:48 -0700 Subject: [PATCH 22/32] CC again --- pycbc/events/ranking.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index 4d746de1fb7..d733f444296 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -233,7 +233,7 @@ def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument return numpy.array(nsnr_sg, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate snr re-weighted by Allen chisq, sine-gaussian veto and psd variation statistic @@ -260,8 +260,7 @@ def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument return numpy.array(nsnr_sg_psd, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_threshold(trigs, - **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -287,8 +286,7 @@ def get_newsnr_sgveto_psdvar_threshold(trigs, return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled(trigs, - **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -315,8 +313,7 @@ def get_newsnr_sgveto_psdvar_scaled(trigs, return numpy.array(nsnr_sg_psdscale, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, - **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic. 
A further threshold is applied to the From 745eaf1c7e52211c623e7e4540c7ca2d2d0fe24d Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Tue, 12 Mar 2024 05:21:01 -0700 Subject: [PATCH 23/32] Fix errors in the tables --- .../pycbc_make_offline_search_workflow.rst | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst b/docs/workflow/pycbc_make_offline_search_workflow.rst index 1b8e17ede9d..53366262dcd 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -323,6 +323,7 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati .. list-table:: Ranking Statistic :widths: 25 75 :header-rows: 1 + * - Statistic name - Description * - ``quadsum`` @@ -357,20 +358,20 @@ How triggers are ranked is defined by the ranking-statistic, sngl-ranking, stati :widths: 25 75 :header-rows: 1 - * - Keyword - - Description - * - ``benchmark_lograte`` - - This is a numerical factor to be subtracted from the log rate ratio in order to alter the dynamic range. Default -14.6. - * - ``minimum_statistic_cutoff`` - - Cutoff for the statistic in order to avoid underflowing and very small statistic values. Default -30. - * - ``alpha_below_thresh`` - - The fit coefficient (alpha) below the fit threshold (defined in the fit_by_template jobs) will be replaced by a standard value. This is as below this threshold, Gaussian noise can dominate over the glitch response that dominates above the threshold, and rates will be underestimated, boosting quiet things in noisy templates. For Gaussian noise, this will be approximately 6 (the default). To use whatever the fit value is, supply alpha_below_thresh:None. - * - ``reference_ifos`` - - If using the ``sensitive_volume``feature, these are the detectors used to determine the benchmark value by which the sensitive volume is compared. We use the median sensitive volume in the network of detectors supplied. Default H1,L1. - * - ``max_chirp_mass`` - - If using the ``chirp_mass`` feature, this chirp mass defines a maximum weighting which can be applied to the statistic. - * - ``sngl_ranking_*`` - - This is used to provide the keyword arguments to functions in :xref:`the events.ranking module`. For example, to use a different psdvar threshold in the newsnr_sgveto_psdvar_threshold function, we would use ``sngl_ranking_psd_var_val_threshold:10``. + * - Keyword + - Description + * - ``benchmark_lograte`` + - This is a numerical factor to be subtracted from the log rate ratio in order to alter the dynamic range. Default -14.6. + * - ``minimum_statistic_cutoff`` + - Cutoff for the statistic in order to avoid underflowing and very small statistic values. Default -30. + * - ``alpha_below_thresh`` + - The fit coefficient (alpha) below the fit threshold (defined in the fit_by_template jobs) will be replaced by a standard value. This is as below this threshold, Gaussian noise can dominate over the glitch response that dominates above the threshold, and rates will be underestimated, boosting quiet things in noisy templates. For Gaussian noise, this will be approximately 6 (the default). To use whatever the fit value is, supply alpha_below_thresh:None. + * - ``reference_ifos`` + - If using the ``sensitive_volume`` feature, these are the detectors used to determine the benchmark value by which the sensitive volume is compared. We use the median sensitive volume in the network of detectors supplied. Default H1,L1. 
+ * - ``max_chirp_mass`` + - If using the ``chirp_mass`` feature, this chirp mass defines a maximum weighting which can be applied to the statistic. + * - ``sngl_ranking_*`` + - This is used to provide the keyword arguments to functions in `the events.ranking module `_. For example, to use a different psdvar threshold in the newsnr_sgveto_psdvar_threshold function, we would use ``sngl_ranking_psd_var_val_threshold:10``. :: From 26992ae758c19bc373e875c75e194e1dc77b0c3c Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Mon, 25 Mar 2024 09:05:32 -0700 Subject: [PATCH 24/32] run black on pycbc/events/stat.py --- pycbc/events/stat.py | 588 ++++++++++++++++++++++++------------------- 1 file changed, 323 insertions(+), 265 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index dfdd17c8a1b..01ea9501634 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -36,15 +36,15 @@ from .eventmgr_cython import logsignalrateinternals_computepsignalbins from .eventmgr_cython import logsignalrateinternals_compute2detrate -logger = logging.getLogger('pycbc.events.stat') +logger = logging.getLogger("pycbc.events.stat") _allowed_statistic_features = [ - 'phasetd', - 'kde', - 'dq', - 'chirp_mass', - 'sensitive_volume', - 'normalize_fit_rate', + "phasetd", + "kde", + "dq", + "chirp_mass", + "sensitive_volume", + "normalize_fit_rate", ] @@ -71,13 +71,15 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.files = {} files = files or [] for filename in files: - with h5py.File(filename, 'r') as f: - stat = f.attrs['stat'] - if hasattr(stat, 'decode'): + with h5py.File(filename, "r") as f: + stat = f.attrs["stat"] + if hasattr(stat, "decode"): stat = stat.decode() if stat in self.files: - raise RuntimeError("We already have one file with stat attr =" - " %s. Can't provide more than one!" % stat) + raise RuntimeError( + "We already have one file with stat attr = " + "%s. Can't provide more than one!" % stat + ) logger.info("Found file %s for stat %s", filename, stat) self.files[stat] = filename # Keep track of when stat files hashes so it can be @@ -98,7 +100,7 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.sngl_ranking_kwargs = {} self.kwargs = {} for key, value in kwargs.items(): - if key.startswith('sngl_ranking_'): + if key.startswith("sngl_ranking_"): # Note that all the sngl_ranking keywords are floats, # so this conversion is safe - if that changes in the future, # we may need something more sophisticated @@ -182,12 +184,10 @@ def get_sngl_ranking(self, trigs): The array of single detector values """ return ranking.get_sngls_ranking_from_trigs( - trigs, - self.sngl_ranking, - **self.sngl_ranking_kwargs + trigs, self.sngl_ranking, **self.sngl_ranking_kwargs ) - def single(self, trigs): # pylint:disable=unused-argument + def single(self, trigs): # pylint:disable=unused-argument """ Calculate the necessary single detector information @@ -205,7 +205,7 @@ def single(self, trigs): # pylint:disable=unused-argument err_msg += "sub-classes. You shouldn't be seeing this error!" raise NotImplementedError(err_msg) - def rank_stat_single(self, single_info): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): # pylint:disable=unused-argument """ Calculate the statistic for a single detector candidate @@ -224,8 +224,9 @@ def rank_stat_single(self, single_info): # pylint:disable=unused-argument err_msg += "sub-classes. You shouldn't be seeing this error!" 
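# Illustrative sketch of the keyword plumbing set up in __init__ above, using
# a hypothetical option value: a workflow setting such as
#     statistic-keywords = sngl_ranking_psd_var_val_threshold:10
# reaches this class as kwargs = {'sngl_ranking_psd_var_val_threshold': '10'};
# the 'sngl_ranking_' prefix (13 characters) is stripped and the value cast
# to float, so that
#     self.sngl_ranking_kwargs == {'psd_var_val_threshold': 10.0}
# which get_sngl_ranking() forwards to ranking.get_sngls_ranking_from_trigs()
# together with the chosen sngl_ranking name.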
raise NotImplementedError(err_msg) - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, s, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. """ @@ -254,8 +255,9 @@ def _check_coinc_lim_subclass(self, allowed_names): err_msg += "list of allowed_names above." raise NotImplementedError(err_msg) - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest @@ -309,8 +311,9 @@ def rank_stat_single(self, single_info): """ return single_info[1] - def rank_stat_coinc(self, sngls_list, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, sngls_list, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. @@ -329,14 +332,15 @@ def rank_stat_coinc(self, sngls_list, slide, step, to_shift, numpy.ndarray Array of coincident ranking statistic values """ - cstat = sum(sngl[1] ** 2. for sngl in sngls_list) ** 0.5 + cstat = sum(sngl[1] ** 2.0 for sngl in sngls_list) ** 0.5 # For single-detector "cuts" the single ranking is set to -1 for sngls in sngls_list: cstat[sngls == -1] = 0 return cstat - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest @@ -360,12 +364,12 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, exceed thresh. """ # Safety against subclassing and not rethinking this - allowed_names = ['QuadratureSumStatistic'] + allowed_names = ["QuadratureSumStatistic"] self._check_coinc_lim_subclass(allowed_names) - s0 = thresh ** 2. - sum(sngl[1] ** 2. for sngl in s) + s0 = thresh**2.0 - sum(sngl[1] ** 2.0 for sngl in s) s0[s0 < 0] = 0 - return s0 ** 0.5 + return s0**0.5 class PhaseTDStatistic(QuadratureSumStatistic): @@ -376,8 +380,14 @@ class PhaseTDStatistic(QuadratureSumStatistic): amplitude ratios between triggers in different ifos. """ - def __init__(self, sngl_ranking, files=None, ifos=None, - pregenerate_hist=True, **kwargs): + def __init__( + self, + sngl_ranking, + files=None, + ifos=None, + pregenerate_hist=True, + **kwargs, + ): """ Parameters ---------- @@ -397,21 +407,22 @@ def __init__(self, sngl_ranking, files=None, ifos=None, If False, do not pregenerate histogram on class instantiation. Default is True. """ - QuadratureSumStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) + QuadratureSumStatistic.__init__( + self, sngl_ranking, files=files, ifos=ifos, **kwargs + ) self.single_dtype = [ - ('snglstat', numpy.float32), - ('coa_phase', numpy.float32), - ('end_time', numpy.float64), - ('sigmasq', numpy.float32), - ('snr', numpy.float32) + ("snglstat", numpy.float32), + ("coa_phase", numpy.float32), + ("end_time", numpy.float64), + ("sigmasq", numpy.float32), + ("snr", numpy.float32), ] # Assign attribute so that it can be replaced with other functions self.has_hist = False self.hist_ifos = None - self.ref_snr = 5. 
+ self.ref_snr = 5.0 self.relsense = {} self.swidth = self.pwidth = self.twidth = None self.srbmin = self.srbmax = None @@ -419,7 +430,7 @@ def __init__(self, sngl_ranking, files=None, ifos=None, self.pdtype = [] self.weights = {} self.param_bin = {} - self.two_det_flag = (len(ifos) == 2) + self.two_det_flag = len(ifos) == 2 self.two_det_weights = {} # Some memory self.pdif = numpy.zeros(128, dtype=numpy.float64) @@ -455,8 +466,8 @@ def get_hist(self, ifos=None): for name in self.files: # Pick out the statistic files that provide phase / time/ amp # relationships and match to the ifos in use - if 'phasetd_newsnr' in name: - ifokey = name.split('_')[2] + if "phasetd_newsnr" in name: + ifokey = name.split("_")[2] num = len(ifokey) / 2 if num != len(ifos): continue @@ -486,32 +497,32 @@ def get_hist(self, ifos=None): weights = {} param = {} - with h5py.File(self.files[selected], 'r') as histfile: - self.hist_ifos = histfile.attrs['ifos'] + with h5py.File(self.files[selected], "r") as histfile: + self.hist_ifos = histfile.attrs["ifos"] # Patch for pre-hdf5=3.0 histogram files try: logger.info("Decoding hist ifos ..") - self.hist_ifos = [i.decode('UTF-8') for i in self.hist_ifos] + self.hist_ifos = [i.decode("UTF-8") for i in self.hist_ifos] except (UnicodeDecodeError, AttributeError): pass # Histogram bin attributes - self.twidth = histfile.attrs['twidth'] - self.pwidth = histfile.attrs['pwidth'] - self.swidth = histfile.attrs['swidth'] - self.srbmin = histfile.attrs['srbmin'] - self.srbmax = histfile.attrs['srbmax'] - relfac = histfile.attrs['sensitivity_ratios'] + self.twidth = histfile.attrs["twidth"] + self.pwidth = histfile.attrs["pwidth"] + self.swidth = histfile.attrs["swidth"] + self.srbmin = histfile.attrs["srbmin"] + self.srbmax = histfile.attrs["srbmax"] + relfac = histfile.attrs["sensitivity_ratios"] for ifo in self.hist_ifos: - weights[ifo] = histfile[ifo]['weights'][:] - param[ifo] = histfile[ifo]['param_bin'][:] + weights[ifo] = histfile[ifo]["weights"][:] + param[ifo] = histfile[ifo]["param_bin"][:] n_ifos = len(self.hist_ifos) bin_volume = (self.twidth * self.pwidth * self.swidth) ** (n_ifos - 1) - self.hist_max = - 1. * numpy.inf + self.hist_max = -1.0 * numpy.inf # Read histogram for each ifo, to use if that ifo has smallest SNR in # the coinc @@ -525,11 +536,14 @@ def get_hist(self, ifos=None): if param[ifo].dtype == numpy.int8: # Older style, incorrectly sorted histogram file ncol = param[ifo].shape[1] - self.pdtype = [('c%s' % i, param[ifo].dtype) for i in range(ncol)] - self.param_bin[ifo] = numpy.zeros(len(self.weights[ifo]), - dtype=self.pdtype) + self.pdtype = [ + ("c%s" % i, param[ifo].dtype) for i in range(ncol) + ] + self.param_bin[ifo] = numpy.zeros( + len(self.weights[ifo]), dtype=self.pdtype + ) for i in range(ncol): - self.param_bin[ifo]['c%s' % i] = param[ifo][:, i] + self.param_bin[ifo]["c%s" % i] = param[ifo][:, i] lsort = self.param_bin[ifo].argsort() self.param_bin[ifo] = self.param_bin[ifo][lsort] @@ -562,32 +576,42 @@ def get_hist(self, ifos=None): # "weights" table a O(N) rather than O(NlogN) operation. It # sacrifices RAM to do this, so is a good tradeoff for 2 # detectors, but not for 3! 
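            # Illustrative note on the optimisation below: for a two-detector
            # network the sparse (param_bin, weights) histogram is expanded
            # into a dense 3-D array, pre-filled with max_penalty and indexed
            # by the offset time/phase/SNR-ratio bin indices, so reading a
            # candidate's signal weight is a single array access, roughly
            #     weight = self.two_det_weights[ref_ifo][id0, id1, id2]
            # rather than the numpy.searchsorted lookup used for three or
            # more detectors.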
- if not hasattr(self, 'c0_size'): + if not hasattr(self, "c0_size"): self.c0_size = {} self.c1_size = {} self.c2_size = {} self.c0_size[ifo] = numpy.int32( - 2 * (abs(self.param_bin[ifo]['c0']).max() + 1) + 2 * (abs(self.param_bin[ifo]["c0"]).max() + 1) ) self.c1_size[ifo] = numpy.int32( - 2 * (abs(self.param_bin[ifo]['c1']).max() + 1) + 2 * (abs(self.param_bin[ifo]["c1"]).max() + 1) ) self.c2_size[ifo] = numpy.int32( - 2 * (abs(self.param_bin[ifo]['c2']).max() + 1) + 2 * (abs(self.param_bin[ifo]["c2"]).max() + 1) ) - array_size = [self.c0_size[ifo], self.c1_size[ifo], - self.c2_size[ifo]] + array_size = [ + self.c0_size[ifo], + self.c1_size[ifo], + self.c2_size[ifo], + ] dtypec = self.weights[ifo].dtype - self.two_det_weights[ifo] = \ + self.two_det_weights[ifo] = ( numpy.zeros(array_size, dtype=dtypec) + self.max_penalty - id0 = self.param_bin[ifo]['c0'].astype(numpy.int32) \ + ) + id0 = ( + self.param_bin[ifo]["c0"].astype(numpy.int32) + self.c0_size[ifo] // 2 - id1 = self.param_bin[ifo]['c1'].astype(numpy.int32) \ + ) + id1 = ( + self.param_bin[ifo]["c1"].astype(numpy.int32) + self.c1_size[ifo] // 2 - id2 = self.param_bin[ifo]['c2'].astype(numpy.int32) \ + ) + id2 = ( + self.param_bin[ifo]["c2"].astype(numpy.int32) + self.c2_size[ifo] // 2 + ) self.two_det_weights[ifo][id0, id1, id2] = self.weights[ifo] for ifo, sense in zip(self.hist_ifos, relfac): @@ -647,12 +671,14 @@ def logsignalrate(self, stats, shift, to_shift): # Figure out which ifo of the contributing ifos has the smallest SNR, # to use as reference for choosing the signal histogram. - snrs = numpy.array([numpy.array(stats[ifo]['snr'], ndmin=1) - for ifo in self.ifos]) + snrs = numpy.array( + [numpy.array(stats[ifo]["snr"], ndmin=1) for ifo in self.ifos] + ) smin = snrs.argmin(axis=0) # Store a list of the triggers using each ifo as reference - rtypes = {ifo: numpy.where(smin == j)[0] - for j, ifo in enumerate(self.ifos)} + rtypes = { + ifo: numpy.where(smin == j)[0] for j, ifo in enumerate(self.ifos) + } # Get reference ifo information rate = numpy.zeros(len(shift), dtype=numpy.float32) @@ -715,13 +741,13 @@ def logsignalrate(self, stats, shift, to_shift): self.swidth, to_shift[ref_ifo], to_shift[ifo], - length + length, ) binned += [ self.tbin[:length], self.pbin[:length], - self.sbin[:length] + self.sbin[:length], ] # Read signal weight from precalculated histogram @@ -740,7 +766,7 @@ def logsignalrate(self, stats, shift, to_shift): self.two_det_weights[ref_ifo], self.max_penalty, self.ref_snr, - len(rtype) + len(rtype), ) else: # Low[er]-RAM, high[er]-CPU option for >two det @@ -748,7 +774,7 @@ def logsignalrate(self, stats, shift, to_shift): # Convert binned to same dtype as stored in hist nbinned = numpy.zeros(len(binned[1]), dtype=self.pdtype) for i, b in enumerate(binned): - nbinned[f'c{i}'] = b + nbinned[f"c{i}"] = b loc = numpy.searchsorted(self.param_bin[ref_ifo], nbinned) loc[loc == len(self.weights[ref_ifo])] = 0 @@ -756,12 +782,12 @@ def logsignalrate(self, stats, shift, to_shift): # These weren't in our histogram so give them max penalty # instead of random value - missed = numpy.where( - self.param_bin[ref_ifo][loc] != nbinned - )[0] + missed = numpy.where(self.param_bin[ref_ifo][loc] != nbinned)[ + 0 + ] rate[rtype[missed]] = self.max_penalty # Scale by signal population SNR - rate[rtype] *= (sref[rtype] / self.ref_snr) ** -4. 
+ rate[rtype] *= (sref[rtype] / self.ref_snr) ** -4.0 return numpy.log(rate) @@ -785,11 +811,11 @@ def single(self, trigs): """ sngl_stat = self.get_sngl_ranking(trigs) singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles['snglstat'] = sngl_stat - singles['coa_phase'] = trigs['coa_phase'][:] - singles['end_time'] = trigs['end_time'][:] - singles['sigmasq'] = trigs['sigmasq'][:] - singles['snr'] = trigs['snr'][:] + singles["snglstat"] = sngl_stat + singles["coa_phase"] = trigs["coa_phase"][:] + singles["end_time"] = trigs["end_time"][:] + singles["sigmasq"] = trigs["sigmasq"][:] + singles["snr"] = trigs["snr"][:] return numpy.array(singles, ndmin=1) def rank_stat_single(self, single_info): @@ -810,30 +836,32 @@ def rank_stat_single(self, single_info): numpy.ndarray The array of single detector statistics """ - return single_info[1]['snglstat'] + return single_info[1]["snglstat"] - def rank_stat_coinc(self, sngls_list, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, sngls_list, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic, defined in Eq 2 of [Nitz et al, 2017](https://doi.org/10.3847/1538-4357/aa8f50). """ - rstat = sum(s[1]['snglstat'] ** 2 for s in sngls_list) - cstat = rstat + 2. * self.logsignalrate(dict(sngls_list), - slide * step, - to_shift) + rstat = sum(s[1]["snglstat"] ** 2 for s in sngls_list) + cstat = rstat + 2.0 * self.logsignalrate( + dict(sngls_list), slide * step, to_shift + ) cstat[cstat < 0] = 0 - return cstat ** 0.5 + return cstat**0.5 - def coinc_lim_for_thresh(self, sngls_list, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, sngls_list, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest. Calculate the required single detector statistic to exceed the threshold for each of the input triggers. """ # Safety against subclassing and not rethinking this - allowed_names = ['PhaseTDStatistic'] + allowed_names = ["PhaseTDStatistic"] self._check_coinc_lim_subclass(allowed_names) if not self.has_hist: @@ -844,9 +872,9 @@ def coinc_lim_for_thresh(self, sngls_list, thresh, limifo, ) s1 = thresh ** 2. - fixed_statsq # Assume best case scenario and use maximum signal rate - s1 -= 2. * self.hist_max + s1 -= 2.0 * self.hist_max s1[s1 < 0] = 0 - return s1 ** 0.5 + return s1**0.5 class ExpFitStatistic(PhaseTDStatistic): @@ -880,17 +908,23 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): if not files: raise RuntimeError("Files not specified") - PhaseTDStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) + PhaseTDStatistic.__init__( + self, sngl_ranking, files=files, ifos=ifos, **kwargs + ) # Get the single-detector rates fit files # the stat file attributes are hard-coded as '%{ifo}-fit_coeffs' - parsed_attrs = [f.split('-') for f in self.files.keys()] - self.bg_ifos = [at[0] for at in parsed_attrs if - (len(at) == 2 and at[1] == 'fit_coeffs')] + parsed_attrs = [f.split("-") for f in self.files.keys()] + self.bg_ifos = [ + at[0] + for at in parsed_attrs + if (len(at) == 2 and at[1] == "fit_coeffs") + ] if not len(self.bg_ifos): - raise RuntimeError("None of the statistic files has the required " - "attribute called {ifo}-fit_coeffs !") + raise RuntimeError( + "None of the statistic files has the required " + "attribute called {ifo}-fit_coeffs !" 
+ ) # Get the single-detector rates fit values self.fits_by_tid = {} @@ -906,46 +940,47 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.min_snr = numpy.inf # Some modifiers for the statistic to get it into a nice range - self.benchmark_lograte = \ - float(self.kwargs.get('benchmark_lograte', -14.6)) - self.min_stat = \ - float(self.kwargs.get('minimum_statistic_cutoff', -30.)) + self.benchmark_lograte = float( + self.kwargs.get("benchmark_lograte", -14.6) + ) + self.min_stat = float( + self.kwargs.get("minimum_statistic_cutoff", -30.0) + ) # Go through the keywords and add class information as needed: - if self.kwargs['sensitive_volume']: + if self.kwargs["sensitive_volume"]: # Add network sensitivity beckmark - self.single_dtype.append(('benchmark_logvol', numpy.float32)) + self.single_dtype.append(("benchmark_logvol", numpy.float32)) # benchmark_logvol is a benchmark sensitivity array # over template id - ref_ifos = self.kwargs.get('reference_ifos', 'H1,L1').split(',') + ref_ifos = self.kwargs.get("reference_ifos", "H1,L1").split(",") hl_net_med_sigma = numpy.amin( - [self.fits_by_tid[ifo]['median_sigma'] - for ifo in ref_ifos], - axis=0 + [self.fits_by_tid[ifo]["median_sigma"] for ifo in ref_ifos], + axis=0, ) - self.benchmark_logvol = 3. * numpy.log(hl_net_med_sigma) + self.benchmark_logvol = 3.0 * numpy.log(hl_net_med_sigma) self.curr_tnum = None - if self.kwargs['dq']: + if self.kwargs["dq"]: # Reweight the noise rate by the dq reweighting factor self.dq_rates_by_state = {} self.dq_bin_by_tid = {} self.dq_state_segments = {} for ifo in self.ifos: - key = f'{ifo}-dq_stat_info' + key = f"{ifo}-dq_stat_info" if key in self.files.keys(): self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) self.dq_state_segments[ifo] = self.setup_segments(key) - if self.kwargs['chirp_mass']: + if self.kwargs["chirp_mass"]: # Reweight the signal rate by the chirp mass of the template # This may be stored as a float, so cast just in case - self.mcm = float(self.kwargs.get('max_chirp_mass', numpy.inf)) + self.mcm = float(self.kwargs.get("max_chirp_mass", numpy.inf)) self.curr_mchirp = None - if self.kwargs['kde']: + if self.kwargs["kde"]: # Reweight the signal rate by a weighting factor from the KDE of # a signal population normalised by expected relative rate of noise # triggers for a template @@ -971,13 +1006,13 @@ def assign_template_bins(self, key): bin_dict: dict of strs Dictionary containing the bin name for each template id """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: + ifo = key.split("-")[0] + with h5py.File(self.files[key], "r") as dq_file: tids = [] bin_nums = [] - bin_grp = dq_file[f'{ifo}/bins'] + bin_grp = dq_file[f"{ifo}/bins"] for bin_name in bin_grp.keys(): - bin_tids = bin_grp[f'{bin_name}/tids'][:] + bin_tids = bin_grp[f"{bin_name}/tids"][:] tids = list(tids) + list(bin_tids.astype(int)) bin_nums = list(bin_nums) + list([bin_name] * len(bin_tids)) @@ -1001,12 +1036,12 @@ def assign_dq_rates(self, key): and the dq value for each individual bin. 
""" - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - bin_grp = dq_file[f'{ifo}/bins'] + ifo = key.split("-")[0] + with h5py.File(self.files[key], "r") as dq_file: + bin_grp = dq_file[f"{ifo}/bins"] dq_dict = {} for bin_name in bin_grp.keys(): - dq_dict[bin_name] = bin_grp[f'{bin_name}/dq_rates'][:] + dq_dict[bin_name] = bin_grp[f"{bin_name}/dq_rates"][:] return dq_dict @@ -1016,16 +1051,16 @@ def setup_segments(self, key): If they are, we are running offline and need to store them If they aren't, we are running online """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: + ifo = key.split("-")[0] + with h5py.File(self.files[key], "r") as dq_file: ifo_grp = dq_file[ifo] dq_state_segs_dict = {} - for k in ifo_grp['dq_segments'].keys(): + for k in ifo_grp["dq_segments"].keys(): seg_dict = {} - seg_dict['start'] = \ - ifo_grp[f'dq_segments/{k}/segment_starts'][:] - seg_dict['end'] = \ - ifo_grp[f'dq_segments/{k}/segment_ends'][:] + seg_dict["start"] = ifo_grp[f"dq_segments/{k}/segment_starts"][ + : + ] + seg_dict["end"] = ifo_grp[f"dq_segments/{k}/segment_ends"][:] dq_state_segs_dict[k] = seg_dict return dq_state_segs_dict @@ -1036,12 +1071,12 @@ def find_dq_noise_rate(self, trigs, dq_state): try: tnum = trigs.template_num except AttributeError: - tnum = trigs['template_id'] + tnum = trigs["template_id"] try: ifo = trigs.ifo except AttributeError: - ifo = trigs['ifo'] + ifo = trigs["ifo"] assert len(numpy.unique(ifo)) == 1 # Should be exactly one ifo provided ifo = ifo[0] @@ -1049,7 +1084,7 @@ def find_dq_noise_rate(self, trigs, dq_state): dq_val = numpy.zeros(len(dq_state)) if ifo in self.dq_rates_by_state: - for (i, st) in enumerate(dq_state): + for i, st in enumerate(dq_state): if isinstance(tnum, numpy.ndarray): bin_name = self.dq_bin_by_tid[ifo][tnum[i]] else: @@ -1062,9 +1097,10 @@ def find_dq_state_by_time(self, ifo, times): dq_state = numpy.zeros(len(times), dtype=numpy.uint8) if ifo in self.dq_state_segments: from pycbc.events.veto import indices_within_times + for k in self.dq_state_segments[ifo]: - starts = self.dq_state_segments[ifo][k]['start'] - ends = self.dq_state_segments[ifo][k]['end'] + starts = self.dq_state_segments[ifo][k]["start"] + ends = self.dq_state_segments[ifo][k]["end"] inds = indices_within_times(times, starts, ends) # states are named in file as 'dq_state_N', need to extract N dq_state[inds] = int(k[9:]) @@ -1085,43 +1121,50 @@ def assign_fits(self, ifo): A dictionary containing the fit information in the `alpha`, `rate` and `thresh` keys. 
""" - coeff_file = h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') - template_id = coeff_file['template_id'][:] + coeff_file = h5py.File(self.files[f"{ifo}-fit_coeffs"], "r") + template_id = coeff_file["template_id"][:] # the template_ids and fit coeffs are stored in an arbitrary order # create new arrays in template_id order for easier recall tid_sort = numpy.argsort(template_id) fits_by_tid_dict = {} - fits_by_tid_dict['smoothed_fit_coeff'] = \ - coeff_file['fit_coeff'][:][tid_sort] - fits_by_tid_dict['smoothed_rate_above_thresh'] = \ - coeff_file['count_above_thresh'][:][tid_sort].astype(float) - fits_by_tid_dict['smoothed_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) - if self.kwargs['sensitive_volume']: - fits_by_tid_dict['median_sigma'] = \ - coeff_file['median_sigma'][:][tid_sort].astype(float) + fits_by_tid_dict["smoothed_fit_coeff"] = coeff_file["fit_coeff"][:][ + tid_sort + ] + fits_by_tid_dict["smoothed_rate_above_thresh"] = coeff_file[ + "count_above_thresh" + ][:][tid_sort].astype(float) + fits_by_tid_dict["smoothed_rate_in_template"] = coeff_file[ + "count_in_template" + ][:][tid_sort].astype(float) + if self.kwargs["sensitive_volume"]: + fits_by_tid_dict["median_sigma"] = coeff_file["median_sigma"][:][ + tid_sort + ].astype(float) # The by-template fits may have been stored in the smoothed fits file - if 'fit_by_template' in coeff_file: - coeff_fbt = coeff_file['fit_by_template'] - fits_by_tid_dict['fit_by_fit_coeff'] = \ - coeff_fbt['fit_coeff'][:][tid_sort] - fits_by_tid_dict['fit_by_rate_above_thresh'] = \ - coeff_fbt['count_above_thresh'][:][tid_sort].astype(float) - fits_by_tid_dict['fit_by_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) - - if self.kwargs['normalize_fit_rate']: - analysis_time = float(coeff_file.attrs['analysis_time']) - fits_by_tid_dict['smoothed_rate_above_thresh'] /= analysis_time - fits_by_tid_dict['smoothed_rate_in_template'] /= analysis_time - if 'fit_by_template' in coeff_file: - fits_by_tid_dict['fit_by_rate_above_thresh'] /= analysis_time - fits_by_tid_dict['fit_by_rate_in_template'] /= analysis_time + if "fit_by_template" in coeff_file: + coeff_fbt = coeff_file["fit_by_template"] + fits_by_tid_dict["fit_by_fit_coeff"] = coeff_fbt["fit_coeff"][:][ + tid_sort + ] + fits_by_tid_dict["fit_by_rate_above_thresh"] = coeff_fbt[ + "count_above_thresh" + ][:][tid_sort].astype(float) + fits_by_tid_dict["fit_by_rate_in_template"] = coeff_file[ + "count_in_template" + ][:][tid_sort].astype(float) + + if self.kwargs["normalize_fit_rate"]: + analysis_time = float(coeff_file.attrs["analysis_time"]) + fits_by_tid_dict["smoothed_rate_above_thresh"] /= analysis_time + fits_by_tid_dict["smoothed_rate_in_template"] /= analysis_time + if "fit_by_template" in coeff_file: + fits_by_tid_dict["fit_by_rate_above_thresh"] /= analysis_time + fits_by_tid_dict["fit_by_rate_in_template"] /= analysis_time # Keep the fit threshold in fits_by_tid - fits_by_tid_dict['thresh'] = coeff_file.attrs['stat_threshold'] + fits_by_tid_dict["thresh"] = coeff_file.attrs["stat_threshold"] coeff_file.close() @@ -1199,9 +1242,9 @@ def find_fits(self, trigs): # fits_by_tid is a dictionary of dictionaries of arrays # indexed by ifo / coefficient name / template_id - alphai = self.fits_by_tid[ifo]['smoothed_fit_coeff'][tnum] - ratei = self.fits_by_tid[ifo]['smoothed_rate_above_thresh'][tnum] - thresh = self.fits_by_tid[ifo]['thresh'] + alphai = self.fits_by_tid[ifo]["smoothed_fit_coeff"][tnum] + ratei = 
self.fits_by_tid[ifo]["smoothed_rate_above_thresh"][tnum] + thresh = self.fits_by_tid[ifo]["thresh"] return alphai, ratei, thresh @@ -1211,12 +1254,16 @@ def find_kdes(self): """ # The stat file attributes are hard-coded as 'signal-kde_file' # and 'template-kde_file' - parsed_attrs = [f.split('-') for f in self.files.keys()] - self.kde_names = [at[0] for at in parsed_attrs if - (len(at) == 2 and at[1] == 'kde_file')] - assert sorted(self.kde_names) == ['signal', 'template'], \ - "Two stat files are required, they should have stat attr " \ + parsed_attrs = [f.split("-") for f in self.files.keys()] + self.kde_names = [ + at[0] + for at in parsed_attrs + if (len(at) == 2 and at[1] == "kde_file") + ] + assert sorted(self.kde_names) == ["signal", "template"], ( + "Two stat files are required, they should have stat attr " "'signal-kde_file' and 'template-kde_file' respectively" + ) def assign_kdes(self, kname): """ @@ -1227,8 +1274,8 @@ def assign_kdes(self, kname): kname: str Used to label the kde files. """ - with h5py.File(self.files[kname + '-kde_file'], 'r') as kde_file: - self.kde_by_tid[kname + '_kdevals'] = kde_file['data_kde'][:] + with h5py.File(self.files[kname + "-kde_file"], "r") as kde_file: + self.kde_by_tid[kname + "_kdevals"] = kde_file["data_kde"][:] def kde_ratio(self): """ @@ -1259,35 +1306,41 @@ def lognoiserate(self, trigs): """ alphai, ratei, thresh = self.find_fits(trigs) sngl_stat = self.get_sngl_ranking(trigs) - lognoisel = - alphai * (sngl_stat - thresh) + numpy.log(alphai) + \ - numpy.log(ratei) + lognoisel = ( + -alphai * (sngl_stat - thresh) + + numpy.log(alphai) + + numpy.log(ratei) + ) - alphabelow = self.kwargs.get('alpha_below_thresh', 6.) + alphabelow = self.kwargs.get("alpha_below_thresh", 6.0) try: alphabelow = float(alphabelow) # Above the threshold we use the usual fit coefficient (alphai) # below threshold use specified alphabelow bt = sngl_stat < thresh - lognoiselbt = - alphabelow * (sngl_stat - thresh) + \ - numpy.log(alphabelow) + numpy.log(ratei) + lognoiselbt = ( + -alphabelow * (sngl_stat - thresh) + + numpy.log(alphabelow) + + numpy.log(ratei) + ) lognoisel[bt] = lognoiselbt[bt] except ValueError: # The float conversion will raise this if passed e.g. 
'None' # In that case we simply use the fit value everywhere pass - if self.kwargs['dq']: + if self.kwargs["dq"]: # Reweight the lognoiserate things by the dq reweighting factor # make sure every trig has a dq state try: ifo = trigs.ifo except AttributeError: - ifo = trigs['ifo'] + ifo = trigs["ifo"] assert len(numpy.unique(ifo)) == 1 # Should be exactly one ifo provided ifo = ifo[0] - dq_state = self.find_dq_state_by_time(ifo, trigs['end_time'][:]) + dq_state = self.find_dq_state_by_time(ifo, trigs["end_time"][:]) dq_rate = self.find_dq_noise_rate(trigs, dq_state) dq_rate = numpy.maximum(dq_rate, 1) lognoisel += numpy.log(dq_rate) @@ -1316,37 +1369,38 @@ def single(self, trigs): sngl_stat = self.lognoiserate(trigs) # populate other fields to calculate phase/time/amp consistency singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles['snglstat'] = sngl_stat - singles['coa_phase'] = trigs['coa_phase'][:] - singles['end_time'] = trigs['end_time'][:] - singles['snr'] = trigs['snr'][:] + singles["snglstat"] = sngl_stat + singles["coa_phase"] = trigs["coa_phase"][:] + singles["end_time"] = trigs["end_time"][:] + singles["snr"] = trigs["snr"][:] # Save info about best-case scenario for use later - self.min_snr = min(singles['snr'].min(), self.min_snr) + self.min_snr = min(singles["snr"].min(), self.min_snr) - if self.kwargs['sensitive_volume']: + if self.kwargs["sensitive_volume"]: # populate fields to allow sensitive volume factor calculation - singles['sigmasq'] = trigs['sigmasq'][:] + singles["sigmasq"] = trigs["sigmasq"][:] try: # exists if accessed via coinc_findtrigs self.curr_tnum = trigs.template_num except AttributeError: # exists for SingleDetTriggers - self.curr_tnum = trigs['template_id'] + self.curr_tnum = trigs["template_id"] # Should only be one ifo fit file provided assert len(self.ifos) == 1 # Store benchmark log volume as single-ifo information since # the ranking methods do not have access to template id - singles['benchmark_logvol'] = \ - self.benchmark_logvol[self.curr_tnum] + singles["benchmark_logvol"] = self.benchmark_logvol[self.curr_tnum] # Save info about the best-case scenario for use later: - max_sigsq = numpy.max(singles['sigmasq']) + max_sigsq = numpy.max(singles["sigmasq"]) self.max_sigmasq = max(max_sigsq, self.max_sigmasq) - if self.kwargs['chirp_mass']: + if self.kwargs["chirp_mass"]: from pycbc.conversions import mchirp_from_mass1_mass2 - self.curr_mchirp = mchirp_from_mass1_mass2(trigs.param['mass1'], - trigs.param['mass2']) + + self.curr_mchirp = mchirp_from_mass1_mass2( + trigs.param["mass1"], trigs.param["mass2"] + ) return numpy.array(singles, ndmin=1) def sensitive_volume_factor(self, sngls): @@ -1373,15 +1427,14 @@ def sensitive_volume_factor(self, sngls): # Network sensitivity for a given coinc type is approximately # determined by the least sensitive ifo network_sigmasq = numpy.amin( - [sngl[1]['sigmasq'] for sngl in sngls], - axis=0 + [sngl[1]["sigmasq"] for sngl in sngls], axis=0 ) # Volume \propto sigma^3 or sigmasq^1.5 network_logvol = 1.5 * numpy.log(network_sigmasq) # Get benchmark log volume as single-ifo information : # benchmark_logvol for a given template is not ifo-dependent, so # choose the first ifo for convenience - benchmark_logvol = sngls[0][1]['benchmark_logvol'] + benchmark_logvol = sngls[0][1]["benchmark_logvol"] network_logvol -= benchmark_logvol return network_logvol @@ -1405,17 +1458,17 @@ def logsignalrate_shared(self, sngls_info): """ # Other features affecting the signal rate sr_factor = 0 - if 
self.kwargs['sensitive_volume']: + if self.kwargs["sensitive_volume"]: # Sensitive volume - this is proportional to signal rate # assuming a homogeneous universe sr_factor += self.sensitive_volume_factor(sngls_info) - if self.kwargs['chirp_mass']: + if self.kwargs["chirp_mass"]: # chirp mass reweighting mchirp = min(self.curr_mchirp, self.mcm) - sr_factor += numpy.log((mchirp / 20.) ** (11. / 3.)) + sr_factor += numpy.log((mchirp / 20.0) ** (11.0 / 3.0)) - if self.kwargs['kde']: + if self.kwargs["kde"]: # KDE reweighting sr_factor += self.kde_ratio() @@ -1439,11 +1492,11 @@ def rank_stat_single(self, single_info): sngls = single_info[1] # Basic noise rate: the exp fit rate from the single statistic - ln_noise_rate = sngls['snglstat'] + ln_noise_rate = sngls["snglstat"] ln_noise_rate -= self.benchmark_lograte # Basic signal rate: snr ** -4 - ln_s = -4 * numpy.log(sngls['snr'] / self.ref_snr) + ln_s = -4 * numpy.log(sngls["snr"] / self.ref_snr) # Add in the feature-dependent terms to the signal rate ln_s += self.logsignalrate_shared((single_info,)) @@ -1454,8 +1507,9 @@ def rank_stat_single(self, single_info): loglr[loglr < self.min_stat] = self.min_stat return loglr - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, s, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. @@ -1483,26 +1537,26 @@ def rank_stat_coinc(self, s, slide, step, to_shift, """ # ranking statistic is -ln(expected rate density of noise triggers) # plus normalization constant - sngl_dict = {sngl[0]: sngl[1]['snglstat'] for sngl in s} + sngl_dict = {sngl[0]: sngl[1]["snglstat"] for sngl in s} # Basic noise rate: sum of noise rates multiplied by the # window they can form coincidences in ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_dict, kwargs['time_addition']) + sngl_dict, kwargs["time_addition"] + ) ln_noise_rate -= self.benchmark_lograte # Basic option is not to have any signal-based assumptions, # so this is zero to begin with ln_s = 0 - if self.kwargs['phasetd']: + if self.kwargs["phasetd"]: # Find total volume of phase-time-amplitude space occupied by # noise coincs, so that the logsignalrate function is properly # normalized # Extent of time-difference space occupied noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, - kwargs['time_addition'] + self.hist_ifos, kwargs["time_addition"] ) # Volume is the allowed time difference window, multiplied by 2pi # for each phase difference dimension and by allowed range of SNR @@ -1510,8 +1564,9 @@ def rank_stat_coinc(self, s, slide, step, to_shift, # dimensions for both phase and SNR n_ifos = len(self.hist_ifos) snr_range = (self.srbmax - self.srbmin) * self.swidth - hist_vol = noise_twindow * \ - (2. 
* numpy.pi * snr_range) ** (n_ifos - 1) + hist_vol = noise_twindow * (2.0 * numpy.pi * snr_range) ** ( + n_ifos - 1 + ) # Noise PDF is 1/volume, assuming a uniform distribution of noise # coincs ln_noise_rate -= numpy.log(hist_vol) @@ -1531,8 +1586,9 @@ def rank_stat_coinc(self, s, slide, step, to_shift, return loglr - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest @@ -1568,22 +1624,21 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, in order to reach the threshold specified """ # Safety against subclassing and not rethinking this - allowed_names = ['ExpFitStatistic'] + allowed_names = ["ExpFitStatistic"] self._check_coinc_lim_subclass(allowed_names) if thresh <= self.min_stat: - return numpy.ones(len(s[0][1]['snglstat'])) * numpy.inf + return numpy.ones(len(s[0][1]["snglstat"])) * numpy.inf # Modify the sngls so that the pivot ifo snglstats are zero - sngl_dict = {sngl[0]: sngl[1]['snglstat'] for sngl in s} + sngl_dict = {sngl[0]: sngl[1]["snglstat"] for sngl in s} sngl_dict[limifo] = numpy.zeros(len(s[0][1])) # Noise rate calculated as normal. Because of the modification # above, this is the rank_stat_coinc noise rate calculation # minus the pivot ifo's snglstat ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_dict, - kwargs['time_addition'] + sngl_dict, kwargs["time_addition"] ) ln_noise_rate -= self.benchmark_lograte @@ -1591,12 +1646,13 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, # so this is zero to begin with ln_s = 0 - if self.kwargs['phasetd']: + if self.kwargs["phasetd"]: if not self.has_hist: self.get_hist() # Assume best-case scenario and use maximum signal rate - ln_s = numpy.log(self.hist_max - * (self.min_snr / self.ref_snr) ** -4.) + ln_s = numpy.log( + self.hist_max * (self.min_snr / self.ref_snr) ** -4.0 + ) # Shared info is the same as in the coinc calculation ln_s += self.logsignalrate_shared(s) @@ -1633,10 +1689,11 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): ifos: list of strs, not used here The list of detector names """ - ExpFitStatistic.__init__(self, sngl_ranking, files=files, ifos=ifos, - **kwargs) + ExpFitStatistic.__init__( + self, sngl_ranking, files=files, ifos=ifos, **kwargs + ) # for low-mass templates the exponential slope alpha \approx 6 - self.alpharef = 6. + self.alpharef = 6.0 self.single_increasing = True self.single_dtype = numpy.float32 @@ -1657,7 +1714,7 @@ def single(self, trigs): logr_n = self.lognoiserate(trigs) _, _, thresh = self.find_fits(trigs) # shift by log of reference slope alpha - logr_n += -1. * numpy.log(self.alpharef) + logr_n += -1.0 * numpy.log(self.alpharef) # add threshold and rescale by reference slope stat = thresh - (logr_n / self.alpharef) return numpy.array(stat, ndmin=1, dtype=numpy.float32) @@ -1680,11 +1737,12 @@ def rank_stat_single(self, single_info): if self.single_increasing: sngl_multiifo = single_info[1] else: - sngl_multiifo = -1. * single_info[1] + sngl_multiifo = -1.0 * single_info[1] return sngl_multiifo - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, s, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. 
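[Editor's note: illustrative sketch, not part of the patch series.] ExpFitStatistic.rank_stat_single above is a log likelihood ratio: a signal-rate term falling off as SNR**-4 minus the fitted log noise-rate density, floored at the minimum-statistic cutoff. A toy version with made-up trigger values; the -14.6 and -30.0 defaults come from the __init__ shown earlier, the reference SNR here is arbitrary, and the extra feature-dependent terms added by logsignalrate_shared (sensitive volume, chirp mass, KDE) are omitted for brevity:

    import numpy

    benchmark_lograte = -14.6   # default benchmark_lograte from the code above
    min_stat = -30.0            # default minimum_statistic_cutoff
    ref_snr = 5.5               # arbitrary reference SNR for this sketch

    snglstat = numpy.array([-10.0, -2.0])   # fitted log noise-rate densities (made up)
    snr = numpy.array([9.0, 6.0])

    ln_noise_rate = snglstat - benchmark_lograte
    ln_s = -4.0 * numpy.log(snr / ref_snr)  # basic signal rate ~ SNR ** -4
    loglr = ln_s - ln_noise_rate
    loglr[loglr < min_stat] = min_stat      # apply the cutoff
    print(loglr)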
@@ -1706,8 +1764,9 @@ def rank_stat_coinc(self, s, slide, step, to_shift, # scale by 1/sqrt(number of ifos) to resemble network SNR return sum(sngl[1] for sngl in s) / (len(s) ** 0.5) - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest @@ -1731,18 +1790,18 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, exceed thresh. """ # Safety against subclassing and not rethinking this - allowed_names = ['ExpFitCombinedSNR'] + allowed_names = ["ExpFitCombinedSNR"] self._check_coinc_lim_subclass(allowed_names) return thresh * ((len(s) + 1) ** 0.5) - sum(sngl[1] for sngl in s) statistic_dict = { - 'quadsum': QuadratureSumStatistic, - 'single_ranking_only': QuadratureSumStatistic, - 'phasetd': PhaseTDStatistic, - 'exp_fit_csnr': ExpFitCombinedSNR, - 'exp_fit': ExpFitStatistic, + "quadsum": QuadratureSumStatistic, + "single_ranking_only": QuadratureSumStatistic, + "phasetd": PhaseTDStatistic, + "exp_fit_csnr": ExpFitCombinedSNR, + "exp_fit": ExpFitStatistic, } @@ -1768,7 +1827,7 @@ def get_statistic(stat): try: return statistic_dict[stat] except KeyError: - raise RuntimeError('%s is not an available detection statistic' % stat) + raise RuntimeError("%s is not an available detection statistic" % stat) def insert_statistic_option_group(parser, default_ranking_statistic=None): @@ -1800,41 +1859,41 @@ def insert_statistic_option_group(parser, default_ranking_statistic=None): default=default_ranking_statistic, choices=statistic_dict.keys(), required=True if default_ranking_statistic is None else False, - help="The coinc ranking statistic to calculate" + help="The coinc ranking statistic to calculate", ) statistic_opt_group.add_argument( "--sngl-ranking", choices=ranking.sngls_ranking_function_dict.keys(), required=True, - help="The single-detector trigger ranking to use." + help="The single-detector trigger ranking to use.", ) statistic_opt_group.add_argument( "--statistic-files", - nargs='*', - action='append', + nargs="*", + action="append", default=[], - help="Files containing ranking statistic info" + help="Files containing ranking statistic info", ) statistic_opt_group.add_argument( "--statistic-keywords", - nargs='*', + nargs="*", default=[], help="Provide additional key-word arguments to be sent to " - "the statistic class when it is initialized. Should " - "be given in format --statistic-keywords " - "KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ..." + "the statistic class when it is initialized. Should " + "be given in format --statistic-keywords " + "KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ...", ) statistic_opt_group.add_argument( "--statistic-features", - nargs='*', + nargs="*", default=[], choices=_allowed_statistic_features, help="Provide additional arguments to include particular " - "features in the ranking statistic." + "features in the ranking statistic.", ) return statistic_opt_group @@ -1871,12 +1930,14 @@ def parse_statistic_feature_options(stat_features, stat_kwarg_list): for inputstr in stat_kwarg_list: try: - key, value = inputstr.split(':') + key, value = inputstr.split(":") stat_kwarg_dict[key] = value except ValueError: - err_txt = "--statistic-keywords must take input in the " \ - "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... 
" \ - "Received {}".format(' '.join(stat_kwarg_list)) + err_txt = ( + "--statistic-keywords must take input in the " + "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " + "Received {}".format(" ".join(stat_kwarg_list)) + ) raise ValueError(err_txt) return stat_kwarg_dict @@ -1920,10 +1981,7 @@ def get_statistic_from_opts(opts, ifos): ) stat_class = get_statistic(opts.ranking_statistic)( - opts.sngl_ranking, - opts.statistic_files, - ifos=ifos, - **extra_kwargs + opts.sngl_ranking, opts.statistic_files, ifos=ifos, **extra_kwargs ) return stat_class From d61b9973bb96867f8193dab1404dec13942fe7af Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Thu, 18 Apr 2024 06:03:54 -0700 Subject: [PATCH 25/32] Start getting recent stat changes into module --- pycbc/events/stat.py | 62 ++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 01ea9501634..105ad1b4cd4 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -947,6 +947,9 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.kwargs.get("minimum_statistic_cutoff", -30.0) ) + # This will be used to keep track of the template number being used + self.curr_tnum = None + # Go through the keywords and add class information as needed: if self.kwargs["sensitive_volume"]: # Add network sensitivity beckmark @@ -959,7 +962,6 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): axis=0, ) self.benchmark_logvol = 3.0 * numpy.log(hl_net_med_sigma) - self.curr_tnum = None if self.kwargs["dq"]: # Reweight the noise rate by the dq reweighting factor @@ -1068,11 +1070,6 @@ def setup_segments(self, key): def find_dq_noise_rate(self, trigs, dq_state): """Get dq values for a specific ifo and dq states""" - try: - tnum = trigs.template_num - except AttributeError: - tnum = trigs["template_id"] - try: ifo = trigs.ifo except AttributeError: @@ -1085,10 +1082,10 @@ def find_dq_noise_rate(self, trigs, dq_state): if ifo in self.dq_rates_by_state: for i, st in enumerate(dq_state): - if isinstance(tnum, numpy.ndarray): - bin_name = self.dq_bin_by_tid[ifo][tnum[i]] + if isinstance(self.curr_tnum, numpy.ndarray): + bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum[i]] else: - bin_name = self.dq_bin_by_tid[ifo][tnum] + bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum] dq_val[i] = self.dq_rates_by_state[ifo][bin_name][st] return dq_val @@ -1242,8 +1239,8 @@ def find_fits(self, trigs): # fits_by_tid is a dictionary of dictionaries of arrays # indexed by ifo / coefficient name / template_id - alphai = self.fits_by_tid[ifo]["smoothed_fit_coeff"][tnum] - ratei = self.fits_by_tid[ifo]["smoothed_rate_above_thresh"][tnum] + alphai = self.fits_by_tid[ifo]["smoothed_fit_coeff"][self.curr_tnum] + ratei = self.fits_by_tid[ifo]["smoothed_rate_above_thresh"][self.curr_tnum] thresh = self.fits_by_tid[ifo]["thresh"] return alphai, ratei, thresh @@ -1304,6 +1301,15 @@ def lognoiserate(self, trigs): lognoisel: numpy.array Array of log noise rate density for each input trigger. """ + # What is the template number currently being used? 
+ try: + # exists if accessed via coinc_findtrigs, this is a number + self.curr_tnum = trigs.template_num + except AttributeError: + # exists for SingleDetTriggers & pycbc_live get_coinc, + # this is a numpy array + self.curr_tnum = trigs["template_id"] + alphai, ratei, thresh = self.find_fits(trigs) sngl_stat = self.get_sngl_ranking(trigs) lognoisel = ( @@ -1379,14 +1385,6 @@ def single(self, trigs): if self.kwargs["sensitive_volume"]: # populate fields to allow sensitive volume factor calculation singles["sigmasq"] = trigs["sigmasq"][:] - try: - # exists if accessed via coinc_findtrigs - self.curr_tnum = trigs.template_num - except AttributeError: - # exists for SingleDetTriggers - self.curr_tnum = trigs["template_id"] - # Should only be one ifo fit file provided - assert len(self.ifos) == 1 # Store benchmark log volume as single-ifo information since # the ranking methods do not have access to template id singles["benchmark_logvol"] = self.benchmark_logvol[self.curr_tnum] @@ -1398,9 +1396,14 @@ def single(self, trigs): if self.kwargs["chirp_mass"]: from pycbc.conversions import mchirp_from_mass1_mass2 - self.curr_mchirp = mchirp_from_mass1_mass2( - trigs.param["mass1"], trigs.param["mass2"] - ) + try: + mass1 = trigs.param['mass1'] + mass2 = trigs.param['mass2'] + except AttributeError: + mass1 = trigs['mass1'] + mass2 = trigs['mass2'] + self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2) + return numpy.array(singles, ndmin=1) def sensitive_volume_factor(self, sngls): @@ -1465,7 +1468,11 @@ def logsignalrate_shared(self, sngls_info): if self.kwargs["chirp_mass"]: # chirp mass reweighting - mchirp = min(self.curr_mchirp, self.mcm) + if isinstance(self.curr_mchirp, numpy.ndarray): + mchirp = numpy.minimum(self.curr_mchirp, self.mcm) + else: + # curr_mchirp will be a number + mchirp = min(self.curr_mchirp, self.mcm) sr_factor += numpy.log((mchirp / 20.0) ** (11.0 / 3.0)) if self.kwargs["kde"]: @@ -1542,8 +1549,11 @@ def rank_stat_coinc( # Basic noise rate: sum of noise rates multiplied by the # window they can form coincidences in ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_dict, kwargs["time_addition"] + sngl_dict, + kwargs["time_addition"], + dets=kwargs.get('dets', None), ) + ln_noise_rate -= self.benchmark_lograte # Basic option is not to have any signal-based assumptions, @@ -1556,7 +1566,9 @@ def rank_stat_coinc( # normalized # Extent of time-difference space occupied noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, kwargs["time_addition"] + self.hist_ifos, + kwargs["time_addition"], + dets=kwargs.get('dets', None), ) # Volume is the allowed time difference window, multiplied by 2pi # for each phase difference dimension and by allowed range of SNR From 2bb9a205f2659a682ef2116a834f7358ab6b90e7 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Thu, 18 Apr 2024 06:10:58 -0700 Subject: [PATCH 26/32] fixes post-rebase --- pycbc/events/stat.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 105ad1b4cd4..2848dd881f0 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -1222,13 +1222,6 @@ def find_fits(self, trigs): thresh: float or numpy array The thresh fit value(s) """ - try: - # Exists where trigs is a class with the template num attribute - tnum = trigs.template_num - except AttributeError: - # Exists where trigs is dict-like - tnum = trigs['template_id'] - try: ifo = trigs.ifo except AttributeError: From 9564e5d2564bf0d92aac14b8104fc63d73cfa8f1 Mon Sep 17 
00:00:00 2001 From: GarethCabournDavies Date: Mon, 20 May 2024 08:32:11 -0700 Subject: [PATCH 27/32] run black on pycbc/events/ranking in order to try and get codeclimate to be quiet --- pycbc/events/ranking.py | 228 ++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 127 deletions(-) diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index d733f444296..1c62792b0a3 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -7,23 +7,20 @@ logger = logging.getLogger('pycbc.events.ranking') -def effsnr(snr, reduced_x2, fac=250., - **kwargs): # pylint:disable=unused-argument - """Calculate the effective SNR statistic. See (S5y1 paper) for definition. - """ +def effsnr(snr, reduced_x2, fac=250.0, **kwargs): # pylint:disable=unused-argument + """Calculate the effective SNR statistic. See (S5y1 paper) for definition.""" snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) rchisq = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64) - esnr = snr / (1 + snr ** 2 / fac) ** 0.25 / rchisq ** 0.25 + esnr = snr / (1 + snr**2 / fac) ** 0.25 / rchisq**0.25 # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, '__len__'): + if hasattr(snr, "__len__"): return esnr else: return esnr[0] -def newsnr(snr, reduced_x2, q=6., n=2., - **kwargs): # pylint:disable=unused-argument +def newsnr(snr, reduced_x2, q=6.0, n=2.0, **kwargs): # pylint:disable=unused-argument """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. Previous implementation in glue/ligolw/lsctables.py @@ -32,69 +29,63 @@ def newsnr(snr, reduced_x2, q=6., n=2., reduced_x2 = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64) # newsnr is only different from snr if reduced chisq > 1 - ind = numpy.where(reduced_x2 > 1.)[0] - nsnr[ind] *= (0.5 * (1. + reduced_x2[ind] ** (q/n))) ** (-1./q) + ind = numpy.where(reduced_x2 > 1.0)[0] + nsnr[ind] *= (0.5 * (1.0 + reduced_x2[ind] ** (q / n))) ** (-1.0 / q) # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, '__len__'): + if hasattr(snr, "__len__"): return nsnr else: return nsnr[0] -def newsnr_sgveto(snr, brchisq, sgchisq, - **kwargs): # pylint:disable=unused-argument - """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" - nsnr = numpy.array( - newsnr( - snr, - brchisq, - **kwargs), - ndmin=1) +def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-argument + """Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" + nsnr = numpy.array(newsnr(snr, brchisq, **kwargs), ndmin=1) sgchisq = numpy.array(sgchisq, ndmin=1) t = numpy.array(sgchisq > 4, ndmin=1) if len(t): nsnr[t] = nsnr[t] / (sgchisq[t] / 4.0) ** 0.5 # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, '__len__'): + if hasattr(snr, "__len__"): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65, - **kwargs): # pylint:disable=unused-argument - """ Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and +def newsnr_sgveto_psdvar( + snr, brchisq, sgchisq, psd_var_val, min_expected_psdvar=0.65, **kwargs +): # pylint:disable=unused-argument + """Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and PSD variation statistic""" # If PSD var is lower than the 'minimum usually expected value' stop this # being used in the statistic. 
This low value might arise because a # significant fraction of the "short" PSD period was gated (for instance). psd_var_val = numpy.array(psd_var_val, copy=True) - psd_var_val[psd_var_val < min_expected_psdvar] = 1. - scaled_snr = snr * (psd_var_val ** -0.5) - scaled_brchisq = brchisq * (psd_var_val ** -1.) - nsnr = newsnr_sgveto( - scaled_snr, - scaled_brchisq, - sgchisq, - **kwargs - ) + psd_var_val[psd_var_val < min_expected_psdvar] = 1.0 + scaled_snr = snr * (psd_var_val**-0.5) + scaled_brchisq = brchisq * (psd_var_val**-1.0) + nsnr = newsnr_sgveto(scaled_snr, scaled_brchisq, sgchisq, **kwargs) # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, '__len__'): + if hasattr(snr, "__len__"): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65, - brchisq_threshold=10.0, - psd_var_val_threshold=10.0, - **kwargs): # pylint:disable=unused-argument - """ newsnr_sgveto_psdvar with thresholds applied. +def newsnr_sgveto_psdvar_threshold( + snr, + brchisq, + sgchisq, + psd_var_val, + min_expected_psdvar=0.65, + brchisq_threshold=10.0, + psd_var_val_threshold=10.0, + **kwargs +): # pylint:disable=unused-argument + """newsnr_sgveto_psdvar with thresholds applied. This is the newsnr_sgveto_psdvar statistic with additional options to threshold on chi-squared or PSD variation. @@ -108,59 +99,47 @@ def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, **kwargs ) nsnr = numpy.array(nsnr, ndmin=1) - nsnr[brchisq > brchisq_threshold] = 1. - nsnr[psd_var_val > psd_var_val_threshold] = 1. + nsnr[brchisq > brchisq_threshold] = 1.0 + nsnr[psd_var_val > psd_var_val_threshold] = 1.0 # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, '__len__'): + if hasattr(snr, "__len__"): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, - scaling=0.33, min_expected_psdvar=0.65, - **kwargs): # pylint:disable=unused-argument - """ Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD - variation statistic. """ - nsnr = numpy.array( - newsnr_sgveto( - snr, - brchisq, - sgchisq, - **kwargs), - ndmin=1) +def newsnr_sgveto_psdvar_scaled( + snr, brchisq, sgchisq, psd_var_val, scaling=0.33, min_expected_psdvar=0.65, **kwargs +): # pylint:disable=unused-argument + """Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD + variation statistic.""" + nsnr = numpy.array(newsnr_sgveto(snr, brchisq, sgchisq, **kwargs), ndmin=1) psd_var_val = numpy.array(psd_var_val, ndmin=1, copy=True) - psd_var_val[psd_var_val < min_expected_psdvar] = 1. + psd_var_val[psd_var_val < min_expected_psdvar] = 1.0 # Default scale is 0.33 as tuned from analysis of data from O2 chunks - nsnr = nsnr / psd_var_val ** scaling + nsnr = nsnr / psd_var_val**scaling # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, '__len__'): + if hasattr(snr, "__len__"): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, - threshold=2.0, - **kwargs): # pylint:disable=unused-argument - """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and +def newsnr_sgveto_psdvar_scaled_threshold( + snr, bchisq, sgchisq, psd_var_val, threshold=2.0, **kwargs +): # pylint:disable=unused-argument + """Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and scaled psd variation. 
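[Editor's note: illustrative sketch, not part of the patch series.] The psdvar variants above all share the same preprocessing step: PSD-variation values below min_expected_psdvar are reset to 1 (effectively ignored), then the SNR and reduced chi-squared are rescaled before the usual reweighting. In isolation, with made-up inputs:

    import numpy

    min_expected_psdvar = 0.65
    snr = numpy.array([8.0, 8.0])
    brchisq = numpy.array([1.5, 1.5])
    psd_var_val = numpy.array([0.5, 2.0], dtype=float)

    psd_var_val = psd_var_val.copy()
    psd_var_val[psd_var_val < min_expected_psdvar] = 1.0  # too-low values are ignored
    scaled_snr = snr * psd_var_val ** -0.5                # SNR down-weighted in noisy PSD periods
    scaled_brchisq = brchisq * psd_var_val ** -1.0        # chi-squared rescaled the same way
    print(scaled_snr, scaled_brchisq)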
""" - nsnr = newsnr_sgveto_psdvar_scaled( - snr, - bchisq, - sgchisq, - psd_var_val, - **kwargs - ) + nsnr = newsnr_sgveto_psdvar_scaled(snr, bchisq, sgchisq, psd_var_val, **kwargs) nsnr = numpy.array(nsnr, ndmin=1) - nsnr[bchisq > threshold] = 1. + nsnr[bchisq > threshold] = 1.0 # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, '__len__'): + if hasattr(snr, "__len__"): return nsnr else: return nsnr[0] @@ -181,7 +160,7 @@ def get_snr(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of snr values """ - return numpy.array(trigs['snr'][:], ndmin=1, dtype=numpy.float32) + return numpy.array(trigs["snr"][:], ndmin=1, dtype=numpy.float32) def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument @@ -199,12 +178,8 @@ def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of newsnr values """ - dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr = newsnr( - trigs['snr'][:], - trigs['chisq'][:] / dof, - **kwargs - ) + dof = 2.0 * trigs["chisq_dof"][:] - 2.0 + nsnr = newsnr(trigs["snr"][:], trigs["chisq"][:] / dof, **kwargs) return numpy.array(nsnr, ndmin=1, dtype=numpy.float32) @@ -223,12 +198,9 @@ def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of newsnr values """ - dof = 2. * trigs['chisq_dof'][:] - 2. + dof = 2.0 * trigs["chisq_dof"][:] - 2.0 nsnr_sg = newsnr_sgveto( - trigs['snr'][:], - trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - **kwargs + trigs["snr"][:], trigs["chisq"][:] / dof, trigs["sg_chisq"][:], **kwargs ) return numpy.array(nsnr_sg, ndmin=1, dtype=numpy.float32) @@ -249,18 +221,20 @@ def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of newsnr values """ - dof = 2. * trigs['chisq_dof'][:] - 2. + dof = 2.0 * trigs["chisq_dof"][:] - 2.0 nsnr_sg_psd = newsnr_sgveto_psdvar( - trigs['snr'][:], - trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:], + trigs["snr"][:], + trigs["chisq"][:] / dof, + trigs["sg_chisq"][:], + trigs["psd_var_val"][:], **kwargs ) return numpy.array(nsnr_sg_psd, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_threshold( + trigs, **kwargs +): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -276,11 +250,12 @@ def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unuse numpy.ndarray Array of newsnr values """ - dof = 2. * trigs['chisq_dof'][:] - 2. + dof = 2.0 * trigs["chisq_dof"][:] - 2.0 nsnr_sg_psdt = newsnr_sgveto_psdvar_threshold( - trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:], + trigs["snr"][:], + trigs["chisq"][:] / dof, + trigs["sg_chisq"][:], + trigs["psd_var_val"][:], **kwargs ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) @@ -302,18 +277,20 @@ def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-a numpy.ndarray Array of newsnr values """ - dof = 2. * trigs['chisq_dof'][:] - 2. 
+ dof = 2.0 * trigs["chisq_dof"][:] - 2.0 nsnr_sg_psdscale = newsnr_sgveto_psdvar_scaled( - trigs['snr'][:], - trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:], + trigs["snr"][:], + trigs["chisq"][:] / dof, + trigs["sg_chisq"][:], + trigs["psd_var_val"][:], **kwargs ) return numpy.array(nsnr_sg_psdscale, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled_threshold( + trigs, **kwargs +): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic. A further threshold is applied to the @@ -330,43 +307,40 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disabl numpy.ndarray Array of newsnr values """ - dof = 2. * trigs['chisq_dof'][:] - 2. + dof = 2.0 * trigs["chisq_dof"][:] - 2.0 nsnr_sg_psdt = newsnr_sgveto_psdvar_scaled_threshold( - trigs['snr'][:], - trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:], + trigs["snr"][:], + trigs["chisq"][:] / dof, + trigs["sg_chisq"][:], + trigs["psd_var_val"][:], **kwargs ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) sngls_ranking_function_dict = { - 'snr': get_snr, - 'newsnr': get_newsnr, - 'new_snr': get_newsnr, - 'newsnr_sgveto': get_newsnr_sgveto, - 'newsnr_sgveto_psdvar': get_newsnr_sgveto_psdvar, - 'newsnr_sgveto_psdvar_threshold': get_newsnr_sgveto_psdvar_threshold, - 'newsnr_sgveto_psdvar_scaled': get_newsnr_sgveto_psdvar_scaled, - 'newsnr_sgveto_psdvar_scaled_threshold': - get_newsnr_sgveto_psdvar_scaled_threshold, + "snr": get_snr, + "newsnr": get_newsnr, + "new_snr": get_newsnr, + "newsnr_sgveto": get_newsnr_sgveto, + "newsnr_sgveto_psdvar": get_newsnr_sgveto_psdvar, + "newsnr_sgveto_psdvar_threshold": get_newsnr_sgveto_psdvar_threshold, + "newsnr_sgveto_psdvar_scaled": get_newsnr_sgveto_psdvar_scaled, + "newsnr_sgveto_psdvar_scaled_threshold": get_newsnr_sgveto_psdvar_scaled_threshold, } # Lists of datasets required in the trigs object for each function reqd_datasets = {} -reqd_datasets['snr'] = ['snr'] -reqd_datasets['newsnr'] = reqd_datasets['snr'] + ['chisq', 'chisq_dof'] -reqd_datasets['new_snr'] = reqd_datasets['newsnr'] -reqd_datasets['newsnr_sgveto'] = reqd_datasets['newsnr'] + ['sg_chisq'] -reqd_datasets['newsnr_sgveto_psdvar'] = \ - reqd_datasets['newsnr_sgveto'] + ['psd_var_val'] -reqd_datasets['newsnr_sgveto_psdvar_threshold'] = \ - reqd_datasets['newsnr_sgveto_psdvar'] -reqd_datasets['newsnr_sgveto_psdvar_scaled'] = \ - reqd_datasets['newsnr_sgveto_psdvar'] -reqd_datasets['newsnr_sgveto_psdvar_scaled_threshold'] = \ - reqd_datasets['newsnr_sgveto_psdvar'] +reqd_datasets["snr"] = ["snr"] +reqd_datasets["newsnr"] = reqd_datasets["snr"] + ["chisq", "chisq_dof"] +reqd_datasets["new_snr"] = reqd_datasets["newsnr"] +reqd_datasets["newsnr_sgveto"] = reqd_datasets["newsnr"] + ["sg_chisq"] +reqd_datasets["newsnr_sgveto_psdvar"] = reqd_datasets["newsnr_sgveto"] + ["psd_var_val"] +reqd_datasets["newsnr_sgveto_psdvar_threshold"] = reqd_datasets["newsnr_sgveto_psdvar"] +reqd_datasets["newsnr_sgveto_psdvar_scaled"] = reqd_datasets["newsnr_sgveto_psdvar"] +reqd_datasets["newsnr_sgveto_psdvar_scaled_threshold"] = reqd_datasets[ + "newsnr_sgveto_psdvar" +] def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): @@ -387,7 +361,7 @@ def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): try: sngl_func = sngls_ranking_function_dict[statname] except KeyError as exc: - 
err_msg = 'Single-detector ranking {} not recognized'.format(statname) + err_msg = "Single-detector ranking {} not recognized".format(statname) raise ValueError(err_msg) from exc # NOTE: In the sngl_funcs all the kwargs are explicitly stated, so any From e99b941676f9be40985df130f95f1a6eeadd84b2 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Mon, 20 May 2024 08:56:41 -0700 Subject: [PATCH 28/32] Revert "run black on pycbc/events/ranking in order to try and get codeclimate to be quiet" This reverts commit 4f082eae0c62a353cd6fd17f9d892f26c4ea0a4a. --- pycbc/events/ranking.py | 228 ++++++++++++++++++++++------------------ 1 file changed, 127 insertions(+), 101 deletions(-) diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index 1c62792b0a3..d733f444296 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -7,20 +7,23 @@ logger = logging.getLogger('pycbc.events.ranking') -def effsnr(snr, reduced_x2, fac=250.0, **kwargs): # pylint:disable=unused-argument - """Calculate the effective SNR statistic. See (S5y1 paper) for definition.""" +def effsnr(snr, reduced_x2, fac=250., + **kwargs): # pylint:disable=unused-argument + """Calculate the effective SNR statistic. See (S5y1 paper) for definition. + """ snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) rchisq = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64) - esnr = snr / (1 + snr**2 / fac) ** 0.25 / rchisq**0.25 + esnr = snr / (1 + snr ** 2 / fac) ** 0.25 / rchisq ** 0.25 # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, "__len__"): + if hasattr(snr, '__len__'): return esnr else: return esnr[0] -def newsnr(snr, reduced_x2, q=6.0, n=2.0, **kwargs): # pylint:disable=unused-argument +def newsnr(snr, reduced_x2, q=6., n=2., + **kwargs): # pylint:disable=unused-argument """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. Previous implementation in glue/ligolw/lsctables.py @@ -29,63 +32,69 @@ def newsnr(snr, reduced_x2, q=6.0, n=2.0, **kwargs): # pylint:disable=unused-ar reduced_x2 = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64) # newsnr is only different from snr if reduced chisq > 1 - ind = numpy.where(reduced_x2 > 1.0)[0] - nsnr[ind] *= (0.5 * (1.0 + reduced_x2[ind] ** (q / n))) ** (-1.0 / q) + ind = numpy.where(reduced_x2 > 1.)[0] + nsnr[ind] *= (0.5 * (1. + reduced_x2[ind] ** (q/n))) ** (-1./q) # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, "__len__"): + if hasattr(snr, '__len__'): return nsnr else: return nsnr[0] -def newsnr_sgveto(snr, brchisq, sgchisq, **kwargs): # pylint:disable=unused-argument - """Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" - nsnr = numpy.array(newsnr(snr, brchisq, **kwargs), ndmin=1) +def newsnr_sgveto(snr, brchisq, sgchisq, + **kwargs): # pylint:disable=unused-argument + """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" + nsnr = numpy.array( + newsnr( + snr, + brchisq, + **kwargs), + ndmin=1) sgchisq = numpy.array(sgchisq, ndmin=1) t = numpy.array(sgchisq > 4, ndmin=1) if len(t): nsnr[t] = nsnr[t] / (sgchisq[t] / 4.0) ** 0.5 # If snr input is float, return a float. Otherwise return numpy array. 
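[Editor's note: illustrative sketch, not part of the patch series.] The newsnr reweighting restored to its original formatting above only modifies the SNR when the reduced chi-squared exceeds 1. A compact standalone check of that behaviour:

    import numpy

    def newsnr_sketch(snr, reduced_x2, q=6.0, n=2.0):
        snr = numpy.array(snr, ndmin=1, dtype=numpy.float64)
        rchisq = numpy.array(reduced_x2, ndmin=1, dtype=numpy.float64)
        nsnr = snr.copy()
        ind = rchisq > 1.0
        # down-weight only triggers with poor chi-squared
        nsnr[ind] *= (0.5 * (1.0 + rchisq[ind] ** (q / n))) ** (-1.0 / q)
        return nsnr

    print(newsnr_sketch([10.0, 10.0], [0.8, 3.0]))  # first value unchanged, second reduced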
- if hasattr(snr, "__len__"): + if hasattr(snr, '__len__'): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar( - snr, brchisq, sgchisq, psd_var_val, min_expected_psdvar=0.65, **kwargs -): # pylint:disable=unused-argument - """Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and +def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, + min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument + """ Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and PSD variation statistic""" # If PSD var is lower than the 'minimum usually expected value' stop this # being used in the statistic. This low value might arise because a # significant fraction of the "short" PSD period was gated (for instance). psd_var_val = numpy.array(psd_var_val, copy=True) - psd_var_val[psd_var_val < min_expected_psdvar] = 1.0 - scaled_snr = snr * (psd_var_val**-0.5) - scaled_brchisq = brchisq * (psd_var_val**-1.0) - nsnr = newsnr_sgveto(scaled_snr, scaled_brchisq, sgchisq, **kwargs) + psd_var_val[psd_var_val < min_expected_psdvar] = 1. + scaled_snr = snr * (psd_var_val ** -0.5) + scaled_brchisq = brchisq * (psd_var_val ** -1.) + nsnr = newsnr_sgveto( + scaled_snr, + scaled_brchisq, + sgchisq, + **kwargs + ) # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, "__len__"): + if hasattr(snr, '__len__'): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar_threshold( - snr, - brchisq, - sgchisq, - psd_var_val, - min_expected_psdvar=0.65, - brchisq_threshold=10.0, - psd_var_val_threshold=10.0, - **kwargs -): # pylint:disable=unused-argument - """newsnr_sgveto_psdvar with thresholds applied. +def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, + min_expected_psdvar=0.65, + brchisq_threshold=10.0, + psd_var_val_threshold=10.0, + **kwargs): # pylint:disable=unused-argument + """ newsnr_sgveto_psdvar with thresholds applied. This is the newsnr_sgveto_psdvar statistic with additional options to threshold on chi-squared or PSD variation. @@ -99,47 +108,59 @@ def newsnr_sgveto_psdvar_threshold( **kwargs ) nsnr = numpy.array(nsnr, ndmin=1) - nsnr[brchisq > brchisq_threshold] = 1.0 - nsnr[psd_var_val > psd_var_val_threshold] = 1.0 + nsnr[brchisq > brchisq_threshold] = 1. + nsnr[psd_var_val > psd_var_val_threshold] = 1. # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, "__len__"): + if hasattr(snr, '__len__'): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar_scaled( - snr, brchisq, sgchisq, psd_var_val, scaling=0.33, min_expected_psdvar=0.65, **kwargs -): # pylint:disable=unused-argument - """Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD - variation statistic.""" - nsnr = numpy.array(newsnr_sgveto(snr, brchisq, sgchisq, **kwargs), ndmin=1) +def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, + scaling=0.33, min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument + """ Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD + variation statistic. """ + nsnr = numpy.array( + newsnr_sgveto( + snr, + brchisq, + sgchisq, + **kwargs), + ndmin=1) psd_var_val = numpy.array(psd_var_val, ndmin=1, copy=True) - psd_var_val[psd_var_val < min_expected_psdvar] = 1.0 + psd_var_val[psd_var_val < min_expected_psdvar] = 1. # Default scale is 0.33 as tuned from analysis of data from O2 chunks - nsnr = nsnr / psd_var_val**scaling + nsnr = nsnr / psd_var_val ** scaling # If snr input is float, return a float. 
Otherwise return numpy array. - if hasattr(snr, "__len__"): + if hasattr(snr, '__len__'): return nsnr else: return nsnr[0] -def newsnr_sgveto_psdvar_scaled_threshold( - snr, bchisq, sgchisq, psd_var_val, threshold=2.0, **kwargs -): # pylint:disable=unused-argument - """Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and +def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, + threshold=2.0, + **kwargs): # pylint:disable=unused-argument + """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and scaled psd variation. """ - nsnr = newsnr_sgveto_psdvar_scaled(snr, bchisq, sgchisq, psd_var_val, **kwargs) + nsnr = newsnr_sgveto_psdvar_scaled( + snr, + bchisq, + sgchisq, + psd_var_val, + **kwargs + ) nsnr = numpy.array(nsnr, ndmin=1) - nsnr[bchisq > threshold] = 1.0 + nsnr[bchisq > threshold] = 1. # If snr input is float, return a float. Otherwise return numpy array. - if hasattr(snr, "__len__"): + if hasattr(snr, '__len__'): return nsnr else: return nsnr[0] @@ -160,7 +181,7 @@ def get_snr(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of snr values """ - return numpy.array(trigs["snr"][:], ndmin=1, dtype=numpy.float32) + return numpy.array(trigs['snr'][:], ndmin=1, dtype=numpy.float32) def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument @@ -178,8 +199,12 @@ def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of newsnr values """ - dof = 2.0 * trigs["chisq_dof"][:] - 2.0 - nsnr = newsnr(trigs["snr"][:], trigs["chisq"][:] / dof, **kwargs) + dof = 2. * trigs['chisq_dof'][:] - 2. + nsnr = newsnr( + trigs['snr'][:], + trigs['chisq'][:] / dof, + **kwargs + ) return numpy.array(nsnr, ndmin=1, dtype=numpy.float32) @@ -198,9 +223,12 @@ def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of newsnr values """ - dof = 2.0 * trigs["chisq_dof"][:] - 2.0 + dof = 2. * trigs['chisq_dof'][:] - 2. nsnr_sg = newsnr_sgveto( - trigs["snr"][:], trigs["chisq"][:] / dof, trigs["sg_chisq"][:], **kwargs + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + **kwargs ) return numpy.array(nsnr_sg, ndmin=1, dtype=numpy.float32) @@ -221,20 +249,18 @@ def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument numpy.ndarray Array of newsnr values """ - dof = 2.0 * trigs["chisq_dof"][:] - 2.0 + dof = 2. * trigs['chisq_dof'][:] - 2. nsnr_sg_psd = newsnr_sgveto_psdvar( - trigs["snr"][:], - trigs["chisq"][:] / dof, - trigs["sg_chisq"][:], - trigs["psd_var_val"][:], + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], **kwargs ) return numpy.array(nsnr_sg_psd, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_threshold( - trigs, **kwargs -): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -250,12 +276,11 @@ def get_newsnr_sgveto_psdvar_threshold( numpy.ndarray Array of newsnr values """ - dof = 2.0 * trigs["chisq_dof"][:] - 2.0 + dof = 2. * trigs['chisq_dof'][:] - 2. 
nsnr_sg_psdt = newsnr_sgveto_psdvar_threshold( - trigs["snr"][:], - trigs["chisq"][:] / dof, - trigs["sg_chisq"][:], - trigs["psd_var_val"][:], + trigs['snr'][:], trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], **kwargs ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) @@ -277,20 +302,18 @@ def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-a numpy.ndarray Array of newsnr values """ - dof = 2.0 * trigs["chisq_dof"][:] - 2.0 + dof = 2. * trigs['chisq_dof'][:] - 2. nsnr_sg_psdscale = newsnr_sgveto_psdvar_scaled( - trigs["snr"][:], - trigs["chisq"][:] / dof, - trigs["sg_chisq"][:], - trigs["psd_var_val"][:], + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], **kwargs ) return numpy.array(nsnr_sg_psdscale, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled_threshold( - trigs, **kwargs -): # pylint:disable=unused-argument +def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic. A further threshold is applied to the @@ -307,40 +330,43 @@ def get_newsnr_sgveto_psdvar_scaled_threshold( numpy.ndarray Array of newsnr values """ - dof = 2.0 * trigs["chisq_dof"][:] - 2.0 + dof = 2. * trigs['chisq_dof'][:] - 2. nsnr_sg_psdt = newsnr_sgveto_psdvar_scaled_threshold( - trigs["snr"][:], - trigs["chisq"][:] / dof, - trigs["sg_chisq"][:], - trigs["psd_var_val"][:], + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], **kwargs ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) sngls_ranking_function_dict = { - "snr": get_snr, - "newsnr": get_newsnr, - "new_snr": get_newsnr, - "newsnr_sgveto": get_newsnr_sgveto, - "newsnr_sgveto_psdvar": get_newsnr_sgveto_psdvar, - "newsnr_sgveto_psdvar_threshold": get_newsnr_sgveto_psdvar_threshold, - "newsnr_sgveto_psdvar_scaled": get_newsnr_sgveto_psdvar_scaled, - "newsnr_sgveto_psdvar_scaled_threshold": get_newsnr_sgveto_psdvar_scaled_threshold, + 'snr': get_snr, + 'newsnr': get_newsnr, + 'new_snr': get_newsnr, + 'newsnr_sgveto': get_newsnr_sgveto, + 'newsnr_sgveto_psdvar': get_newsnr_sgveto_psdvar, + 'newsnr_sgveto_psdvar_threshold': get_newsnr_sgveto_psdvar_threshold, + 'newsnr_sgveto_psdvar_scaled': get_newsnr_sgveto_psdvar_scaled, + 'newsnr_sgveto_psdvar_scaled_threshold': + get_newsnr_sgveto_psdvar_scaled_threshold, } # Lists of datasets required in the trigs object for each function reqd_datasets = {} -reqd_datasets["snr"] = ["snr"] -reqd_datasets["newsnr"] = reqd_datasets["snr"] + ["chisq", "chisq_dof"] -reqd_datasets["new_snr"] = reqd_datasets["newsnr"] -reqd_datasets["newsnr_sgveto"] = reqd_datasets["newsnr"] + ["sg_chisq"] -reqd_datasets["newsnr_sgveto_psdvar"] = reqd_datasets["newsnr_sgveto"] + ["psd_var_val"] -reqd_datasets["newsnr_sgveto_psdvar_threshold"] = reqd_datasets["newsnr_sgveto_psdvar"] -reqd_datasets["newsnr_sgveto_psdvar_scaled"] = reqd_datasets["newsnr_sgveto_psdvar"] -reqd_datasets["newsnr_sgveto_psdvar_scaled_threshold"] = reqd_datasets[ - "newsnr_sgveto_psdvar" -] +reqd_datasets['snr'] = ['snr'] +reqd_datasets['newsnr'] = reqd_datasets['snr'] + ['chisq', 'chisq_dof'] +reqd_datasets['new_snr'] = reqd_datasets['newsnr'] +reqd_datasets['newsnr_sgveto'] = reqd_datasets['newsnr'] + ['sg_chisq'] +reqd_datasets['newsnr_sgveto_psdvar'] = \ + reqd_datasets['newsnr_sgveto'] + ['psd_var_val'] 
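[Editor's note: illustrative sketch, not part of the patch series.] The reqd_datasets mapping restored above pairs each ranking name with the trigger columns it needs. A small hypothetical pre-flight check built on that idea; the dict literal here just repeats two entries from the module and the helper function is invented for the example:

    reqd_datasets = {
        "snr": ["snr"],
        "newsnr": ["snr", "chisq", "chisq_dof"],
    }

    def missing_datasets(trigs, statname):
        # return any columns the chosen ranking needs but the triggers do not provide
        return [d for d in reqd_datasets[statname] if d not in trigs]

    trigs = {"snr": [9.0], "chisq": [20.0]}
    print(missing_datasets(trigs, "newsnr"))  # ['chisq_dof']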
+reqd_datasets['newsnr_sgveto_psdvar_threshold'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] +reqd_datasets['newsnr_sgveto_psdvar_scaled'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] +reqd_datasets['newsnr_sgveto_psdvar_scaled_threshold'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): @@ -361,7 +387,7 @@ def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): try: sngl_func = sngls_ranking_function_dict[statname] except KeyError as exc: - err_msg = "Single-detector ranking {} not recognized".format(statname) + err_msg = 'Single-detector ranking {} not recognized'.format(statname) raise ValueError(err_msg) from exc # NOTE: In the sngl_funcs all the kwargs are explicitly stated, so any From e9f3c1f364bceec091f57a0febc94fb81cc9a0e1 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Fri, 28 Jun 2024 05:45:44 -0700 Subject: [PATCH 29/32] minor fixes --- pycbc/events/stat.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 2848dd881f0..98ff2cd8825 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -782,9 +782,9 @@ def logsignalrate(self, stats, shift, to_shift): # These weren't in our histogram so give them max penalty # instead of random value - missed = numpy.where(self.param_bin[ref_ifo][loc] != nbinned)[ - 0 - ] + missed = numpy.where( + self.param_bin[ref_ifo][loc] != nbinned + )[0] rate[rtype[missed]] = self.max_penalty # Scale by signal population SNR rate[rtype] *= (sref[rtype] / self.ref_snr) ** -4.0 @@ -867,10 +867,10 @@ def coinc_lim_for_thresh( if not self.has_hist: self.get_hist() - fixed_statsq = sum( - [b['snglstat'] ** 2 for a, b in sngls_list if a != limifo] + fixed_stat_sq = sum( + [b["snglstat"] ** 2 for a, b in sngls_list if a != limifo] ) - s1 = thresh ** 2. - fixed_statsq + s1 = thresh ** 2. 
- fixed_stat_sq # Assume best case scenario and use maximum signal rate s1 -= 2.0 * self.hist_max s1[s1 < 0] = 0 From 13489d7de27bc00c5695c43c089c17b52a1c1689 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Fri, 8 Nov 2024 05:48:41 -0800 Subject: [PATCH 30/32] Bring up to date with recent changes --- bin/all_sky_search/pycbc_sngls_findtrigs | 10 +- pycbc/events/stat.py | 186 ++++++++++++++++------- 2 files changed, 139 insertions(+), 57 deletions(-) diff --git a/bin/all_sky_search/pycbc_sngls_findtrigs b/bin/all_sky_search/pycbc_sngls_findtrigs index 304a00af785..9b2e3bfb76d 100644 --- a/bin/all_sky_search/pycbc_sngls_findtrigs +++ b/bin/all_sky_search/pycbc_sngls_findtrigs @@ -161,6 +161,11 @@ for i, tnum in enumerate(template_ids): "Calculating statistic in template %d out of %d", i, len(template_ids), ) + else: + logging.debug( + "Calculating statistic in template %d out of %d", + i, len(template_ids), + ) tids_uncut = trigs.set_template(tnum) trigger_keep_ids = cuts.apply_trigger_cuts(trigs, trigger_cut_dict, @@ -177,8 +182,7 @@ for i, tnum in enumerate(template_ids): # Stat class instance to calculate the ranking statistic sds = rank_method.single(trigs)[trigger_keep_ids] - stat_t = rank_method.rank_stat_single((ifo, sds), - **extra_kwargs) + stat_t = rank_method.rank_stat_single((ifo, sds)) trigger_times = trigs['end_time'][:][trigger_keep_ids] if args.cluster_window is not None: cid = coinc.cluster_over_time(stat_t, trigger_times, @@ -195,7 +199,7 @@ for i, tnum in enumerate(template_ids): # Perform decimation dec_facs = np.ones_like(template_ids_all) stat_all = np.array(stat_all) -template_ids_all = np.array(template_ids_all) +template_ids_all = np.array(template_ids_all, dtype=int) trigger_ids_all = np.array(trigger_ids_all) trigger_times_all = np.array(trigger_times_all) diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index 98ff2cd8825..52477914174 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -930,6 +930,8 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.fits_by_tid = {} for i in self.bg_ifos: self.fits_by_tid[i] = self.assign_fits(i) + if self.kwargs["normalize_fit_rate"]: + self.reassign_rate(i) # These are important for the coinc_lim_for_thresh method # Is the single threshold a < or > limit? 
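[Editor's note: illustrative sketch, not part of the patch series.] The coinc_lim_for_thresh fragment above bounds how loud the remaining detector must be: starting from thresh**2 it subtracts the quadrature sum of the fixed single-detector statistics and a best-case 2 * hist_max signal-rate contribution, clipping at zero. With made-up numbers; the final square root back to a single-detector statistic is an assumed completion, not shown in the hunk:

    import numpy

    thresh = 9.0
    hist_max = 2.0                                   # best-case histogram value (made up)
    other_snglstats = numpy.array([[6.0], [5.0]])    # statistics of the fixed detectors (made up)

    fixed_stat_sq = numpy.sum(other_snglstats ** 2, axis=0)
    s1 = thresh ** 2 - fixed_stat_sq
    s1 -= 2.0 * hist_max
    s1[s1 < 0] = 0
    limit = s1 ** 0.5        # assumed: quadrature limit for the free detector
    print(limit)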
@@ -947,6 +949,11 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.kwargs.get("minimum_statistic_cutoff", -30.0) ) + # Modifier to get a sensible value of the fit slope below threshold + self.alphabelow = float( + self.kwargs.get("alpha_below_thresh", numpy.inf) + ) + # This will be used to keep track of the template number being used self.curr_tnum = None @@ -967,14 +974,20 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): # Reweight the noise rate by the dq reweighting factor self.dq_rates_by_state = {} self.dq_bin_by_tid = {} - self.dq_state_segments = {} + self.dq_state_segments = None + self.low_latency = False + self.single_dtype.append(('dq_state', int)) for ifo in self.ifos: key = f"{ifo}-dq_stat_info" if key in self.files.keys(): self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) - self.dq_state_segments[ifo] = self.setup_segments(key) + self.check_low_latency(key) + if not self.low_latency: + if self.dq_state_segments is None: + self.dq_state_segments = {} + self.dq_state_segments[ifo] = self.setup_segments(key) if self.kwargs["chirp_mass"]: # Reweight the signal rate by the chirp mass of the template @@ -1049,9 +1062,7 @@ def assign_dq_rates(self, key): def setup_segments(self, key): """ - Check if segments definitions are in stat file - If they are, we are running offline and need to store them - If they aren't, we are running online + Store segments from stat file """ ifo = key.split("-")[0] with h5py.File(self.files[key], "r") as dq_file: @@ -1059,9 +1070,8 @@ def setup_segments(self, key): dq_state_segs_dict = {} for k in ifo_grp["dq_segments"].keys(): seg_dict = {} - seg_dict["start"] = ifo_grp[f"dq_segments/{k}/segment_starts"][ - : - ] + seg_dict["start"] = \ + ifo_grp[f"dq_segments/{k}/segment_starts"][:] seg_dict["end"] = ifo_grp[f"dq_segments/{k}/segment_ends"][:] dq_state_segs_dict[k] = seg_dict @@ -1070,23 +1080,15 @@ def setup_segments(self, key): def find_dq_noise_rate(self, trigs, dq_state): """Get dq values for a specific ifo and dq states""" - try: - ifo = trigs.ifo - except AttributeError: - ifo = trigs["ifo"] - assert len(numpy.unique(ifo)) == 1 - # Should be exactly one ifo provided - ifo = ifo[0] - - dq_val = numpy.zeros(len(dq_state)) + dq_val = numpy.ones(len(dq_state)) - if ifo in self.dq_rates_by_state: + if self.curr_ifo in self.dq_rates_by_state: for i, st in enumerate(dq_state): if isinstance(self.curr_tnum, numpy.ndarray): - bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum[i]] + bin_name = self.dq_bin_by_tid[self.curr_ifo][self.curr_tnum[i]] else: - bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum] - dq_val[i] = self.dq_rates_by_state[ifo][bin_name][st] + bin_name = self.dq_bin_by_tid[self.curr_ifo][self.curr_tnum] + dq_val[i] = self.dq_rates_by_state[self.curr_ifo][bin_name][st] return dq_val def find_dq_state_by_time(self, ifo, times): @@ -1103,6 +1105,53 @@ def find_dq_state_by_time(self, ifo, times): dq_state[inds] = int(k[9:]) return dq_state + def check_low_latency(self, key): + """ + Check if the statistic file indicates low latency mode. + Parameters + ---------- + key: str + Statistic file key string. 
+ Returns + ------- + None + """ + ifo = key.split('-')[0] + with h5py.File(self.files[key], 'r') as dq_file: + ifo_grp = dq_file[ifo] + if 'dq_segments' not in ifo_grp.keys(): + # if segs are not in file, we must be in LL + if self.dq_state_segments is not None: + raise ValueError( + 'Either all dq stat files must have segments or none' + ) + self.low_latency = True + elif self.low_latency: + raise ValueError( + 'Either all dq stat files must have segments or none' + ) + + def reassign_rate(self, ifo): + """ + Reassign the rate to be number per time rather than an arbitrarily + normalised number. + + Parameters + ----------- + ifo: str + The ifo to consider. + """ + with h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') as coeff_file: + analysis_time = float(coeff_file.attrs['analysis_time']) + fbt = 'fit_by_template' in coeff_file + + self.fits_by_tid[ifo]['smoothed_rate_above_thresh'] /= analysis_time + self.fits_by_tid[ifo]['smoothed_rate_in_template'] /= analysis_time + # The by-template fits may have been stored in the smoothed fits file + if fbt: + self.fits_by_tid[ifo]['fit_by_rate_above_thresh'] /= analysis_time + self.fits_by_tid[ifo]['fit_by_rate_in_template'] /= analysis_time + def assign_fits(self, ifo): """ Extract fits from single-detector rate fit files @@ -1152,13 +1201,6 @@ def assign_fits(self, ifo): "count_in_template" ][:][tid_sort].astype(float) - if self.kwargs["normalize_fit_rate"]: - analysis_time = float(coeff_file.attrs["analysis_time"]) - fits_by_tid_dict["smoothed_rate_above_thresh"] /= analysis_time - fits_by_tid_dict["smoothed_rate_in_template"] /= analysis_time - if "fit_by_template" in coeff_file: - fits_by_tid_dict["fit_by_rate_above_thresh"] /= analysis_time - fits_by_tid_dict["fit_by_rate_in_template"] /= analysis_time # Keep the fit threshold in fits_by_tid fits_by_tid_dict["thresh"] = coeff_file.attrs["stat_threshold"] @@ -1173,11 +1215,18 @@ def update_file(self, key): If others are used (i.e. this statistic is inherited), they will need updated separately """ + # First - check if the phasetd file has been updated, this is + # inherited from the PhaseTDStatistic + if PhaseTDStatistic.update_file(self, key): + return True + if key.endswith('-fit_coeffs'): # This is a ExpFitStatistic file which needs updating # Which ifo is it? ifo = key[:2] self.fits_by_tid[ifo] = self.assign_fits(ifo) + if self.kwargs["normalize_fit_rate"]: + self.reassign_rate(ifo) self.get_ref_vals(ifo) logger.info( "Updating %s statistic %s file", @@ -1185,6 +1234,30 @@ def update_file(self, key): key ) return True + + # Is the key a KDE statistic file that we update here? 
+ if key.endswith('kde_file'): + logger.info( + "Updating %s statistic %s file", + ''.join(self.ifos), + key + ) + kde_style = key.split('-')[0] + self.assign_kdes(kde_style) + return True + + # We also need to check if the DQ files have updated + if key.endswith('dq_stat_info'): + ifo = key.split('-')[0] + logger.info( + "Updating %s statistic %s file", + ifo, + key + ) + self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) + self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) + return True + return False def get_ref_vals(self, ifo): @@ -1311,38 +1384,16 @@ def lognoiserate(self, trigs): + numpy.log(ratei) ) - alphabelow = self.kwargs.get("alpha_below_thresh", 6.0) - try: - alphabelow = float(alphabelow) + if not numpy.isinf(self.alphabelow): # Above the threshold we use the usual fit coefficient (alphai) # below threshold use specified alphabelow bt = sngl_stat < thresh lognoiselbt = ( - -alphabelow * (sngl_stat - thresh) - + numpy.log(alphabelow) + -self.alphabelow * (sngl_stat - thresh) + + numpy.log(self.alphabelow) + numpy.log(ratei) ) lognoisel[bt] = lognoiselbt[bt] - except ValueError: - # The float conversion will raise this if passed e.g. 'None' - # In that case we simply use the fit value everywhere - pass - - if self.kwargs["dq"]: - # Reweight the lognoiserate things by the dq reweighting factor - # make sure every trig has a dq state - try: - ifo = trigs.ifo - except AttributeError: - ifo = trigs["ifo"] - assert len(numpy.unique(ifo)) == 1 - # Should be exactly one ifo provided - ifo = ifo[0] - - dq_state = self.find_dq_state_by_time(ifo, trigs["end_time"][:]) - dq_rate = self.find_dq_noise_rate(trigs, dq_state) - dq_rate = numpy.maximum(dq_rate, 1) - lognoisel += numpy.log(dq_rate) return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32) @@ -1364,11 +1415,20 @@ def single(self, trigs): The array of single detector values """ + try: + self.curr_ifo = trigs.ifo + except AttributeError: + self.curr_ifo = trigs.get('ifo', None) + if self.curr_ifo is None: + self.curr_ifo = self.ifos[0] + assert self.curr_ifo in self.ifos + # Should be exactly one ifo provided + assert len(numpy.unique(self.curr_ifo)) == 1 + # single-ifo stat = log of noise rate sngl_stat = self.lognoiserate(trigs) # populate other fields to calculate phase/time/amp consistency singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles["snglstat"] = sngl_stat singles["coa_phase"] = trigs["coa_phase"][:] singles["end_time"] = trigs["end_time"][:] singles["snr"] = trigs["snr"][:] @@ -1397,6 +1457,19 @@ def single(self, trigs): mass2 = trigs['mass2'] self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2) + if self.kwargs["dq"]: + if self.low_latency: + # trigs should already have a dq state assigned + singles['dq_state'] = trigs['dq_state'][:] + else: + singles['dq_state'] = self.find_dq_state_by_time( + self.curr_ifo, trigs['end_time'][:] + ) + dq_rate = self.find_dq_noise_rate(trigs, singles['dq_state']) + dq_rate = numpy.maximum(dq_rate, 1) + sngl_stat += numpy.log(dq_rate) + + singles["snglstat"] = sngl_stat return numpy.array(singles, ndmin=1) def sensitive_volume_factor(self, sngls): @@ -1702,6 +1775,11 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.single_increasing = True self.single_dtype = numpy.float32 + # Modifier to get a sensible value of the fit slope below threshold + self.alphabelow = float( + self.kwargs.get("alpha_below_thresh", numpy.inf) + ) + def single(self, trigs): """ Calculate the necessary single detector information From 
d81a83e743c56bbe920b429349e2ba32e5fe125d Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 13 Nov 2024 07:45:24 -0800 Subject: [PATCH 31/32] Use new ranking statistics in CI --- examples/live/run.sh | 9 ++++++++- examples/search/plotting.ini | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/examples/live/run.sh b/examples/live/run.sh index 434f134d5ad..d90df828b09 100755 --- a/examples/live/run.sh +++ b/examples/live/run.sh @@ -186,7 +186,14 @@ python -m mpi4py `which pycbc_live` \ --output-path output \ --day-hour-output-prefix \ --sngl-ranking newsnr_sgveto_psdvar_threshold \ ---ranking-statistic phasetd_exp_fit_fgbg_norm \ +--ranking-statistic \ + exp_fit \ +--statistic-features \ + phasetd \ + sensitive_volume \ + normalize_fit_rate \ +--statistic-keywords \ + alpha_below_thresh:6 \ --statistic-files \ statHL.hdf \ statHV.hdf \ diff --git a/examples/search/plotting.ini b/examples/search/plotting.ini index 5a7d4f55837..307edb5b237 100644 --- a/examples/search/plotting.ini +++ b/examples/search/plotting.ini @@ -38,6 +38,8 @@ non-coinc-time-only = vetoed-time-only = ranking-statistic = ${sngls|ranking-statistic} statistic-files = ${sngls|statistic-files} +statistic-features = ${sngls|statistic-features} +statistic-keywords = ${sngls|statistic-keywords} [injection_minifollowup] ifar-threshold = 1 @@ -50,6 +52,8 @@ sngl-ranking = newsnr_sgveto_psdvar [page_snglinfo-vetoed] ranking-statistic = ${sngls|ranking-statistic} +statistic-features = ${sngls|statistic-features} +statistic-keywords = ${sngls|statistic-keywords} [single_template_plot] From 4db495ad604528f1802880c5e04a947f92a47972 Mon Sep 17 00:00:00 2001 From: GarethCabournDavies Date: Wed, 13 Nov 2024 08:31:04 -0800 Subject: [PATCH 32/32] Start working on getting modular statistic into Live usage --- bin/live/pycbc_live_single_significance_fits | 3 --- bin/minifollowups/pycbc_sngl_minifollowup | 3 --- pycbc/events/single.py | 16 +++++++++++++--- pycbc/io/hdf.py | 5 +---- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/bin/live/pycbc_live_single_significance_fits b/bin/live/pycbc_live_single_significance_fits index b1e76c32612..ab8243f6f45 100644 --- a/bin/live/pycbc_live_single_significance_fits +++ b/bin/live/pycbc_live_single_significance_fits @@ -71,8 +71,6 @@ pycbc.init_logging(args.verbose) sngls_io.verify_live_significance_trigger_pruning_options(args, parser) sngls_io.verify_live_significance_duration_bin_options(args, parser) -stat_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords) - duration_bin_edges = sngls_io.duration_bins_from_cli(args) logging.info( "Duration bin edges: %s", @@ -192,7 +190,6 @@ for counter, filename in enumerate(files): sds = rank_method[ifo].single(triggers_cut) sngls_value = rank_method[ifo].rank_stat_single( (ifo, sds), - **stat_kwargs ) triggers_cut['stat'] = sngls_value diff --git a/bin/minifollowups/pycbc_sngl_minifollowup b/bin/minifollowups/pycbc_sngl_minifollowup index ea65cfb97c7..ea290ecfc76 100644 --- a/bin/minifollowups/pycbc_sngl_minifollowup +++ b/bin/minifollowups/pycbc_sngl_minifollowup @@ -238,13 +238,10 @@ if args.maximum_duration is not None: logging.info('Finding loudest clustered events') rank_method = stat.get_statistic_from_opts(args, [args.instrument]) -extra_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords) - trigs.mask_to_n_loudest_clustered_events( rank_method, n_loudest=num_events, cluster_window=args.cluster_window, - statistic_kwargs=extra_kwargs, ) times = trigs.end_time diff --git 
a/pycbc/events/single.py b/pycbc/events/single.py index db22fd08712..a1d47de9154 100644 --- a/pycbc/events/single.py +++ b/pycbc/events/single.py @@ -25,6 +25,8 @@ def __init__(self, ifo, fixed_ifar=None, maximum_ifar=None, statistic=None, + stat_features=None, + stat_keywords=None, sngl_ranking=None, stat_files=None, statistic_refresh_rate=None, @@ -54,6 +56,10 @@ def __init__(self, ifo, threshold criteria statistic: str The name of the statistic to rank events. + stat_features: list of str + The names of features to use for the statistic + stat_keywords: list of key:value strings + argument for statistic keywords sngl_ranking: str The single detector ranking to use with the background statistic stat_files: list of strs @@ -80,11 +86,15 @@ class (in seconds), default not do do this self.statistic_refresh_rate = statistic_refresh_rate stat_class = stat.get_statistic(statistic) + stat_extras = stat.parse_statistic_feature_options( + stat_features, + stat_keywords, + ) self.stat_calculator = stat_class( sngl_ranking, stat_files, ifos=[ifo], - **kwargs + **stat_extras ) self.thresholds = { @@ -223,7 +233,6 @@ def from_cli(cls, args, ifo): # (may be empty) stat_files = sum(stat_files, []) - kwargs = stat.parse_statistic_keywords_opt(stat_keywords) return cls( ifo, ranking_threshold=args.single_ranking_threshold[ifo], reduced_chisq_threshold=args.single_reduced_chisq_threshold[ifo], @@ -234,9 +243,10 @@ def from_cli(cls, args, ifo): sngl_ifar_est_dist=args.sngl_ifar_est_dist[ifo], statistic=args.ranking_statistic, sngl_ranking=args.sngl_ranking, + stat_features=args.statistic_features, + stat_keywords=args.statistic_keywords, stat_files=stat_files, statistic_refresh_rate=args.statistic_refresh_rate, - **kwargs ) def check(self, trigs, data_reader): diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py index c98e09fcafa..9c7a07130c1 100644 --- a/pycbc/io/hdf.py +++ b/pycbc/io/hdf.py @@ -724,7 +724,7 @@ def mask_to_n_loudest_clustered_events(self, rank_method, statistic_threshold=None, n_loudest=10, cluster_window=10, - statistic_kwargs=None): + ): """Edits the mask property of the class to point to the N loudest single detector events as ranked by ranking statistic. @@ -733,12 +733,9 @@ def mask_to_n_loudest_clustered_events(self, rank_method, statistic using statistic_threshold """ - if statistic_kwargs is None: - statistic_kwargs = {} sds = rank_method.single(self.trig_dict()) stat = rank_method.rank_stat_single( (self.ifo, sds), - **statistic_kwargs ) if len(stat) == 0: # No triggers at all, so just return here
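
Usage sketch for the new modular statistic options (illustrative only, not part of the patches above). The pieces added in this series are expected to fit together roughly as follows; the option values are those used in examples/live/run.sh, the detector name "H1" and the statistic file list are placeholders, and parse_statistic_feature_options is the helper called from pycbc/events/single.py in the final patch.

    from pycbc.events import stat

    # CLI-style inputs, as passed via --ranking-statistic, --sngl-ranking,
    # --statistic-features, --statistic-keywords and --statistic-files
    # (values taken from examples/live/run.sh; file names are placeholders)
    ranking_statistic = "exp_fit"
    sngl_ranking = "newsnr_sgveto_psdvar_threshold"
    statistic_features = ["phasetd", "sensitive_volume", "normalize_fit_rate"]
    statistic_keywords = ["alpha_below_thresh:6"]
    statistic_files = ["statHL.hdf", "statHV.hdf"]

    # Features and KEY:VALUE keywords are folded into constructor kwargs,
    # so nothing extra needs to be passed at ranking time
    extra_kwargs = stat.parse_statistic_feature_options(
        statistic_features,
        statistic_keywords,
    )
    stat_class = stat.get_statistic(ranking_statistic)
    rank_method = stat_class(
        sngl_ranking,
        statistic_files,
        ifos=["H1"],
        **extra_kwargs
    )

    # Ranking single-detector triggers no longer takes **kwargs:
    # sds = rank_method.single(trigs)
    # stat_values = rank_method.rank_stat_single(("H1", sds))
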