diff --git a/bin/all_sky_search/pycbc_sngls_findtrigs b/bin/all_sky_search/pycbc_sngls_findtrigs index 066cec88d5f..9b2e3bfb76d 100644 --- a/bin/all_sky_search/pycbc_sngls_findtrigs +++ b/bin/all_sky_search/pycbc_sngls_findtrigs @@ -131,17 +131,6 @@ if args.cluster_window is not None: logging.info('Clustering events over %s s window within each template', args.cluster_window) -extra_kwargs = {} -for inputstr in args.statistic_keywords: - try: - key, value = inputstr.split(':') - extra_kwargs[key] = value - except ValueError: - err_txt = "--statistic-keywords must take input in the " \ - "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \ - "Received {}".format(args.statistic_keywords) - raise ValueError(err_txt) - loudest_keep_vals = args.loudest_keep_values.strip('[]').split(',') threshes = [] @@ -172,6 +161,11 @@ for i, tnum in enumerate(template_ids): "Calculating statistic in template %d out of %d", i, len(template_ids), ) + else: + logging.debug( + "Calculating statistic in template %d out of %d", + i, len(template_ids), + ) tids_uncut = trigs.set_template(tnum) trigger_keep_ids = cuts.apply_trigger_cuts(trigs, trigger_cut_dict, @@ -188,9 +182,8 @@ for i, tnum in enumerate(template_ids): # Stat class instance to calculate the ranking statistic sds = rank_method.single(trigs)[trigger_keep_ids] - stat_t = rank_method.rank_stat_single((ifo, sds), - **extra_kwargs) - trigger_times = sds['end_time'] + stat_t = rank_method.rank_stat_single((ifo, sds)) + trigger_times = trigs['end_time'][:][trigger_keep_ids] if args.cluster_window is not None: cid = coinc.cluster_over_time(stat_t, trigger_times, args.cluster_window) @@ -206,7 +199,7 @@ for i, tnum in enumerate(template_ids): # Perform decimation dec_facs = np.ones_like(template_ids_all) stat_all = np.array(stat_all) -template_ids_all = np.array(template_ids_all) +template_ids_all = np.array(template_ids_all, dtype=int) trigger_ids_all = np.array(trigger_ids_all) trigger_times_all = np.array(trigger_times_all) diff --git a/bin/live/pycbc_live_single_significance_fits b/bin/live/pycbc_live_single_significance_fits index b1e76c32612..ab8243f6f45 100644 --- a/bin/live/pycbc_live_single_significance_fits +++ b/bin/live/pycbc_live_single_significance_fits @@ -71,8 +71,6 @@ pycbc.init_logging(args.verbose) sngls_io.verify_live_significance_trigger_pruning_options(args, parser) sngls_io.verify_live_significance_duration_bin_options(args, parser) -stat_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords) - duration_bin_edges = sngls_io.duration_bins_from_cli(args) logging.info( "Duration bin edges: %s", @@ -192,7 +190,6 @@ for counter, filename in enumerate(files): sds = rank_method[ifo].single(triggers_cut) sngls_value = rank_method[ifo].rank_stat_single( (ifo, sds), - **stat_kwargs ) triggers_cut['stat'] = sngls_value diff --git a/bin/minifollowups/pycbc_sngl_minifollowup b/bin/minifollowups/pycbc_sngl_minifollowup index ea65cfb97c7..ea290ecfc76 100644 --- a/bin/minifollowups/pycbc_sngl_minifollowup +++ b/bin/minifollowups/pycbc_sngl_minifollowup @@ -238,13 +238,10 @@ if args.maximum_duration is not None: logging.info('Finding loudest clustered events') rank_method = stat.get_statistic_from_opts(args, [args.instrument]) -extra_kwargs = stat.parse_statistic_keywords_opt(args.statistic_keywords) - trigs.mask_to_n_loudest_clustered_events( rank_method, n_loudest=num_events, cluster_window=args.cluster_window, - statistic_kwargs=extra_kwargs, ) times = trigs.end_time diff --git a/docs/workflow/pycbc_make_offline_search_workflow.rst 
b/docs/workflow/pycbc_make_offline_search_workflow.rst index 7addf015000..53366262dcd 100644 --- a/docs/workflow/pycbc_make_offline_search_workflow.rst +++ b/docs/workflow/pycbc_make_offline_search_workflow.rst @@ -306,9 +306,73 @@ Specify the name of the channel you want to run the inspiral analysis over for H [coinc] coinc-threshold = 0.000 + ranking-statistic = exp_fit + sngl-ranking = newsnr_sgveto_psdvar + statistic-features = sensitive_volume normalize_fit_rate phasetd + statistic-keywords = alpha_below_thresh:6 sngl_ranking_min_expected_psdvar:0.7 Here we are doing exact match coincidence. So we take the light travel time between detectors and look for triggers which are coincident within this time window. The threshold defines if you want to extend the window. +How triggers are ranked is defined by the ranking-statistic, sngl-ranking, statistic-features and statistic-keywords options. + - ``sngl-ranking`` = The ranking used for single-detector triggers; this is generally a re-weighting of the SNR. + - ``ranking-statistic`` = How the triggers from a set of detectors are ranked in order to calculate significance. This will take the form of an SNR-like combination (``quadsum``, ``phasetd``, ``exp_fit_csnr``), or a log-rates-like statistic, ``exp_fit``. See the Ranking Statistic table below for the options. + - ``statistic-features`` = If using ranking-statistic ``exp_fit``, then these are the features to add or subtract from the ranking statistic. These are described in the Statistic Features table below. + - ``statistic-keywords`` = Some statistics require keywords to modify the behavior of the statistic in certain situations. Keywords affecting the sngl-ranking calculation are also given here, starting with ``sngl_ranking_``. These are described in the Statistic Keywords table below. + - ``statistic-files`` = Files to be used in the statistic calculation; of particular note here are the files needed for DQ and KDE reranking. + +.. list-table:: Ranking Statistic + :widths: 25 75 + :header-rows: 1 + + * - Statistic name + - Description + * - ``quadsum`` + - The quadrature sum of the single-detector rankings in each detector of the triggered network. ``sngl_ranking_only`` can also be given and is exactly equivalent. + * - ``phasetd`` + - The same as ``quadsum``, but reweighted according to the measured phase and time differences and amplitude ratios between detectors. + * - ``exp_fit_csnr`` + - This is a reworking of the exponential fit designed to resemble network SNR. Uses a monotonic function of the negative log noise rate density which approximates the combined sngl-ranking for coincs with similar newsnr in each ifo. + * - ``exp_fit`` + - The ratio of signal-to-noise rates in the triggered network of detectors. The trigger density at a given sngl-ranking is approximated for each template, and this is combined for the triggered network. + +.. list-table:: Statistic Features + :widths: 25 75 + :header-rows: 1 + + * - Feature name + - Description + * - ``phasetd`` + - Use a histogram of expected phase and time differences, and amplitude ratio, for signals to determine a factor to be added for the signal rate. + * - ``sensitive_volume`` + - Signal rate is expected to be proportional to the cube of the sensitive distance. This feature adds a factor of :math:`\log(\sigma^3)` minus a benchmark value, to make this zero in many cases. + * - ``normalize_fit_rate`` + - Normalise the exponential fits to use a rate rather than an absolute count of triggers. This means that statistics should be comparable over differently-sized analyses. 
+ * - ``dq`` + - Apply a reweighting factor according to the rate of triggers during data-quality flags versus the rate outside of them. Must supply a reranking file using ``statistic-files`` for each detector, with stat attribute '{detector}-dq_stat_info'. + * - ``kde`` + - Re-rank according to the ratio of signal and template densities calculated using a KDE approach. Must supply two reranking files using ``statistic-files``, with stat attributes 'signal-kde_file' and 'template-kde_file' respectively. + * - ``chirp_mass`` + - Apply a factor of :math:`\log((M_c / 20)^{11/3})` to the statistic. This makes the signal rate uniform over chirp mass, as this factor cancels out the power of :math:`-11/3` caused by differences in the density of template placement. + +.. list-table:: Statistic Keywords + :widths: 25 75 + :header-rows: 1 + + * - Keyword + - Description + * - ``benchmark_lograte`` + - This is a numerical factor to be subtracted from the log rate ratio in order to alter the dynamic range. Default -14.6. + * - ``minimum_statistic_cutoff`` + - Cutoff for the statistic in order to avoid underflows and very small statistic values. Default -30. + * - ``alpha_below_thresh`` + - The fit coefficient (alpha) below the fit threshold (defined in the fit_by_template jobs) will be replaced by a standard value. This is because, below the threshold, Gaussian noise can dominate over the glitch response that dominates above it, so rates would be underestimated, artificially boosting quiet triggers in noisy templates. For Gaussian noise, the coefficient will be approximately 6 (the default). To use the fitted value unchanged, supply ``alpha_below_thresh:None``. + * - ``reference_ifos`` + - If using the ``sensitive_volume`` feature, these are the detectors used to determine the benchmark value against which the sensitive volume is compared. We use the median sensitive volume in the network of detectors supplied. Default H1,L1. + * - ``max_chirp_mass`` + - If using the ``chirp_mass`` feature, this chirp mass defines a maximum weighting which can be applied to the statistic. + * - ``sngl_ranking_*`` + - This is used to provide the keyword arguments to functions in `the events.ranking module `_. For example, to use a different psdvar threshold in the ``newsnr_sgveto_psdvar_threshold`` function, we would use ``sngl_ranking_psd_var_val_threshold:10``. 
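+
+As a rough sketch, the options above map onto the statistic classes approximately as follows; the file names are placeholders, and each ``statistic-files`` entry must carry the expected ``stat`` attribute (for example ``H1-fit_coeffs``)::
+
+    from pycbc.events import stat
+
+    # Values mirroring the example [coinc] section above
+    features = ["phasetd", "sensitive_volume", "normalize_fit_rate"]
+    keywords = ["alpha_below_thresh:6", "sngl_ranking_min_expected_psdvar:0.7"]
+
+    # Placeholder statistic-files paths: single-detector fit coefficients
+    # and a phase/time-difference histogram file for the phasetd feature
+    files = ["H1-fit_coeffs.hdf", "L1-fit_coeffs.hdf", "statHL.hdf"]
+
+    # Expand the feature/keyword options into keyword arguments
+    extra = stat.parse_statistic_feature_options(features, keywords)
+
+    # Instantiate the exp_fit statistic with the chosen sngl-ranking
+    rank_method = stat.get_statistic("exp_fit")(
+        "newsnr_sgveto_psdvar",
+        files,
+        ifos=["H1", "L1"],
+        **extra,
+    )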
+ :: [coinc-full] diff --git a/examples/live/run.sh b/examples/live/run.sh index 434f134d5ad..d90df828b09 100755 --- a/examples/live/run.sh +++ b/examples/live/run.sh @@ -186,7 +186,14 @@ python -m mpi4py `which pycbc_live` \ --output-path output \ --day-hour-output-prefix \ --sngl-ranking newsnr_sgveto_psdvar_threshold \ ---ranking-statistic phasetd_exp_fit_fgbg_norm \ +--ranking-statistic \ + exp_fit \ +--statistic-features \ + phasetd \ + sensitive_volume \ + normalize_fit_rate \ +--statistic-keywords \ + alpha_below_thresh:6 \ --statistic-files \ statHL.hdf \ statHV.hdf \ diff --git a/examples/search/analysis.ini b/examples/search/analysis.ini index 4361ef1eb2c..eb1e0d0202d 100644 --- a/examples/search/analysis.ini +++ b/examples/search/analysis.ini @@ -171,7 +171,9 @@ smoothing-width = 0.4 analyze = [coinc&sngls] -ranking-statistic = phasetd_exp_fit_fgbg_norm +ranking-statistic = exp_fit +statistic-features = phasetd sensitive_volume normalize_fit_rate +statistic-keywords = alpha_below_thresh:6 sngl-ranking = newsnr_sgveto_psdvar randomize-template-order = statistic-files = ${resolve:./statHL.hdf} ${resolve:./statLV.hdf} ${resolve:./statHV.hdf} ${resolve:./statHLV.hdf} diff --git a/examples/search/plotting.ini b/examples/search/plotting.ini index 5a7d4f55837..307edb5b237 100644 --- a/examples/search/plotting.ini +++ b/examples/search/plotting.ini @@ -38,6 +38,8 @@ non-coinc-time-only = vetoed-time-only = ranking-statistic = ${sngls|ranking-statistic} statistic-files = ${sngls|statistic-files} +statistic-features = ${sngls|statistic-features} +statistic-keywords = ${sngls|statistic-keywords} [injection_minifollowup] ifar-threshold = 1 @@ -50,6 +52,8 @@ sngl-ranking = newsnr_sgveto_psdvar [page_snglinfo-vetoed] ranking-statistic = ${sngls|ranking-statistic} +statistic-features = ${sngls|statistic-features} +statistic-keywords = ${sngls|statistic-keywords} [single_template_plot] diff --git a/pycbc/events/coinc.py b/pycbc/events/coinc.py index ee729a97bcc..4d686941e11 100644 --- a/pycbc/events/coinc.py +++ b/pycbc/events/coinc.py @@ -973,12 +973,16 @@ def from_cli(cls, args, num_templates, analysis_chunk, ifos): # Allow None inputs stat_files = args.statistic_files or [] + stat_features = args.statistic_features or [] stat_keywords = args.statistic_keywords or [] # flatten the list of lists of filenames to a single list (may be empty) stat_files = sum(stat_files, []) - kwargs = pycbcstat.parse_statistic_keywords_opt(stat_keywords) + kwargs = pycbcstat.parse_statistic_feature_options( + stat_features, + stat_keywords, + ) return cls(num_templates, analysis_chunk, args.ranking_statistic, diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index 0368bb05371..d733f444296 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -7,7 +7,8 @@ logger = logging.getLogger('pycbc.events.ranking') -def effsnr(snr, reduced_x2, fac=250.): +def effsnr(snr, reduced_x2, fac=250., + **kwargs): # pylint:disable=unused-argument """Calculate the effective SNR statistic. See (S5y1 paper) for definition. """ snr = numpy.array(snr, ndmin=1, dtype=numpy.float64) @@ -21,7 +22,8 @@ def effsnr(snr, reduced_x2, fac=250.): return esnr[0] -def newsnr(snr, reduced_x2, q=6., n=2.): +def newsnr(snr, reduced_x2, q=6., n=2., + **kwargs): # pylint:disable=unused-argument """Calculate the re-weighted SNR statistic ('newSNR') from given SNR and reduced chi-squared values. See http://arxiv.org/abs/1208.3491 for definition. 
Previous implementation in glue/ligolw/lsctables.py @@ -40,9 +42,15 @@ def newsnr(snr, reduced_x2, q=6., n=2.): return nsnr[0] -def newsnr_sgveto(snr, brchisq, sgchisq): +def newsnr_sgveto(snr, brchisq, sgchisq, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq""" - nsnr = numpy.array(newsnr(snr, brchisq), ndmin=1) + nsnr = numpy.array( + newsnr( + snr, + brchisq, + **kwargs), + ndmin=1) sgchisq = numpy.array(sgchisq, ndmin=1) t = numpy.array(sgchisq > 4, ndmin=1) if len(t): @@ -56,7 +64,8 @@ def newsnr_sgveto(snr, brchisq, sgchisq): def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=0.65): + min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from SNR, reduced Allen chisq, sine-Gaussian chisq and PSD variation statistic""" # If PSD var is lower than the 'minimum usually expected value' stop this @@ -66,7 +75,12 @@ def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, psd_var_val[psd_var_val < min_expected_psdvar] = 1. scaled_snr = snr * (psd_var_val ** -0.5) scaled_brchisq = brchisq * (psd_var_val ** -1.) - nsnr = newsnr_sgveto(scaled_snr, scaled_brchisq, sgchisq) + nsnr = newsnr_sgveto( + scaled_snr, + scaled_brchisq, + sgchisq, + **kwargs + ) # If snr input is float, return a float. Otherwise return numpy array. if hasattr(snr, '__len__'): @@ -78,14 +92,21 @@ def newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, min_expected_psdvar=0.65, brchisq_threshold=10.0, - psd_var_val_threshold=10.0): + psd_var_val_threshold=10.0, + **kwargs): # pylint:disable=unused-argument """ newsnr_sgveto_psdvar with thresholds applied. This is the newsnr_sgveto_psdvar statistic with additional options to threshold on chi-squared or PSD variation. """ - nsnr = newsnr_sgveto_psdvar(snr, brchisq, sgchisq, psd_var_val, - min_expected_psdvar=min_expected_psdvar) + nsnr = newsnr_sgveto_psdvar( + snr, + brchisq, + sgchisq, + psd_var_val, + min_expected_psdvar=min_expected_psdvar, + **kwargs + ) nsnr = numpy.array(nsnr, ndmin=1) nsnr[brchisq > brchisq_threshold] = 1. nsnr[psd_var_val > psd_var_val_threshold] = 1. @@ -98,10 +119,17 @@ def newsnr_sgveto_psdvar_threshold(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, - scaling=0.33, min_expected_psdvar=0.65): + scaling=0.33, min_expected_psdvar=0.65, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR, Sine-Gaussian Chisq and scaled PSD variation statistic. """ - nsnr = numpy.array(newsnr_sgveto(snr, brchisq, sgchisq), ndmin=1) + nsnr = numpy.array( + newsnr_sgveto( + snr, + brchisq, + sgchisq, + **kwargs), + ndmin=1) psd_var_val = numpy.array(psd_var_val, ndmin=1, copy=True) psd_var_val[psd_var_val < min_expected_psdvar] = 1. @@ -116,11 +144,18 @@ def newsnr_sgveto_psdvar_scaled(snr, brchisq, sgchisq, psd_var_val, def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, - threshold=2.0): + threshold=2.0, + **kwargs): # pylint:disable=unused-argument """ Combined SNR derived from NewSNR and Sine-Gaussian Chisq, and scaled psd variation. """ - nsnr = newsnr_sgveto_psdvar_scaled(snr, bchisq, sgchisq, psd_var_val) + nsnr = newsnr_sgveto_psdvar_scaled( + snr, + bchisq, + sgchisq, + psd_var_val, + **kwargs + ) nsnr = numpy.array(nsnr, ndmin=1) nsnr[bchisq > threshold] = 1. 
@@ -131,7 +166,7 @@ def newsnr_sgveto_psdvar_scaled_threshold(snr, bchisq, sgchisq, psd_var_val, return nsnr[0] -def get_snr(trigs): +def get_snr(trigs, **kwargs): # pylint:disable=unused-argument """ Return SNR from a trigs/dictionary object @@ -149,7 +184,7 @@ def get_snr(trigs): return numpy.array(trigs['snr'][:], ndmin=1, dtype=numpy.float32) -def get_newsnr(trigs): +def get_newsnr(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr ('reweighted SNR') for a trigs/dictionary object @@ -165,11 +200,15 @@ def get_newsnr(trigs): Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr = newsnr(trigs['snr'][:], trigs['chisq'][:] / dof) + nsnr = newsnr( + trigs['snr'][:], + trigs['chisq'][:] / dof, + **kwargs + ) return numpy.array(nsnr, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto(trigs): +def get_newsnr_sgveto(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weigthed by the sine-gaussian veto @@ -185,13 +224,16 @@ def get_newsnr_sgveto(trigs): Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr_sg = newsnr_sgveto(trigs['snr'][:], - trigs['chisq'][:] / dof, - trigs['sg_chisq'][:]) + nsnr_sg = newsnr_sgveto( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + **kwargs + ) return numpy.array(nsnr_sg, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar(trigs): +def get_newsnr_sgveto_psdvar(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate snr re-weighted by Allen chisq, sine-gaussian veto and psd variation statistic @@ -208,14 +250,17 @@ def get_newsnr_sgveto_psdvar(trigs): Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr_sg_psd = \ - newsnr_sgveto_psdvar(trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:]) + nsnr_sg_psd = newsnr_sgveto_psdvar( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], + **kwargs + ) return numpy.array(nsnr_sg_psd, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_threshold(trigs): +def get_newsnr_sgveto_psdvar_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -235,12 +280,13 @@ def get_newsnr_sgveto_psdvar_threshold(trigs): nsnr_sg_psdt = newsnr_sgveto_psdvar_threshold( trigs['snr'][:], trigs['chisq'][:] / dof, trigs['sg_chisq'][:], - trigs['psd_var_val'][:] + trigs['psd_var_val'][:], + **kwargs ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled(trigs): +def get_newsnr_sgveto_psdvar_scaled(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic @@ -257,15 +303,17 @@ def get_newsnr_sgveto_psdvar_scaled(trigs): Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. 
- nsnr_sg_psdscale = \ - newsnr_sgveto_psdvar_scaled( - trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:]) + nsnr_sg_psdscale = newsnr_sgveto_psdvar_scaled( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], + **kwargs + ) return numpy.array(nsnr_sg_psdscale, ndmin=1, dtype=numpy.float32) -def get_newsnr_sgveto_psdvar_scaled_threshold(trigs): +def get_newsnr_sgveto_psdvar_scaled_threshold(trigs, **kwargs): # pylint:disable=unused-argument """ Calculate newsnr re-weighted by the sine-gaussian veto and scaled psd variation statistic. A further threshold is applied to the @@ -283,11 +331,13 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs): Array of newsnr values """ dof = 2. * trigs['chisq_dof'][:] - 2. - nsnr_sg_psdt = \ - newsnr_sgveto_psdvar_scaled_threshold( - trigs['snr'][:], trigs['chisq'][:] / dof, - trigs['sg_chisq'][:], - trigs['psd_var_val'][:]) + nsnr_sg_psdt = newsnr_sgveto_psdvar_scaled_threshold( + trigs['snr'][:], + trigs['chisq'][:] / dof, + trigs['sg_chisq'][:], + trigs['psd_var_val'][:], + **kwargs + ) return numpy.array(nsnr_sg_psdt, ndmin=1, dtype=numpy.float32) @@ -299,23 +349,24 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs): 'newsnr_sgveto_psdvar': get_newsnr_sgveto_psdvar, 'newsnr_sgveto_psdvar_threshold': get_newsnr_sgveto_psdvar_threshold, 'newsnr_sgveto_psdvar_scaled': get_newsnr_sgveto_psdvar_scaled, - 'newsnr_sgveto_psdvar_scaled_threshold': get_newsnr_sgveto_psdvar_scaled_threshold, + 'newsnr_sgveto_psdvar_scaled_threshold': + get_newsnr_sgveto_psdvar_scaled_threshold, } # Lists of datasets required in the trigs object for each function -required_datasets = {} -required_datasets['snr'] = ['snr'] -required_datasets['newsnr'] = required_datasets['snr'] + ['chisq', 'chisq_dof'] -required_datasets['new_snr'] = required_datasets['newsnr'] -required_datasets['newsnr_sgveto'] = required_datasets['newsnr'] + ['sg_chisq'] -required_datasets['newsnr_sgveto_psdvar'] = \ - required_datasets['newsnr_sgveto'] + ['psd_var_val'] -required_datasets['newsnr_sgveto_psdvar_threshold'] = \ - required_datasets['newsnr_sgveto_psdvar'] -required_datasets['newsnr_sgveto_psdvar_scaled'] = \ - required_datasets['newsnr_sgveto_psdvar'] -required_datasets['newsnr_sgveto_psdvar_scaled_threshold'] = \ - required_datasets['newsnr_sgveto_psdvar'] +reqd_datasets = {} +reqd_datasets['snr'] = ['snr'] +reqd_datasets['newsnr'] = reqd_datasets['snr'] + ['chisq', 'chisq_dof'] +reqd_datasets['new_snr'] = reqd_datasets['newsnr'] +reqd_datasets['newsnr_sgveto'] = reqd_datasets['newsnr'] + ['sg_chisq'] +reqd_datasets['newsnr_sgveto_psdvar'] = \ + reqd_datasets['newsnr_sgveto'] + ['psd_var_val'] +reqd_datasets['newsnr_sgveto_psdvar_threshold'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] +reqd_datasets['newsnr_sgveto_psdvar_scaled'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] +reqd_datasets['newsnr_sgveto_psdvar_scaled_threshold'] = \ + reqd_datasets['newsnr_sgveto_psdvar'] def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): diff --git a/pycbc/events/single.py b/pycbc/events/single.py index db22fd08712..a1d47de9154 100644 --- a/pycbc/events/single.py +++ b/pycbc/events/single.py @@ -25,6 +25,8 @@ def __init__(self, ifo, fixed_ifar=None, maximum_ifar=None, statistic=None, + stat_features=None, + stat_keywords=None, sngl_ranking=None, stat_files=None, statistic_refresh_rate=None, @@ -54,6 +56,10 @@ def __init__(self, ifo, threshold criteria statistic: str The name of the statistic to rank events. 
+ stat_features: list of str + The names of features to use for the statistic + stat_keywords: list of key:value strings + argument for statistic keywords sngl_ranking: str The single detector ranking to use with the background statistic stat_files: list of strs @@ -80,11 +86,15 @@ class (in seconds), default not do do this self.statistic_refresh_rate = statistic_refresh_rate stat_class = stat.get_statistic(statistic) + stat_extras = stat.parse_statistic_feature_options( + stat_features, + stat_keywords, + ) self.stat_calculator = stat_class( sngl_ranking, stat_files, ifos=[ifo], - **kwargs + **stat_extras ) self.thresholds = { @@ -223,7 +233,6 @@ def from_cli(cls, args, ifo): # (may be empty) stat_files = sum(stat_files, []) - kwargs = stat.parse_statistic_keywords_opt(stat_keywords) return cls( ifo, ranking_threshold=args.single_ranking_threshold[ifo], reduced_chisq_threshold=args.single_reduced_chisq_threshold[ifo], @@ -234,9 +243,10 @@ def from_cli(cls, args, ifo): sngl_ifar_est_dist=args.sngl_ifar_est_dist[ifo], statistic=args.ranking_statistic, sngl_ranking=args.sngl_ranking, + stat_features=args.statistic_features, + stat_keywords=args.statistic_keywords, stat_files=stat_files, statistic_refresh_rate=args.statistic_refresh_rate, - **kwargs ) def check(self, trigs, data_reader): diff --git a/pycbc/events/stat.py b/pycbc/events/stat.py index e04ba732cad..52477914174 100644 --- a/pycbc/events/stat.py +++ b/pycbc/events/stat.py @@ -36,11 +36,20 @@ from .eventmgr_cython import logsignalrateinternals_computepsignalbins from .eventmgr_cython import logsignalrateinternals_compute2detrate -logger = logging.getLogger('pycbc.events.stat') +logger = logging.getLogger("pycbc.events.stat") + +_allowed_statistic_features = [ + "phasetd", + "kde", + "dq", + "chirp_mass", + "sensitive_volume", + "normalize_fit_rate", +] class Stat(object): - """Base class which should be extended to provide a coincident statistic""" + """Base class which should be extended to provide a statistic""" def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): """ @@ -62,13 +71,15 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.files = {} files = files or [] for filename in files: - with h5py.File(filename, 'r') as f: - stat = f.attrs['stat'] - if hasattr(stat, 'decode'): + with h5py.File(filename, "r") as f: + stat = f.attrs["stat"] + if hasattr(stat, "decode"): stat = stat.decode() if stat in self.files: - raise RuntimeError("We already have one file with stat attr =" - " %s. Can't provide more than one!" % stat) + raise RuntimeError( + "We already have one file with stat attr = " + "%s. Can't provide more than one!" 
% stat + ) logger.info("Found file %s for stat %s", filename, stat) self.files[stat] = filename # Keep track of when stat files hashes so it can be @@ -87,9 +98,15 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): self.sngl_ranking = sngl_ranking self.sngl_ranking_kwargs = {} + self.kwargs = {} for key, value in kwargs.items(): - if key.startswith('sngl_ranking_'): - self.sngl_ranking_kwargs[key[13:]] = value + if key.startswith("sngl_ranking_"): + # Note that all the sngl_ranking keywords are floats, + # so this conversion is safe - if that changes in the future, + # we may need something more sophisticated + self.sngl_ranking_kwargs[key[13:]] = float(value) + else: + self.kwargs[key] = value def get_file_hashes(self): """ @@ -167,12 +184,10 @@ def get_sngl_ranking(self, trigs): The array of single detector values """ return ranking.get_sngls_ranking_from_trigs( - trigs, - self.sngl_ranking, - **self.sngl_ranking_kwargs + trigs, self.sngl_ranking, **self.sngl_ranking_kwargs ) - def single(self, trigs): # pylint:disable=unused-argument + def single(self, trigs): # pylint:disable=unused-argument """ Calculate the necessary single detector information @@ -190,8 +205,7 @@ def single(self, trigs): # pylint:disable=unused-argument err_msg += "sub-classes. You shouldn't be seeing this error!" raise NotImplementedError(err_msg) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): # pylint:disable=unused-argument """ Calculate the statistic for a single detector candidate @@ -210,8 +224,9 @@ def rank_stat_single(self, single_info, err_msg += "sub-classes. You shouldn't be seeing this error!" raise NotImplementedError(err_msg) - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, s, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. """ @@ -240,8 +255,9 @@ def _check_coinc_lim_subclass(self, allowed_names): err_msg += "list of allowed_names above." raise NotImplementedError(err_msg) - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest @@ -275,8 +291,7 @@ def single(self, trigs): """ return self.get_sngl_ranking(trigs) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -296,8 +311,9 @@ def rank_stat_single(self, single_info, """ return single_info[1] - def rank_stat_coinc(self, sngls_list, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, sngls_list, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic. @@ -316,14 +332,15 @@ def rank_stat_coinc(self, sngls_list, slide, step, to_shift, numpy.ndarray Array of coincident ranking statistic values """ - cstat = sum(sngl[1] ** 2. 
for sngl in sngls_list) ** 0.5 + cstat = sum(sngl[1] ** 2.0 for sngl in sngls_list) ** 0.5 # For single-detector "cuts" the single ranking is set to -1 for sngls in sngls_list: cstat[sngls == -1] = 0 return cstat - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest @@ -347,12 +364,12 @@ def coinc_lim_for_thresh(self, s, thresh, limifo, exceed thresh. """ # Safety against subclassing and not rethinking this - allowed_names = ['QuadratureSumStatistic'] + allowed_names = ["QuadratureSumStatistic"] self._check_coinc_lim_subclass(allowed_names) - s0 = thresh ** 2. - sum(sngl[1] ** 2. for sngl in s) + s0 = thresh**2.0 - sum(sngl[1] ** 2.0 for sngl in s) s0[s0 < 0] = 0 - return s0 ** 0.5 + return s0**0.5 class PhaseTDStatistic(QuadratureSumStatistic): @@ -363,8 +380,14 @@ class PhaseTDStatistic(QuadratureSumStatistic): amplitude ratios between triggers in different ifos. """ - def __init__(self, sngl_ranking, files=None, ifos=None, - pregenerate_hist=True, **kwargs): + def __init__( + self, + sngl_ranking, + files=None, + ifos=None, + pregenerate_hist=True, + **kwargs, + ): """ Parameters ---------- @@ -384,21 +407,22 @@ def __init__(self, sngl_ranking, files=None, ifos=None, If False, do not pregenerate histogram on class instantiation. Default is True. """ - QuadratureSumStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) + QuadratureSumStatistic.__init__( + self, sngl_ranking, files=files, ifos=ifos, **kwargs + ) self.single_dtype = [ - ('snglstat', numpy.float32), - ('coa_phase', numpy.float32), - ('end_time', numpy.float64), - ('sigmasq', numpy.float32), - ('snr', numpy.float32) + ("snglstat", numpy.float32), + ("coa_phase", numpy.float32), + ("end_time", numpy.float64), + ("sigmasq", numpy.float32), + ("snr", numpy.float32), ] # Assign attribute so that it can be replaced with other functions self.has_hist = False self.hist_ifos = None - self.ref_snr = 5. 
+ self.ref_snr = 5.0 self.relsense = {} self.swidth = self.pwidth = self.twidth = None self.srbmin = self.srbmax = None @@ -406,7 +430,7 @@ def __init__(self, sngl_ranking, files=None, ifos=None, self.pdtype = [] self.weights = {} self.param_bin = {} - self.two_det_flag = (len(ifos) == 2) + self.two_det_flag = len(ifos) == 2 self.two_det_weights = {} # Some memory self.pdif = numpy.zeros(128, dtype=numpy.float64) @@ -442,8 +466,8 @@ def get_hist(self, ifos=None): for name in self.files: # Pick out the statistic files that provide phase / time/ amp # relationships and match to the ifos in use - if 'phasetd_newsnr' in name: - ifokey = name.split('_')[2] + if "phasetd_newsnr" in name: + ifokey = name.split("_")[2] num = len(ifokey) / 2 if num != len(ifos): continue @@ -473,32 +497,32 @@ def get_hist(self, ifos=None): weights = {} param = {} - with h5py.File(self.files[selected], 'r') as histfile: - self.hist_ifos = histfile.attrs['ifos'] + with h5py.File(self.files[selected], "r") as histfile: + self.hist_ifos = histfile.attrs["ifos"] # Patch for pre-hdf5=3.0 histogram files try: logger.info("Decoding hist ifos ..") - self.hist_ifos = [i.decode('UTF-8') for i in self.hist_ifos] + self.hist_ifos = [i.decode("UTF-8") for i in self.hist_ifos] except (UnicodeDecodeError, AttributeError): pass # Histogram bin attributes - self.twidth = histfile.attrs['twidth'] - self.pwidth = histfile.attrs['pwidth'] - self.swidth = histfile.attrs['swidth'] - self.srbmin = histfile.attrs['srbmin'] - self.srbmax = histfile.attrs['srbmax'] - relfac = histfile.attrs['sensitivity_ratios'] + self.twidth = histfile.attrs["twidth"] + self.pwidth = histfile.attrs["pwidth"] + self.swidth = histfile.attrs["swidth"] + self.srbmin = histfile.attrs["srbmin"] + self.srbmax = histfile.attrs["srbmax"] + relfac = histfile.attrs["sensitivity_ratios"] for ifo in self.hist_ifos: - weights[ifo] = histfile[ifo]['weights'][:] - param[ifo] = histfile[ifo]['param_bin'][:] + weights[ifo] = histfile[ifo]["weights"][:] + param[ifo] = histfile[ifo]["param_bin"][:] n_ifos = len(self.hist_ifos) bin_volume = (self.twidth * self.pwidth * self.swidth) ** (n_ifos - 1) - self.hist_max = - 1. * numpy.inf + self.hist_max = -1.0 * numpy.inf # Read histogram for each ifo, to use if that ifo has smallest SNR in # the coinc @@ -512,11 +536,14 @@ def get_hist(self, ifos=None): if param[ifo].dtype == numpy.int8: # Older style, incorrectly sorted histogram file ncol = param[ifo].shape[1] - self.pdtype = [('c%s' % i, param[ifo].dtype) for i in range(ncol)] - self.param_bin[ifo] = numpy.zeros(len(self.weights[ifo]), - dtype=self.pdtype) + self.pdtype = [ + ("c%s" % i, param[ifo].dtype) for i in range(ncol) + ] + self.param_bin[ifo] = numpy.zeros( + len(self.weights[ifo]), dtype=self.pdtype + ) for i in range(ncol): - self.param_bin[ifo]['c%s' % i] = param[ifo][:, i] + self.param_bin[ifo]["c%s" % i] = param[ifo][:, i] lsort = self.param_bin[ifo].argsort() self.param_bin[ifo] = self.param_bin[ifo][lsort] @@ -549,32 +576,42 @@ def get_hist(self, ifos=None): # "weights" table a O(N) rather than O(NlogN) operation. It # sacrifices RAM to do this, so is a good tradeoff for 2 # detectors, but not for 3! 
- if not hasattr(self, 'c0_size'): + if not hasattr(self, "c0_size"): self.c0_size = {} self.c1_size = {} self.c2_size = {} self.c0_size[ifo] = numpy.int32( - 2 * (abs(self.param_bin[ifo]['c0']).max() + 1) + 2 * (abs(self.param_bin[ifo]["c0"]).max() + 1) ) self.c1_size[ifo] = numpy.int32( - 2 * (abs(self.param_bin[ifo]['c1']).max() + 1) + 2 * (abs(self.param_bin[ifo]["c1"]).max() + 1) ) self.c2_size[ifo] = numpy.int32( - 2 * (abs(self.param_bin[ifo]['c2']).max() + 1) + 2 * (abs(self.param_bin[ifo]["c2"]).max() + 1) ) - array_size = [self.c0_size[ifo], self.c1_size[ifo], - self.c2_size[ifo]] + array_size = [ + self.c0_size[ifo], + self.c1_size[ifo], + self.c2_size[ifo], + ] dtypec = self.weights[ifo].dtype - self.two_det_weights[ifo] = \ + self.two_det_weights[ifo] = ( numpy.zeros(array_size, dtype=dtypec) + self.max_penalty - id0 = self.param_bin[ifo]['c0'].astype(numpy.int32) \ + ) + id0 = ( + self.param_bin[ifo]["c0"].astype(numpy.int32) + self.c0_size[ifo] // 2 - id1 = self.param_bin[ifo]['c1'].astype(numpy.int32) \ + ) + id1 = ( + self.param_bin[ifo]["c1"].astype(numpy.int32) + self.c1_size[ifo] // 2 - id2 = self.param_bin[ifo]['c2'].astype(numpy.int32) \ + ) + id2 = ( + self.param_bin[ifo]["c2"].astype(numpy.int32) + self.c2_size[ifo] // 2 + ) self.two_det_weights[ifo][id0, id1, id2] = self.weights[ifo] for ifo, sense in zip(self.hist_ifos, relfac): @@ -608,7 +645,8 @@ def update_file(self, key): def logsignalrate(self, stats, shift, to_shift): """ - Calculate the normalized log rate density of signals via lookup + Calculate the normalized log rate density of coincident signals + via lookup Parameters ---------- @@ -633,12 +671,14 @@ def logsignalrate(self, stats, shift, to_shift): # Figure out which ifo of the contributing ifos has the smallest SNR, # to use as reference for choosing the signal histogram. - snrs = numpy.array([numpy.array(stats[ifo]['snr'], ndmin=1) - for ifo in self.ifos]) + snrs = numpy.array( + [numpy.array(stats[ifo]["snr"], ndmin=1) for ifo in self.ifos] + ) smin = snrs.argmin(axis=0) # Store a list of the triggers using each ifo as reference - rtypes = {ifo: numpy.where(smin == j)[0] - for j, ifo in enumerate(self.ifos)} + rtypes = { + ifo: numpy.where(smin == j)[0] for j, ifo in enumerate(self.ifos) + } # Get reference ifo information rate = numpy.zeros(len(shift), dtype=numpy.float32) @@ -701,13 +741,13 @@ def logsignalrate(self, stats, shift, to_shift): self.swidth, to_shift[ref_ifo], to_shift[ifo], - length + length, ) binned += [ self.tbin[:length], self.pbin[:length], - self.sbin[:length] + self.sbin[:length], ] # Read signal weight from precalculated histogram @@ -726,7 +766,7 @@ def logsignalrate(self, stats, shift, to_shift): self.two_det_weights[ref_ifo], self.max_penalty, self.ref_snr, - len(rtype) + len(rtype), ) else: # Low[er]-RAM, high[er]-CPU option for >two det @@ -734,7 +774,7 @@ def logsignalrate(self, stats, shift, to_shift): # Convert binned to same dtype as stored in hist nbinned = numpy.zeros(len(binned[1]), dtype=self.pdtype) for i, b in enumerate(binned): - nbinned[f'c{i}'] = b + nbinned[f"c{i}"] = b loc = numpy.searchsorted(self.param_bin[ref_ifo], nbinned) loc[loc == len(self.weights[ref_ifo])] = 0 @@ -747,7 +787,7 @@ def logsignalrate(self, stats, shift, to_shift): )[0] rate[rtype[missed]] = self.max_penalty # Scale by signal population SNR - rate[rtype] *= (sref[rtype] / self.ref_snr) ** -4. 
+ rate[rtype] *= (sref[rtype] / self.ref_snr) ** -4.0 return numpy.log(rate) @@ -771,15 +811,14 @@ def single(self, trigs): """ sngl_stat = self.get_sngl_ranking(trigs) singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles['snglstat'] = sngl_stat - singles['coa_phase'] = trigs['coa_phase'][:] - singles['end_time'] = trigs['end_time'][:] - singles['sigmasq'] = trigs['sigmasq'][:] - singles['snr'] = trigs['snr'][:] + singles["snglstat"] = sngl_stat + singles["coa_phase"] = trigs["coa_phase"][:] + singles["end_time"] = trigs["end_time"][:] + singles["sigmasq"] = trigs["sigmasq"][:] + singles["snr"] = trigs["snr"][:] return numpy.array(singles, ndmin=1) - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_single(self, single_info): """ Calculate the statistic for a single detector candidate @@ -797,51 +836,56 @@ def rank_stat_single(self, single_info, numpy.ndarray The array of single detector statistics """ - return single_info[1]['snglstat'] + return single_info[1]["snglstat"] - def rank_stat_coinc(self, sngls_list, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, sngls_list, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ Calculate the coincident detection statistic, defined in Eq 2 of [Nitz et al, 2017](https://doi.org/10.3847/1538-4357/aa8f50). """ - rstat = sum(s[1]['snglstat'] ** 2 for s in sngls_list) - cstat = rstat + 2. * self.logsignalrate(dict(sngls_list), - slide * step, - to_shift) + rstat = sum(s[1]["snglstat"] ** 2 for s in sngls_list) + cstat = rstat + 2.0 * self.logsignalrate( + dict(sngls_list), slide * step, to_shift + ) cstat[cstat < 0] = 0 - return cstat ** 0.5 + return cstat**0.5 - def coinc_lim_for_thresh(self, sngls_list, thresh, limifo, - **kwargs): # pylint:disable=unused-argument + def coinc_lim_for_thresh( + self, sngls_list, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ Optimization function to identify coincs too quiet to be of interest. Calculate the required single detector statistic to exceed the threshold for each of the input triggers. """ # Safety against subclassing and not rethinking this - allowed_names = ['PhaseTDStatistic'] + allowed_names = ["PhaseTDStatistic"] self._check_coinc_lim_subclass(allowed_names) if not self.has_hist: self.get_hist() - fixed_statsq = sum( - [b['snglstat'] ** 2 for a, b in sngls_list if a != limifo] + fixed_stat_sq = sum( + [b["snglstat"] ** 2 for a, b in sngls_list if a != limifo] ) - s1 = thresh ** 2. - fixed_statsq + s1 = thresh ** 2. - fixed_stat_sq # Assume best case scenario and use maximum signal rate - s1 -= 2. * self.hist_max + s1 -= 2.0 * self.hist_max s1[s1 < 0] = 0 - return s1 ** 0.5 + return s1**0.5 -class ExpFitStatistic(QuadratureSumStatistic): +class ExpFitStatistic(PhaseTDStatistic): """ Detection statistic using an exponential falloff noise model. Statistic approximates the negative log noise coinc rate density per template over single-ifo newsnr values. + + Extra features can be added by supplying keyword arguments when + initialising. 
""" def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): @@ -859,32 +903,258 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): ifos: list of strs, not used here The list of detector names + kwargs: values and features needed for the statistic """ if not files: - raise RuntimeError("Statistic files not specified") - QuadratureSumStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) + raise RuntimeError("Files not specified") - # the stat file attributes are hard-coded as '%{ifo}-fit_coeffs' - parsed_attrs = [f.split('-') for f in self.files.keys()] - self.bg_ifos = [at[0] for at in parsed_attrs if - (len(at) == 2 and at[1] == 'fit_coeffs')] + PhaseTDStatistic.__init__( + self, sngl_ranking, files=files, ifos=ifos, **kwargs + ) + # Get the single-detector rates fit files + # the stat file attributes are hard-coded as '%{ifo}-fit_coeffs' + parsed_attrs = [f.split("-") for f in self.files.keys()] + self.bg_ifos = [ + at[0] + for at in parsed_attrs + if (len(at) == 2 and at[1] == "fit_coeffs") + ] if not len(self.bg_ifos): - raise RuntimeError("None of the statistic files has the required " - "attribute called {ifo}-fit_coeffs !") + raise RuntimeError( + "None of the statistic files has the required " + "attribute called {ifo}-fit_coeffs !" + ) + # Get the single-detector rates fit values self.fits_by_tid = {} - self.alphamax = {} for i in self.bg_ifos: self.fits_by_tid[i] = self.assign_fits(i) - self.get_ref_vals(i) + if self.kwargs["normalize_fit_rate"]: + self.reassign_rate(i) + # These are important for the coinc_lim_for_thresh method + # Is the single threshold a < or > limit? self.single_increasing = False + # Things for calculating the best-case scenario for signal rate + self.max_sigmasq = -numpy.inf + self.min_snr = numpy.inf + + # Some modifiers for the statistic to get it into a nice range + self.benchmark_lograte = float( + self.kwargs.get("benchmark_lograte", -14.6) + ) + self.min_stat = float( + self.kwargs.get("minimum_statistic_cutoff", -30.0) + ) + + # Modifier to get a sensible value of the fit slope below threshold + self.alphabelow = float( + self.kwargs.get("alpha_below_thresh", numpy.inf) + ) + + # This will be used to keep track of the template number being used + self.curr_tnum = None + + # Go through the keywords and add class information as needed: + if self.kwargs["sensitive_volume"]: + # Add network sensitivity beckmark + self.single_dtype.append(("benchmark_logvol", numpy.float32)) + # benchmark_logvol is a benchmark sensitivity array + # over template id + ref_ifos = self.kwargs.get("reference_ifos", "H1,L1").split(",") + hl_net_med_sigma = numpy.amin( + [self.fits_by_tid[ifo]["median_sigma"] for ifo in ref_ifos], + axis=0, + ) + self.benchmark_logvol = 3.0 * numpy.log(hl_net_med_sigma) + + if self.kwargs["dq"]: + # Reweight the noise rate by the dq reweighting factor + self.dq_rates_by_state = {} + self.dq_bin_by_tid = {} + self.dq_state_segments = None + self.low_latency = False + self.single_dtype.append(('dq_state', int)) + + for ifo in self.ifos: + key = f"{ifo}-dq_stat_info" + if key in self.files.keys(): + self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) + self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) + self.check_low_latency(key) + if not self.low_latency: + if self.dq_state_segments is None: + self.dq_state_segments = {} + self.dq_state_segments[ifo] = self.setup_segments(key) + + if self.kwargs["chirp_mass"]: + # Reweight the signal rate by the chirp mass of the template + # This may 
be stored as a float, so cast just in case + self.mcm = float(self.kwargs.get("max_chirp_mass", numpy.inf)) + self.curr_mchirp = None + + if self.kwargs["kde"]: + # Reweight the signal rate by a weighting factor from the KDE of + # a signal population normalised by expected relative rate of noise + # triggers for a template + self.kde_names = [] + self.find_kdes() + self.kde_by_tid = {} + for kname in self.kde_names: + self.assign_kdes(kname) + + def assign_template_bins(self, key): + """ + Assign bin ID values + Assign each template id to a bin name based on a + referenced statistic file. + + Parameters + ---------- + key: str + statistic file key string + + Returns + --------- + bin_dict: dict of strs + Dictionary containing the bin name for each template id + """ + ifo = key.split("-")[0] + with h5py.File(self.files[key], "r") as dq_file: + tids = [] + bin_nums = [] + bin_grp = dq_file[f"{ifo}/bins"] + for bin_name in bin_grp.keys(): + bin_tids = bin_grp[f"{bin_name}/tids"][:] + tids = list(tids) + list(bin_tids.astype(int)) + bin_nums = list(bin_nums) + list([bin_name] * len(bin_tids)) + + bin_dict = dict(zip(tids, bin_nums)) + return bin_dict + + def assign_dq_rates(self, key): + """ + Assign dq values to each time for every bin based on a + referenced statistic file. + + Parameters + ---------- + key: str + statistic file key string + + Returns + --------- + dq_dict: dict of {time: dq_value} dicts for each bin + Dictionary containing the mapping between the time + and the dq value for each individual bin. + + """ + ifo = key.split("-")[0] + with h5py.File(self.files[key], "r") as dq_file: + bin_grp = dq_file[f"{ifo}/bins"] + dq_dict = {} + for bin_name in bin_grp.keys(): + dq_dict[bin_name] = bin_grp[f"{bin_name}/dq_rates"][:] + + return dq_dict + + def setup_segments(self, key): + """ + Store segments from stat file + """ + ifo = key.split("-")[0] + with h5py.File(self.files[key], "r") as dq_file: + ifo_grp = dq_file[ifo] + dq_state_segs_dict = {} + for k in ifo_grp["dq_segments"].keys(): + seg_dict = {} + seg_dict["start"] = \ + ifo_grp[f"dq_segments/{k}/segment_starts"][:] + seg_dict["end"] = ifo_grp[f"dq_segments/{k}/segment_ends"][:] + dq_state_segs_dict[k] = seg_dict + + return dq_state_segs_dict + + def find_dq_noise_rate(self, trigs, dq_state): + """Get dq values for a specific ifo and dq states""" + + dq_val = numpy.ones(len(dq_state)) + + if self.curr_ifo in self.dq_rates_by_state: + for i, st in enumerate(dq_state): + if isinstance(self.curr_tnum, numpy.ndarray): + bin_name = self.dq_bin_by_tid[self.curr_ifo][self.curr_tnum[i]] + else: + bin_name = self.dq_bin_by_tid[self.curr_ifo][self.curr_tnum] + dq_val[i] = self.dq_rates_by_state[self.curr_ifo][bin_name][st] + return dq_val + + def find_dq_state_by_time(self, ifo, times): + """Get the dq state for an ifo at times""" + dq_state = numpy.zeros(len(times), dtype=numpy.uint8) + if ifo in self.dq_state_segments: + from pycbc.events.veto import indices_within_times + + for k in self.dq_state_segments[ifo]: + starts = self.dq_state_segments[ifo][k]["start"] + ends = self.dq_state_segments[ifo][k]["end"] + inds = indices_within_times(times, starts, ends) + # states are named in file as 'dq_state_N', need to extract N + dq_state[inds] = int(k[9:]) + return dq_state + + def check_low_latency(self, key): + """ + Check if the statistic file indicates low latency mode. + Parameters + ---------- + key: str + Statistic file key string. 
+ Returns + ------- + None + """ + ifo = key.split('-')[0] + with h5py.File(self.files[key], 'r') as dq_file: + ifo_grp = dq_file[ifo] + if 'dq_segments' not in ifo_grp.keys(): + # if segs are not in file, we must be in LL + if self.dq_state_segments is not None: + raise ValueError( + 'Either all dq stat files must have segments or none' + ) + self.low_latency = True + elif self.low_latency: + raise ValueError( + 'Either all dq stat files must have segments or none' + ) + + def reassign_rate(self, ifo): + """ + Reassign the rate to be number per time rather than an arbitrarily + normalised number. + + Parameters + ----------- + ifo: str + The ifo to consider. + """ + with h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') as coeff_file: + analysis_time = float(coeff_file.attrs['analysis_time']) + fbt = 'fit_by_template' in coeff_file + + self.fits_by_tid[ifo]['smoothed_rate_above_thresh'] /= analysis_time + self.fits_by_tid[ifo]['smoothed_rate_in_template'] /= analysis_time + # The by-template fits may have been stored in the smoothed fits file + if fbt: + self.fits_by_tid[ifo]['fit_by_rate_above_thresh'] /= analysis_time + self.fits_by_tid[ifo]['fit_by_rate_in_template'] /= analysis_time + def assign_fits(self, ifo): """ - Extract fits from fit files + Extract fits from single-detector rate fit files Parameters ----------- @@ -897,32 +1167,43 @@ def assign_fits(self, ifo): A dictionary containing the fit information in the `alpha`, `rate` and `thresh` keys. """ - coeff_file = h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') - template_id = coeff_file['template_id'][:] + coeff_file = h5py.File(self.files[f"{ifo}-fit_coeffs"], "r") + template_id = coeff_file["template_id"][:] # the template_ids and fit coeffs are stored in an arbitrary order # create new arrays in template_id order for easier recall tid_sort = numpy.argsort(template_id) fits_by_tid_dict = {} - fits_by_tid_dict['smoothed_fit_coeff'] = \ - coeff_file['fit_coeff'][:][tid_sort] - fits_by_tid_dict['smoothed_rate_above_thresh'] = \ - coeff_file['count_above_thresh'][:][tid_sort].astype(float) - fits_by_tid_dict['smoothed_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) + fits_by_tid_dict["smoothed_fit_coeff"] = coeff_file["fit_coeff"][:][ + tid_sort + ] + fits_by_tid_dict["smoothed_rate_above_thresh"] = coeff_file[ + "count_above_thresh" + ][:][tid_sort].astype(float) + fits_by_tid_dict["smoothed_rate_in_template"] = coeff_file[ + "count_in_template" + ][:][tid_sort].astype(float) + if self.kwargs["sensitive_volume"]: + fits_by_tid_dict["median_sigma"] = coeff_file["median_sigma"][:][ + tid_sort + ].astype(float) # The by-template fits may have been stored in the smoothed fits file - if 'fit_by_template' in coeff_file: - coeff_fbt = coeff_file['fit_by_template'] - fits_by_tid_dict['fit_by_fit_coeff'] = \ - coeff_fbt['fit_coeff'][:][tid_sort] - fits_by_tid_dict['fit_by_rate_above_thresh'] = \ - coeff_fbt['count_above_thresh'][:][tid_sort].astype(float) - fits_by_tid_dict['fit_by_rate_in_template'] = \ - coeff_file['count_in_template'][:][tid_sort].astype(float) + if "fit_by_template" in coeff_file: + coeff_fbt = coeff_file["fit_by_template"] + fits_by_tid_dict["fit_by_fit_coeff"] = coeff_fbt["fit_coeff"][:][ + tid_sort + ] + fits_by_tid_dict["fit_by_rate_above_thresh"] = coeff_fbt[ + "count_above_thresh" + ][:][tid_sort].astype(float) + fits_by_tid_dict["fit_by_rate_in_template"] = coeff_file[ + "count_in_template" + ][:][tid_sort].astype(float) + # Keep the fit threshold in fits_by_tid - 
fits_by_tid_dict['thresh'] = coeff_file.attrs['stat_threshold'] + fits_by_tid_dict["thresh"] = coeff_file.attrs["stat_threshold"] coeff_file.close() @@ -934,11 +1215,18 @@ def update_file(self, key): If others are used (i.e. this statistic is inherited), they will need updated separately """ + # First - check if the phasetd file has been updated, this is + # inherited from the PhaseTDStatistic + if PhaseTDStatistic.update_file(self, key): + return True + if key.endswith('-fit_coeffs'): # This is a ExpFitStatistic file which needs updating # Which ifo is it? ifo = key[:2] self.fits_by_tid[ifo] = self.assign_fits(ifo) + if self.kwargs["normalize_fit_rate"]: + self.reassign_rate(ifo) self.get_ref_vals(ifo) logger.info( "Updating %s statistic %s file", @@ -946,6 +1234,30 @@ def update_file(self, key): key ) return True + + # Is the key a KDE statistic file that we update here? + if key.endswith('kde_file'): + logger.info( + "Updating %s statistic %s file", + ''.join(self.ifos), + key + ) + kde_style = key.split('-')[0] + self.assign_kdes(kde_style) + return True + + # We also need to check if the DQ files have updated + if key.endswith('dq_stat_info'): + ifo = key.split('-')[0] + logger.info( + "Updating %s statistic %s file", + ifo, + key + ) + self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) + self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) + return True + return False def get_ref_vals(self, ifo): @@ -983,13 +1295,6 @@ def find_fits(self, trigs): thresh: float or numpy array The thresh fit value(s) """ - try: - # Exists where trigs is a class with the template num attribute - tnum = trigs.template_num - except AttributeError: - # Exists where trigs is dict-like - tnum = trigs['template_id'] - try: ifo = trigs.ifo except AttributeError: @@ -1000,17 +1305,56 @@ def find_fits(self, trigs): # fits_by_tid is a dictionary of dictionaries of arrays # indexed by ifo / coefficient name / template_id - alphai = self.fits_by_tid[ifo]['smoothed_fit_coeff'][tnum] - ratei = self.fits_by_tid[ifo]['smoothed_rate_above_thresh'][tnum] - thresh = self.fits_by_tid[ifo]['thresh'] + alphai = self.fits_by_tid[ifo]["smoothed_fit_coeff"][self.curr_tnum] + ratei = self.fits_by_tid[ifo]["smoothed_rate_above_thresh"][self.curr_tnum] + thresh = self.fits_by_tid[ifo]["thresh"] return alphai, ratei, thresh + def find_kdes(self): + """ + Find which associated files are for the KDE reweighting + """ + # The stat file attributes are hard-coded as 'signal-kde_file' + # and 'template-kde_file' + parsed_attrs = [f.split("-") for f in self.files.keys()] + self.kde_names = [ + at[0] + for at in parsed_attrs + if (len(at) == 2 and at[1] == "kde_file") + ] + assert sorted(self.kde_names) == ["signal", "template"], ( + "Two stat files are required, they should have stat attr " + "'signal-kde_file' and 'template-kde_file' respectively" + ) + + def assign_kdes(self, kname): + """ + Extract values from KDE files + + Parameters + ----------- + kname: str + Used to label the kde files. 
+ """ + with h5py.File(self.files[kname + "-kde_file"], "r") as kde_file: + self.kde_by_tid[kname + "_kdevals"] = kde_file["data_kde"][:] + + def kde_ratio(self): + """ + Calculate the weighting factor according to the ratio of the + signal and template KDE lookup tables + """ + signal_kde = self.kde_by_tid["signal_kdevals"][self.curr_tnum] + template_kde = self.kde_by_tid["template_kdevals"][self.curr_tnum] + + return numpy.log(signal_kde / template_kde) + def lognoiserate(self, trigs): """ Calculate the log noise rate density over single-ifo ranking - Read in single trigger information, compute the ranking + Read in single trigger information, make the sngl_stat and rescale by the fitted coefficients alpha and rate Parameters @@ -1023,14 +1367,34 @@ def lognoiserate(self, trigs): lognoisel: numpy.array Array of log noise rate density for each input trigger. """ + # What is the template number currently being used? + try: + # exists if accessed via coinc_findtrigs, this is a number + self.curr_tnum = trigs.template_num + except AttributeError: + # exists for SingleDetTriggers & pycbc_live get_coinc, + # this is a numpy array + self.curr_tnum = trigs["template_id"] + alphai, ratei, thresh = self.find_fits(trigs) sngl_stat = self.get_sngl_ranking(trigs) - # alphai is constant of proportionality between single-ifo newsnr and - # negative log noise likelihood in given template - # ratei is rate of trigs in given template compared to average - # thresh is stat threshold used in given ifo - lognoisel = - alphai * (sngl_stat - thresh) + numpy.log(alphai) + \ - numpy.log(ratei) + lognoisel = ( + -alphai * (sngl_stat - thresh) + + numpy.log(alphai) + + numpy.log(ratei) + ) + + if not numpy.isinf(self.alphabelow): + # Above the threshold we use the usual fit coefficient (alphai) + # below threshold use specified alphabelow + bt = sngl_stat < thresh + lognoiselbt = ( + -self.alphabelow * (sngl_stat - thresh) + + numpy.log(self.alphabelow) + + numpy.log(ratei) + ) + lognoisel[bt] = lognoiselbt[bt] + return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32) def single(self, trigs): @@ -1038,6 +1402,7 @@ def single(self, trigs): Calculate the necessary single detector information In this case the ranking rescaled (see the lognoiserate method here). + with the phase, end time, SNR values added in. Parameters ---------- @@ -1050,1461 +1415,340 @@ def single(self, trigs): The array of single detector values """ - return self.lognoiserate(trigs) - - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the statistic for a single detector candidate + try: + self.curr_ifo = trigs.ifo + except AttributeError: + self.curr_ifo = trigs.get('ifo', None) + if self.curr_ifo is None: + self.curr_ifo = self.ifos[0] + assert self.curr_ifo in self.ifos + # Should be exactly one ifo provided + assert len(numpy.unique(self.curr_ifo)) == 1 - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. 
+ # single-ifo stat = log of noise rate + sngl_stat = self.lognoiserate(trigs) + # populate other fields to calculate phase/time/amp consistency + singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) + singles["coa_phase"] = trigs["coa_phase"][:] + singles["end_time"] = trigs["end_time"][:] + singles["snr"] = trigs["snr"][:] + # Save info about best-case scenario for use later + self.min_snr = min(singles["snr"].min(), self.min_snr) + + if self.kwargs["sensitive_volume"]: + # populate fields to allow sensitive volume factor calculation + singles["sigmasq"] = trigs["sigmasq"][:] + # Store benchmark log volume as single-ifo information since + # the ranking methods do not have access to template id + singles["benchmark_logvol"] = self.benchmark_logvol[self.curr_tnum] + + # Save info about the best-case scenario for use later: + max_sigsq = numpy.max(singles["sigmasq"]) + self.max_sigmasq = max(max_sigsq, self.max_sigmasq) + + if self.kwargs["chirp_mass"]: + from pycbc.conversions import mchirp_from_mass1_mass2 - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) + try: + mass1 = trigs.param['mass1'] + mass2 = trigs.param['mass2'] + except AttributeError: + mass1 = trigs['mass1'] + mass2 = trigs['mass2'] + self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2) + + if self.kwargs["dq"]: + if self.low_latency: + # trigs should already have a dq state assigned + singles['dq_state'] = trigs['dq_state'][:] + else: + singles['dq_state'] = self.find_dq_state_by_time( + self.curr_ifo, trigs['end_time'][:] + ) + dq_rate = self.find_dq_noise_rate(trigs, singles['dq_state']) + dq_rate = numpy.maximum(dq_rate, 1) + sngl_stat += numpy.log(dq_rate) - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) + singles["snglstat"] = sngl_stat + return numpy.array(singles, ndmin=1) - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest + def sensitive_volume_factor(self, sngls): """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) + Determine the factor for different network sensitivities - # Keeping this here to help write the new coinc method. - def coinc_OLD(self, s0, s1, slide, step): # pylint:disable=unused-argument - """Calculate the final coinc ranking statistic""" - - # Approximate log likelihood ratio by summing single-ifo negative - # log noise likelihoods - loglr = - s0 - s1 - # add squares of threshold stat values via idealized Gaussian formula - threshes = [self.fits_by_tid[i]['thresh'] for i in self.bg_ifos] - loglr += sum([t ** 2. / 2. for t in threshes]) - # convert back to a coinc-SNR-like statistic - # via log likelihood ratio \propto rho_c^2 / 2 - return (2. * loglr) ** 0.5 - - # Keeping this here to help write the new coinc_lim method - def coinc_lim_for_thresh_OLD(self, s0, thresh): - """Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. + Assuming a homogeneous distribution of sources, the signal rate + should be given by the volume of the sphere to which we are + sensitive. 
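# --- Illustrative sketch; not part of the patch. ---
# single() above returns one structured-array record per trigger.  The
# field list comes from self.single_dtype and grows with the enabled
# features; a baseline record plus the sensitive_volume fields is roughly
# as below (the exact dtype composition is an assumption here).
import numpy

base_fields = [
    ("snglstat", numpy.float32),   # exp-fit log noise-rate density
    ("coa_phase", numpy.float32),  # used by the phasetd histogram
    ("end_time", numpy.float64),
    ("snr", numpy.float32),
]
sensitive_volume_fields = [
    ("sigmasq", numpy.float32),
    ("benchmark_logvol", numpy.float32),
]

# Three example triggers; the values are placeholders.
example_singles = numpy.zeros(3, dtype=base_fields + sensitive_volume_fields)
example_singles["snr"] = [6.2, 7.1, 5.9]
example_singles["end_time"] = [1.26e9, 1.26e9 + 1.5, 1.26e9 + 3.0]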
Parameters ---------- - s0: numpy.ndarray - Single detector ranking statistic for the first detector. - thresh: float - The threshold on the coincident statistic. + sngls: tuple + Single-detector information, tuples of the (ifo, sngl_object), + where sngl_object is a ReadByTemplate object or similar. Returns ------- - numpy.ndarray - Array of limits on the second detector single statistic to - exceed thresh. - """ - s1 = - (thresh ** 2.) / 2. - s0 - threshes = [self.fits_by_tid[i]['thresh'] for i in self.bg_ifos] - s1 += sum([t ** 2. / 2. for t in threshes]) - return s1 - - -class ExpFitCombinedSNR(ExpFitStatistic): - """ - Reworking of ExpFitStatistic designed to resemble network SNR - - Use a monotonic function of the negative log noise rate density which - approximates combined (new)snr for coincs with similar newsnr in each ifo - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - - ifos: list of strs, not used here - The list of detector names - """ - ExpFitStatistic.__init__(self, sngl_ranking, files=files, ifos=ifos, - **kwargs) - # for low-mass templates the exponential slope alpha \approx 6 - self.alpharef = 6. - self.single_increasing = True - self.single_dtype = numpy.float32 - - def use_alphamax(self): - """ - Compute the reference alpha from the fit files. - - Use the harmonic mean of the maximum individual ifo slopes as the - reference value of alpha. - """ - inv_alphas = [1. / self.alphamax[i] for i in self.bg_ifos] - self.alpharef = 1. / (sum(inv_alphas) / len(inv_alphas)) - - def single(self, trigs): - """ - Calculate the necessary single detector information - - Parameters - ---------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - - Returns - ------- - numpy.ndarray - The array of single detector values - """ - logr_n = self.lognoiserate(trigs) - _, _, thresh = self.find_fits(trigs) - # shift by log of reference slope alpha - logr_n += -1. * numpy.log(self.alpharef) - # add threshold and rescale by reference slope - stat = thresh - (logr_n / self.alpharef) - return numpy.array(stat, ndmin=1, dtype=numpy.float32) - - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the statistic for single detector candidates - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. - - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - if self.single_increasing: - sngl_multiifo = single_info[1] - else: - sngl_multiifo = -1. * single_info[1] - return sngl_multiifo - - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. 
- - Parameters - ---------- - sngls_list: list - List of (ifo, single detector statistic) tuples - slide: (unused in this statistic) - step: (unused in this statistic) - to_shift: list - List of integers indicating what multiples of the time shift will - be applied (unused in this statistic) - - Returns - ------- - numpy.ndarray - Array of coincident ranking statistic values - """ - # scale by 1/sqrt(number of ifos) to resemble network SNR - return sum(sngl[1] for sngl in s) / (len(s) ** 0.5) - - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. - """ - # Safety against subclassing and not rethinking this - allowed_names = ['ExpFitCombinedSNR'] - self._check_coinc_lim_subclass(allowed_names) - - return thresh * ((len(s) + 1) ** 0.5) - sum(sngl[1] for sngl in s) - - -class PhaseTDExpFitStatistic(PhaseTDStatistic, ExpFitCombinedSNR): - """ - Statistic combining exponential noise model with signal histogram PDF - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, needed here - The list of detector names - """ - # read in both foreground PDF and background fit info - ExpFitCombinedSNR.__init__(self, sngl_ranking, files=files, ifos=ifos, - **kwargs) - # need the self.single_dtype value from PhaseTDStatistic - PhaseTDStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. this statistic is inherited), they will - need updated separately - """ - # Here we inherit the PhaseTD and ExpFit file checks, - # nothing else needs doing - uf_exp_fit = ExpFitCombinedSNR.update_file(self, key) - uf_phasetd = PhaseTDStatistic.update_file(self, key) - return uf_exp_fit or uf_phasetd - - def single(self, trigs): - """ - Calculate the necessary single detector information - - In this case the ranking rescaled (see the lognoiserate method here) - with the phase, end time, sigma and SNR values added in. - - Parameters - ---------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. 
- - Returns - ------- - numpy.ndarray - The array of single detector values - """ - # same single-ifo stat as ExpFitCombinedSNR - sngl_stat = ExpFitCombinedSNR.single(self, trigs) - singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles['snglstat'] = sngl_stat - singles['coa_phase'] = trigs['coa_phase'][:] - singles['end_time'] = trigs['end_time'][:] - singles['sigmasq'] = trigs['sigmasq'][:] - singles['snr'] = trigs['snr'][:] - return numpy.array(singles, ndmin=1) - - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the statistic for a single detector candidate - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. - - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) - - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) - - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - """ - err_msg = "Sorry! No-one has implemented this method yet! " - raise NotImplementedError(err_msg) - - # Keeping the old statistic code here for now to help with reimplementing - def coinc_OLD(self, s0, s1, slide, step): - # logsignalrate function inherited from PhaseTDStatistic - logr_s = self.logsignalrate(s0, s1, slide * step) - # rescale by ExpFitCombinedSNR reference slope as for sngl stat - cstat = s0['snglstat'] + s1['snglstat'] + logr_s / self.alpharef - # cut off underflowing and very small values - cstat[cstat < 8.] = 8. - # scale to resemble network SNR - return cstat / (2. ** 0.5) - - def coinc_lim_for_thresh_OLD(self, s0, thresh): - # if the threshold is below this value all triggers will - # pass because of rounding in the coinc method - if thresh <= (8. / (2. ** 0.5)): - return -1. * numpy.ones(len(s0['snglstat'])) * numpy.inf - if not self.has_hist: - self.get_hist() - # Assume best case scenario and use maximum signal rate - logr_s = self.hist_max - s1 = (2 ** 0.5) * thresh - s0['snglstat'] - logr_s / self.alpharef - return s1 - - -class ExpFitBgRateStatistic(ExpFitStatistic): - """ - Detection statistic using an exponential falloff noise model. - - Statistic calculates the log noise coinc rate for each - template over single-ifo newsnr values. - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - benchmark_lograte=-14.6, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - benchmark_lograte: float, default=-14.6 - benchmark_lograte is log of a representative noise trigger rate. - The default comes from H1L1 (O2) and is 4.5e-7 Hz. 
- """ - - super(ExpFitBgRateStatistic, self).__init__(sngl_ranking, - files=files, ifos=ifos, - **kwargs) - self.benchmark_lograte = benchmark_lograte - - # Reassign the rate to be number per time rather than an arbitrarily - # normalised number - for ifo in self.bg_ifos: - self.reassign_rate(ifo) - - def reassign_rate(self, ifo): - """ - Reassign the rate to be number per time rather - - Reassign the rate to be number per time rather than an arbitrarily - normalised number. - - Parameters - ----------- - ifo: str - The ifo to consider. - """ - with h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') as coeff_file: - analysis_time = float(coeff_file.attrs['analysis_time']) - fbt = 'fit_by_template' in coeff_file - - self.fits_by_tid[ifo]['smoothed_rate_above_thresh'] /= analysis_time - self.fits_by_tid[ifo]['smoothed_rate_in_template'] /= analysis_time - # The by-template fits may have been stored in the smoothed fits file - if fbt: - self.fits_by_tid[ifo]['fit_by_rate_above_thresh'] /= analysis_time - self.fits_by_tid[ifo]['fit_by_rate_in_template'] /= analysis_time - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. this statistic is inherited), they will - need updated separately - """ - # Check if the file to update is an ExpFit file - uf_expfit = ExpFitStatistic.update_file(self, key) - # If this has been updated we must do the reassign_rate step here - # on top of the file update from earlier - if uf_expfit: - # This is a fit coeff file which needs updating - # Which ifo is it? - ifo = key[:2] - self.reassign_rate(ifo) - return True - return False - - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. - - Parameters - ---------- - sngls_list: list - List of (ifo, single detector statistic) tuples - slide: (unused in this statistic) - step: (unused in this statistic) - to_shift: list - List of integers indicating what multiples of the time shift will - be applied (unused in this statistic) - - Returns - ------- - numpy.ndarray - Array of coincident ranking statistic values - """ - # ranking statistic is -ln(expected rate density of noise triggers) - # plus normalization constant - sngl_dict = {sngl[0]: sngl[1] for sngl in s} - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_dict, kwargs['time_addition']) - loglr = - ln_noise_rate + self.benchmark_lograte - return loglr - - def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. 
- """ - # Safety against subclassing and not rethinking this - allowed_names = ['ExpFitBgRateStatistic'] - self._check_coinc_lim_subclass(allowed_names) - - sngl_dict = {sngl[0]: sngl[1] for sngl in s} - sngl_dict[limifo] = numpy.zeros(len(s[0][1])) - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_dict, kwargs['time_addition']) - loglr = - thresh - ln_noise_rate + self.benchmark_lograte - return loglr - - -class ExpFitFgBgNormStatistic(PhaseTDStatistic, - ExpFitBgRateStatistic): - """ - Statistic combining PhaseTD, ExpFitBg and additional foreground info. - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - reference_ifos='H1,L1', **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs - The list of detector names - reference_ifos: string of comma separated ifo prefixes - Detectors to be used as the reference network for network - sensitivity comparisons. Each must be in fits_by_tid - """ - # read in background fit info and store it - ExpFitBgRateStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - # if ifos not already set, determine via background fit info - self.ifos = self.ifos or self.bg_ifos - # PhaseTD statistic single_dtype plus network sensitivity benchmark - PhaseTDStatistic.__init__(self, sngl_ranking, files=files, - ifos=self.ifos, **kwargs) - self.single_dtype.append(('benchmark_logvol', numpy.float32)) - - for ifo in self.bg_ifos: - self.assign_median_sigma(ifo) - - self.ref_ifos = reference_ifos.split(',') - self.benchmark_logvol = None - self.assign_benchmark_logvol() - self.single_increasing = False - # Initialize variable to hold event template id(s) - self.curr_tnum = None - - def assign_median_sigma(self, ifo): - """ - Read and sort the median_sigma values from input files. - - Parameters - ---------- - ifo: str - The ifo to consider. - """ - - with h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') as coeff_file: - template_id = coeff_file['template_id'][:] - tid_sort = numpy.argsort(template_id) - self.fits_by_tid[ifo]['median_sigma'] = \ - coeff_file['median_sigma'][:][tid_sort] - - def assign_benchmark_logvol(self): - """ - Assign the benchmark log-volume used by the statistic. - This is the sensitive log-volume of each template in the - network of reference IFOs - """ - # benchmark_logvol is a benchmark sensitivity array over template id - bench_net_med_sigma = numpy.amin( - [self.fits_by_tid[ifo]['median_sigma'] for ifo in self.ref_ifos], - axis=0, - ) - self.benchmark_logvol = 3. * numpy.log(bench_net_med_sigma) - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. this statistic is inherited), they will - need updated separately - """ - # Here we inherit the PhaseTD file checks - uf_phasetd = PhaseTDStatistic.update_file(self, key) - uf_exp_fit = ExpFitBgRateStatistic.update_file(self, key) - if uf_phasetd: - # The key to update refers to a PhaseTDStatistic file - return True - if uf_exp_fit: - # The key to update refers to a ExpFitBgRateStatistic file - # In this case we must reload some statistic information - # Which ifo is it? 
- ifo = key[:2] - self.assign_median_sigma(ifo) - self.assign_benchmark_logvol() - return True - return False - - def lognoiserate(self, trigs, alphabelow=6): - """ - Calculate the log noise rate density over single-ifo ranking - - Read in single trigger information, make the newsnr statistic - and rescale by the fitted coefficients alpha and rate - - Parameters - ----------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - alphabelow: float, default=6 - Use this slope to fit the noise triggers below the point at which - fits are present in the input files. - - Returns - --------- - lognoisel: numpy.array - Array of log noise rate density for each input trigger. - """ - alphai, ratei, thresh = self.find_fits(trigs) - newsnr = self.get_sngl_ranking(trigs) - # Above the threshold we use the usual fit coefficient (alpha) - # below threshold use specified alphabelow - bt = newsnr < thresh - lognoisel = - alphai * (newsnr - thresh) + numpy.log(alphai) + \ - numpy.log(ratei) - lognoiselbt = - alphabelow * (newsnr - thresh) + \ - numpy.log(alphabelow) + numpy.log(ratei) - lognoisel[bt] = lognoiselbt[bt] - return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32) - - def single(self, trigs): - """ - Calculate the necessary single detector information - - In this case the ranking rescaled (see the lognoiserate method here) - with the phase, end time, sigma, SNR, template_id and the - benchmark_logvol values added in. - - Parameters - ---------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - Returns - ------- - numpy.ndarray - The array of single detector values - """ - try: - # exists if accessed via coinc_findtrigs - self.curr_tnum = trigs.template_num - except AttributeError: - # exists for SingleDetTriggers & pycbc_live get_coinc - self.curr_tnum = trigs['template_id'] - - # single-ifo stat = log of noise rate - sngl_stat = self.lognoiserate(trigs) - # populate other fields to calculate phase/time/amp consistency - # and sigma comparison - singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype) - singles['snglstat'] = sngl_stat - singles['coa_phase'] = trigs['coa_phase'][:] - singles['end_time'] = trigs['end_time'][:] - singles['sigmasq'] = trigs['sigmasq'][:] - singles['snr'] = trigs['snr'][:] - - # Store benchmark log volume as single-ifo information since the coinc - # method does not have access to template id - singles['benchmark_logvol'] = self.benchmark_logvol[self.curr_tnum] - return numpy.array(singles, ndmin=1) - - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the statistic for single detector candidates - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. - - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - sngls = single_info[1] - - ln_noise_rate = sngls['snglstat'] - ln_noise_rate -= self.benchmark_lograte - network_sigmasq = sngls['sigmasq'] - network_logvol = 1.5 * numpy.log(network_sigmasq) - benchmark_logvol = sngls['benchmark_logvol'] - network_logvol -= benchmark_logvol - ln_s = -4 * numpy.log(sngls['snr'] / self.ref_snr) - loglr = network_logvol - ln_noise_rate + ln_s - # cut off underflowing and very small values - loglr[loglr < -30.] = -30. 
- return loglr - - def rank_stat_coinc(self, s, slide, step, to_shift, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the coincident detection statistic. - - Parameters - ---------- - sngls_list: list - List of (ifo, single detector statistic) tuples - slide: (unused in this statistic) - step: (unused in this statistic) - to_shift: list - List of integers indicating what multiples of the time shift will - be applied (unused in this statistic) - - Returns - ------- - numpy.ndarray - Array of coincident ranking statistic values - """ - - sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s} - # Find total volume of phase-time-amplitude space occupied by - # noise coincs - if 'dets' in kwargs: - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_rates, kwargs['time_addition'], - kwargs['dets']) - # Extent of time-difference space occupied - noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, kwargs['time_addition'], - kwargs['dets']) - else: - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_rates, kwargs['time_addition']) - noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, kwargs['time_addition']) - - ln_noise_rate -= self.benchmark_lograte - - # Network sensitivity for a given coinc type is approximately - # determined by the least sensitive ifo - network_sigmasq = numpy.amin([sngl[1]['sigmasq'] for sngl in s], - axis=0) - # Volume \propto sigma^3 or sigmasq^1.5 - network_logvol = 1.5 * numpy.log(network_sigmasq) - # Get benchmark log volume as single-ifo information : - # benchmark_logvol for a given template is not ifo-dependent, so - # choose the first ifo for convenience - benchmark_logvol = s[0][1]['benchmark_logvol'] - network_logvol -= benchmark_logvol - - # Use prior histogram to get Bayes factor for signal vs noise - # given the time, phase and SNR differences between IFOs - - # First get signal PDF logr_s - stat = {ifo: st for ifo, st in s} - logr_s = self.logsignalrate(stat, slide * step, to_shift) - - # Volume is the allowed time difference window, multiplied by 2pi for - # each phase difference dimension and by allowed range of SNR ratio - # for each SNR ratio dimension : there are (n_ifos - 1) dimensions - # for both phase and SNR - n_ifos = len(self.hist_ifos) - hist_vol = noise_twindow * \ - (2. * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \ - (n_ifos - 1) - # Noise PDF is 1/volume, assuming a uniform distribution of noise - # coincs - logr_n = - numpy.log(hist_vol) - - # Combine to get final statistic: log of - # ((rate of signals / rate of noise) * PTA Bayes factor) - loglr = network_logvol - ln_noise_rate + logr_s - logr_n - - # cut off underflowing and very small values - loglr[loglr < -30.] = -30. - return loglr - - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. 
- """ - - # Safety against subclassing and not rethinking this - allowed_names = ['ExpFitFgBgNormStatistic', - 'ExpFitFgBgNormBBHStatistic', - 'DQExpFitFgBgNormStatistic', - 'DQExpFitFgBgKDEStatistic', - 'ExpFitFgBgKDEStatistic'] - self._check_coinc_lim_subclass(allowed_names) - - if not self.has_hist: - self.get_hist() - # if the threshold is below this value all triggers will - # pass because of rounding in the coinc method - if thresh <= -30.: - return numpy.ones(len(s[0][1]['snglstat'])) * numpy.inf - sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s} - # Add limifo to singles dict so that overlap time is calculated correctly - sngl_rates[limifo] = numpy.zeros(len(s[0][1])) - ln_noise_rate = coinc_rate.combination_noise_lograte( - sngl_rates, kwargs['time_addition']) - ln_noise_rate -= self.benchmark_lograte - - # Assume best case and use the maximum sigma squared from all triggers - network_sigmasq = numpy.ones(len(s[0][1])) * kwargs['max_sigmasq'] - # Volume \propto sigma^3 or sigmasq^1.5 - network_logvol = 1.5 * numpy.log(network_sigmasq) - # Get benchmark log volume as single-ifo information : - # benchmark_logvol for a given template is not ifo-dependent, so - # choose the first ifo for convenience - benchmark_logvol = s[0][1]['benchmark_logvol'] - network_logvol -= benchmark_logvol - - # Assume best case scenario and use maximum signal rate - logr_s = numpy.log(self.hist_max - * (kwargs['min_snr'] / self.ref_snr) ** -4.) - - # Find total volume of phase-time-amplitude space occupied by noise - # coincs - # Extent of time-difference space occupied - noise_twindow = coinc_rate.multiifo_noise_coincident_area( - self.hist_ifos, kwargs['time_addition']) - # Volume is the allowed time difference window, multiplied by 2pi for - # each phase difference dimension and by allowed range of SNR ratio - # for each SNR ratio dimension : there are (n_ifos - 1) dimensions - # for both phase and SNR - n_ifos = len(self.hist_ifos) - hist_vol = noise_twindow * \ - (2. * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \ - (n_ifos - 1) - # Noise PDF is 1/volume, assuming a uniform distribution of noise - # coincs - logr_n = - numpy.log(hist_vol) - - loglr = - thresh + network_logvol - ln_noise_rate + logr_s - logr_n - return loglr - - -class ExpFitFgBgNormBBHStatistic(ExpFitFgBgNormStatistic): - """ - The ExpFitFgBgNormStatistic with a mass weighting factor. - - This is the same as the ExpFitFgBgNormStatistic except the likelihood - is multiplied by a signal rate prior modelled as uniform over chirp mass. - As templates are distributed roughly according to mchirp^(-11/3) we - weight by the inverse of this. This ensures that quiet signals at high - mass where template density is sparse are not swamped by events at lower - masses where template density is high. - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - max_chirp_mass=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - max_chirp_mass: float, default=None - If given, if a template's chirp mass is above this value it will - be reweighted as if it had this chirp mass. 
This is to avoid the - problem where the distribution fails to be accurate at high mass - and we can have a case where a single highest-mass template might - produce *all* the loudest background (and foreground) events. - """ - ExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.mcm = max_chirp_mass - self.curr_mchirp = None - - def logsignalrate(self, stats, shift, to_shift): - """ - Calculate the normalized log rate density of signals via lookup - - This calls back to the Parent class and then applies the chirp mass - weighting factor. - - Parameters - ---------- - stats: list of dicts giving single-ifo quantities, ordered as - self.ifos - shift: numpy array of float, size of the time shift vector for each - coinc to be ranked - to_shift: list of int, multiple of the time shift to apply ordered - as self.ifos - - Returns - ------- - value: log of coinc signal rate density for the given single-ifo - triggers and time shifts - """ - # Model signal rate as uniform over chirp mass, background rate is - # proportional to mchirp^(-11/3) due to density of templates - logr_s = ExpFitFgBgNormStatistic.logsignalrate( - self, - stats, - shift, - to_shift - ) - logr_s += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.)) - return logr_s - - def single(self, trigs): - """ - Calculate the necessary single detector information - - In this case the ranking rescaled (see the lognoiserate method here) - with the phase, end time, sigma, SNR, template_id and the - benchmark_logvol values added in. This also stored the current chirp - mass for use when computing the coinc statistic values. - - Parameters - ---------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. - - Returns - ------- - numpy.ndarray - The array of single detector values - """ - from pycbc.conversions import mchirp_from_mass1_mass2 - try: - mass1 = trigs.param['mass1'] - mass2 = trigs.param['mass2'] - except AttributeError: - mass1 = trigs['mass1'] - mass2 = trigs['mass2'] - self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2) - - if self.mcm is not None: - # Careful - input might be a str, so cast to float - self.curr_mchirp = min(self.curr_mchirp, float(self.mcm)) - return ExpFitFgBgNormStatistic.single(self, trigs) - - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the statistic for a single detector candidate - - This calls back to the Parent class and then applies the chirp mass - weighting factor. - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. - - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - rank_sngl = ExpFitFgBgNormStatistic.rank_stat_single( - self, - single_info, - **kwargs) - rank_sngl += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.)) - return rank_sngl - - def rank_stat_coinc(self, sngls_list, slide, step, to_shift, **kwargs): - """ - Calculate the coincident detection statistic. 
- - Parameters - ---------- - sngls_list: list - List of (ifo, single detector statistic) tuples - slide: (unused in this statistic) - step: (unused in this statistic) - to_shift: list - List of integers indicating what multiples of the time shift will - be applied (unused in this statistic) - - Returns - ------- - numpy.ndarray - Array of coincident ranking statistic values - """ - - if 'mchirp' in kwargs: - self.curr_mchirp = kwargs['mchirp'] - - return ExpFitFgBgNormStatistic.rank_stat_coinc(self, - sngls_list, - slide, - step, - to_shift, - **kwargs) - - def coinc_lim_for_thresh(self, s, thresh, limifo, - **kwargs): # pylint:disable=unused-argument - """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed - the threshold for each of the input triggers. - - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. - - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. - """ - loglr = ExpFitFgBgNormStatistic.coinc_lim_for_thresh( - self, s, thresh, limifo, **kwargs) - loglr += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.)) - return loglr - - -class ExpFitFgBgKDEStatistic(ExpFitFgBgNormStatistic): - """ - The ExpFitFgBgNormStatistic with an additional mass and spin weighting - factor determined by KDE statistic files. - - This is the same as the ExpFitFgBgNormStatistic except the likelihood - ratio is multiplied by the ratio of signal KDE to template KDE over some - parameters covering the bank. - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): - """ - Parameters - ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - """ - ExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.kde_names = [] - self.find_kdes() - self.kde_by_tid = {} - for kname in self.kde_names: - self.assign_kdes(kname) - - def find_kdes(self): - """ - Find which associated files are for the KDE reweighting - """ - # The stat file attributes are hard-coded as 'signal-kde_file' - # and 'template-kde_file' - parsed_attrs = [f.split('-') for f in self.files.keys()] - self.kde_names = [at[0] for at in parsed_attrs if - (len(at) == 2 and at[1] == 'kde_file')] - assert sorted(self.kde_names) == ['signal', 'template'], \ - "Two stat files are required, they should have stat attr " \ - "'signal-kde_file' and 'template-kde_file' respectively" - - def assign_kdes(self, kname): - """ - Extract values from KDE files - - Parameters - ----------- - kname: str - Used to label the kde files. - """ - with h5py.File(self.files[kname + '-kde_file'], 'r') as kde_file: - self.kde_by_tid[kname + '_kdevals'] = kde_file['data_kde'][:] - - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. 
this statistic is inherited), they will - need updated separately - """ - # Inherit from ExpFitFgBgNormStatistic - uf_expfit = ExpFitFgBgNormStatistic.update_file(self, key) - if uf_expfit: - # The key to update refers to a ExpFitFgBgNormStatistic file - return True - # Is the key a KDE statistic file that we update here? - if key.endswith('kde_file'): - logger.info( - "Updating %s statistic %s file", - ''.join(self.ifos), - key - ) - kde_style = key.split('-')[0] - self.assign_kdes(kde_style) - return True - return False - - def kde_ratio(self): - """ - Calculate the weighting factor according to the ratio of the - signal and template KDE lookup tables - """ - signal_kde = self.kde_by_tid["signal_kdevals"][self.curr_tnum] - template_kde = self.kde_by_tid["template_kdevals"][self.curr_tnum] - - return numpy.log(signal_kde / template_kde) - - def logsignalrate(self, stats, shift, to_shift): - """ - Calculate the normalized log rate density of signals via lookup. - - This calls back to the parent class and then applies the ratio_kde - weighting factor. - - Parameters - ---------- - stats: list of dicts giving single-ifo quantities, ordered as - self.ifos - shift: numpy array of float, size of the time shift vector for each - coinc to be ranked - to_shift: list of int, multiple of the time shift to apply ordered - as self.ifos - - Returns - ------- - value: log of coinc signal rate density for the given single-ifo - triggers and time shifts - """ - logr_s = ExpFitFgBgNormStatistic.logsignalrate(self, stats, shift, - to_shift) - logr_s += self.kde_ratio() - - return logr_s - - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument - """ - Calculate the statistic for a single detector candidate - - Parameters - ---------- - single_info: tuple - Tuple containing two values. The first is the ifo (str) and the - second is the single detector triggers. - - Returns - ------- - numpy.ndarray - The array of single detector statistics - """ - rank_sngl = ExpFitFgBgNormStatistic.rank_stat_single( - self, - single_info, - **kwargs) - rank_sngl += self.kde_ratio() - return rank_sngl - - def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): + network_logvol: numpy.array + Array of values for the network log-volume factor. This is the + log of the cube of the sensitive distance (sigma), divided by + a benchmark volume. """ - Optimization function to identify coincs too quiet to be of interest - - Calculate the required single detector statistic to exceed the - threshold for each of the input trigers. + # Network sensitivity for a given coinc type is approximately + # determined by the least sensitive ifo + network_sigmasq = numpy.amin( + [sngl[1]["sigmasq"] for sngl in sngls], axis=0 + ) + # Volume \propto sigma^3 or sigmasq^1.5 + network_logvol = 1.5 * numpy.log(network_sigmasq) + # Get benchmark log volume as single-ifo information : + # benchmark_logvol for a given template is not ifo-dependent, so + # choose the first ifo for convenience + benchmark_logvol = sngls[0][1]["benchmark_logvol"] + network_logvol -= benchmark_logvol - Parameters - ---------- - s: list - List of (ifo, single detector statistic) tuples for all detectors - except limifo. - thresh: float - The threshold on the coincident statistic. - limifo: string - The ifo for which the limit is to be found. + return network_logvol - Returns - ------- - numpy.ndarray - Array of limits on the limifo single statistic to - exceed thresh. 
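# --- Illustrative sketch; not part of the patch. ---
# The sensitive-volume factor above is the log of the cube of the network's
# limiting sensitive distance, minus a per-template benchmark log-volume.
# The inputs here are placeholders for the per-ifo sigmasq values and the
# benchmark sigma derived from the fit files.
import numpy

def toy_sensitive_volume_factor(sigmasq_by_ifo, benchmark_sigma):
    """log(sigma_network**3) - log(sigma_benchmark**3) per candidate.

    sigmasq_by_ifo: (n_ifos, n_candidates) array of sigma**2 values.
    benchmark_sigma: benchmark sensitive distance for the template(s).
    """
    # Network sensitivity is approximately set by the least sensitive ifo
    network_sigmasq = numpy.amin(sigmasq_by_ifo, axis=0)
    network_logvol = 1.5 * numpy.log(network_sigmasq)   # = 3 * log(sigma)
    benchmark_logvol = 3.0 * numpy.log(benchmark_sigma)
    return network_logvol - benchmark_logvol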
+ def logsignalrate_shared(self, sngls_info): """ - loglr = ExpFitFgBgNormStatistic.coinc_lim_for_thresh( - self, s, thresh, limifo, **kwargs) - signal_kde = self.kde_by_tid["signal_kdevals"][self.curr_tnum] - template_kde = self.kde_by_tid["template_kdevals"][self.curr_tnum] - loglr += numpy.log(signal_kde / template_kde) - return loglr + Calculate the parts of the log signal rate which are shared by + both the single and coinc ranking statistics - -class DQExpFitFgBgNormStatistic(ExpFitFgBgNormStatistic): - """ - The ExpFitFgBgNormStatistic with DQ-based reranking. - - This is the same as the ExpFitFgBgNormStatistic except the likelihood - ratio is corrected via estimating relative noise trigger rates based on - the DQ time series. - """ - - def __init__(self, sngl_ranking, files=None, ifos=None, - **kwargs): - """ Parameters ---------- - sngl_ranking: str - The name of the ranking to use for the single-detector triggers. - files: list of strs, needed here - A list containing the filenames of hdf format files used to help - construct the coincident statistics. The files must have a 'stat' - attribute which is used to associate them with the appropriate - statistic class. - ifos: list of strs, not used here - The list of detector names - """ - ExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.dq_rates_by_state = {} - self.dq_bin_by_tid = {} - self.dq_state_segments = None - self.low_latency = False - self.single_dtype.append(('dq_state', int)) - - for ifo in self.ifos: - key = f'{ifo}-dq_stat_info' - if key in self.files.keys(): - self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) - self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) - self.check_low_latency(key) - if not self.low_latency: - if self.dq_state_segments is None: - self.dq_state_segments = {} - self.dq_state_segments[ifo] = self.setup_segments(key) + sngls_info: tuple + Single-detector information, tuples of the (ifo, sngl_object), + where sngl_object is a ReadByTemplate object or similar. - def check_low_latency(self, key): - """ - Check if the statistic file indicates low latency mode. - Parameters - ---------- - key: str - Statistic file key string. 
Returns - ------- - None - """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - ifo_grp = dq_file[ifo] - if 'dq_segments' not in ifo_grp.keys(): - # if segs are not in file, we must be in LL - if self.dq_state_segments is not None: - raise ValueError( - 'Either all dq stat files must have segments or none' - ) - self.low_latency = True - elif self.low_latency: - raise ValueError( - 'Either all dq stat files must have segments or none' - ) + ------- + sr_factor: numpy.ndarray + Array of values to be added to the ranking statistic when + taking various signal rate factors into account + """ + # Other features affecting the signal rate + sr_factor = 0 + if self.kwargs["sensitive_volume"]: + # Sensitive volume - this is proportional to signal rate + # assuming a homogeneous universe + sr_factor += self.sensitive_volume_factor(sngls_info) + + if self.kwargs["chirp_mass"]: + # chirp mass reweighting + if isinstance(self.curr_mchirp, numpy.ndarray): + mchirp = numpy.minimum(self.curr_mchirp, self.mcm) + else: + # curr_mchirp will be a number + mchirp = min(self.curr_mchirp, self.mcm) + sr_factor += numpy.log((mchirp / 20.0) ** (11.0 / 3.0)) - def assign_template_bins(self, key): + if self.kwargs["kde"]: + # KDE reweighting + sr_factor += self.kde_ratio() + + return sr_factor + + def rank_stat_single(self, single_info): """ - Assign bin ID values - Assign each template id to a bin name based on a - referenced statistic file. + Calculate the statistic for single detector candidates Parameters ---------- - key: str - statistic file key string + single_info: tuple + Tuple containing two values. The first is the ifo (str) and the + second is the single detector triggers. Returns - --------- - bin_dict: dict of strs - Dictionary containing the bin name for each template id + ------- + numpy.ndarray + The array of single detector statistics """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - tids = [] - bin_nums = [] - bin_grp = dq_file[f'{ifo}/bins'] - for bin_name in bin_grp.keys(): - bin_tids = bin_grp[f'{bin_name}/tids'][:] - tids = list(tids) + list(bin_tids.astype(int)) - bin_nums = list(bin_nums) + list([bin_name] * len(bin_tids)) + sngls = single_info[1] - bin_dict = dict(zip(tids, bin_nums)) - return bin_dict + # Basic noise rate: the exp fit rate from the single statistic + ln_noise_rate = sngls["snglstat"] + ln_noise_rate -= self.benchmark_lograte - def assign_dq_rates(self, key): + # Basic signal rate: snr ** -4 + ln_s = -4 * numpy.log(sngls["snr"] / self.ref_snr) + # Add in the feature-dependent terms to the signal rate + ln_s += self.logsignalrate_shared((single_info,)) + + # Combine the signal and noise rates + loglr = ln_s - ln_noise_rate + + # cut off underflowing and very small values + loglr[loglr < self.min_stat] = self.min_stat + return loglr + + def rank_stat_coinc( + self, s, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ - Assign dq values to each time for every bin based on a - referenced statistic file. + Calculate the coincident detection statistic. 
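# --- Illustrative sketch; not part of the patch. ---
# Schematically, rank_stat_single above is ln(signal rate / noise rate):
# an snr**-4 signal term plus any shared feature terms, minus the
# benchmark-normalised exp-fit noise rate, floored at min_stat.  The
# default values below are placeholders, not the class attributes.
import numpy

def toy_single_detector_loglr(snglstat, snr, ref_snr=5.0,
                              benchmark_lograte=-14.6,
                              shared_terms=0.0, min_stat=-30.0):
    ln_noise_rate = snglstat - benchmark_lograte
    ln_signal = -4.0 * numpy.log(snr / ref_snr) + shared_terms
    return numpy.maximum(ln_signal - ln_noise_rate, min_stat)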
Parameters ---------- - key: str - statistic file key string + s: list + List of (ifo, single detector statistic) tuples + slide: numpy array + The number of steps by which to shift each trigger + step: float + The size of each step, to be multiplied by to_shift + to_shift: list + List of integers indicating what multiples of the time shift will + be applied + kwargs: various + Key-word arguments to define different features and tunable + values in the statistic. Only time_addition is used here Returns - --------- - dq_dict: dict of {time: dq_value} dicts for each bin - Dictionary containing the mapping between the time - and the dq value for each individual bin. + ------- + loglr: numpy.ndarray + The ranking statistic for each trigger based on the supplied + triggers, requested features and keyword arguments """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - bin_grp = dq_file[f'{ifo}/bins'] - dq_dict = {} - for bin_name in bin_grp.keys(): - dq_dict[bin_name] = bin_grp[f'{bin_name}/dq_rates'][:] + # ranking statistic is -ln(expected rate density of noise triggers) + # plus normalization constant + sngl_dict = {sngl[0]: sngl[1]["snglstat"] for sngl in s} - return dq_dict + # Basic noise rate: sum of noise rates multiplied by the + # window they can form coincidences in + ln_noise_rate = coinc_rate.combination_noise_lograte( + sngl_dict, + kwargs["time_addition"], + dets=kwargs.get('dets', None), + ) - def setup_segments(self, key): - """ - Store segments from stat file - """ - ifo = key.split('-')[0] - with h5py.File(self.files[key], 'r') as dq_file: - ifo_grp = dq_file[ifo] - dq_state_segs_dict = {} - for k in ifo_grp['dq_segments'].keys(): - seg_dict = {} - seg_dict['start'] = \ - ifo_grp[f'dq_segments/{k}/segment_starts'][:] - seg_dict['end'] = \ - ifo_grp[f'dq_segments/{k}/segment_ends'][:] - dq_state_segs_dict[k] = seg_dict + ln_noise_rate -= self.benchmark_lograte - return dq_state_segs_dict + # Basic option is not to have any signal-based assumptions, + # so this is zero to begin with + ln_s = 0 - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. 
this statistic is inherited), they will - need updated separately - """ - # Inherit from ExpFitFgBgNormStatistic - uf_expfit = ExpFitFgBgNormStatistic.update_file(self, key) - if uf_expfit: - # We have updated a ExpFitFgBgNormStatistic file already - return True - # We also need to check if the DQ files have updated - if key.endswith('dq_stat_info'): - ifo = key.split('-')[0] - logger.info( - "Updating %s statistic %s file", - ifo, - key + if self.kwargs["phasetd"]: + # Find total volume of phase-time-amplitude space occupied by + # noise coincs, so that the logsignalrate function is properly + # normalized + # Extent of time-difference space occupied + noise_twindow = coinc_rate.multiifo_noise_coincident_area( + self.hist_ifos, + kwargs["time_addition"], + dets=kwargs.get('dets', None), ) - self.dq_rates_by_state[ifo] = self.assign_dq_rates(key) - self.dq_bin_by_tid[ifo] = self.assign_template_bins(key) - return True - return False + # Volume is the allowed time difference window, multiplied by 2pi + # for each phase difference dimension and by allowed range of SNR + # ratio for each SNR ratio dimension : there are (n_ifos - 1) + # dimensions for both phase and SNR + n_ifos = len(self.hist_ifos) + snr_range = (self.srbmax - self.srbmin) * self.swidth + hist_vol = noise_twindow * (2.0 * numpy.pi * snr_range) ** ( + n_ifos - 1 + ) + # Noise PDF is 1/volume, assuming a uniform distribution of noise + # coincs + ln_noise_rate -= numpy.log(hist_vol) - def find_dq_noise_rate(self, trigs): - """Get dq values for a specific ifo and dq states""" + # What is the signal pdf? + stat = {ifo: st for ifo, st in s} + ln_s += self.logsignalrate(stat, slide * step, to_shift) - try: - ifo = trigs.ifo - except AttributeError: - ifo = trigs.get('ifo', None) - if ifo is None: - ifo = self.ifos[0] - assert ifo in self.ifos + # Add in the feature-dependent terms to the signal rate + ln_s += self.logsignalrate_shared(s) - dq_state = trigs['dq_state'] - dq_val = numpy.ones(len(dq_state)) + # Combine the signal and noise rates + loglr = ln_s - ln_noise_rate - tnum = self.curr_tnum - if ifo in self.dq_rates_by_state: - for (i, st) in enumerate(dq_state): - if isinstance(tnum, numpy.ndarray): - bin_name = self.dq_bin_by_tid[ifo][tnum[i]] - else: - bin_name = self.dq_bin_by_tid[ifo][tnum] - dq_val[i] = self.dq_rates_by_state[ifo][bin_name][st] - return dq_val + # cut off underflowing and very small values + loglr[loglr < self.min_stat] = self.min_stat - def find_dq_state_by_time(self, ifo, times): - """Get the dq state for an ifo at times""" - dq_state = numpy.zeros(len(times), dtype=numpy.uint8) - if ifo in self.dq_state_segments: - from pycbc.events.veto import indices_within_times - for k in self.dq_state_segments[ifo]: - starts = self.dq_state_segments[ifo][k]['start'] - ends = self.dq_state_segments[ifo][k]['end'] - inds = indices_within_times(times, starts, ends) - # states are named in file as 'dq_state_N', need to extract N - dq_state[inds] = int(k[9:]) - return dq_state + return loglr - def lognoiserate(self, trigs): + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ - Calculate the log noise rate density over single-ifo ranking + Optimization function to identify coincs too quiet to be of interest - Read in single trigger information, compute the ranking - and rescale by the fitted coefficients alpha and rate + We are trying to get rid of as many events here at the point where + we can be confident that they will not meet ranking statistic + thresholds. 
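# --- Illustrative sketch; not part of the patch. ---
# For a two-detector network, the noise side of rank_stat_coinc above is,
# schematically, the product of the single-detector noise rates times the
# time window in which they can form a coincidence; the signal side is
# whatever ln_s the enabled features provide.  The real calculation uses
# coinc_rate.combination_noise_lograte and the phasetd histogram; this toy
# two-ifo version only shows how the pieces combine.
import numpy

def toy_coinc_loglr(snglstat_1, snglstat_2, coinc_window, ln_signal=0.0,
                    benchmark_lograte=-14.6, min_stat=-30.0):
    # ln rate of chance coincidences between the two detectors
    ln_noise_rate = snglstat_1 + snglstat_2 + numpy.log(coinc_window)
    ln_noise_rate -= benchmark_lograte
    return numpy.maximum(ln_signal - ln_noise_rate, min_stat)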
+ + The calculation here is "What is the minimum required snglstat in + the pivot IFO which could possibly pass the threshold?" + + There are a couple of points to be wary of here, e.g. in the signal + rate calculation, we take the best-case scenario. By using the + best-case for signal rate in this calculation, some events are kept + at this point which are hopeless. Parameters - ----------- - trigs: dict of numpy.ndarrays, h5py group or similar dict-like object - Object holding single detector trigger information. + ---------- + s: list + List of (ifo, single detector statistic) tuples + thresh: float + The threshold value to be checked against + limifo: string + The pivot detector, i.e. the one in which the sngl stat must + reach the value output by ths function + kwargs: various + Key-word arguments to define different features and tunable + values in the statistic. Only time_addition is used here Returns - --------- - lognoiserate: numpy.array - Array of log noise rate density for each input trigger. + ------- + numpy.array + The minimum snglstat values required in the 'pivot' detector + in order to reach the threshold specified """ + # Safety against subclassing and not rethinking this + allowed_names = ["ExpFitStatistic"] + self._check_coinc_lim_subclass(allowed_names) - dq_rate = self.find_dq_noise_rate(trigs) - dq_rate = numpy.maximum(dq_rate, 1) + if thresh <= self.min_stat: + return numpy.ones(len(s[0][1]["snglstat"])) * numpy.inf - logr_n = ExpFitFgBgNormStatistic.lognoiserate( - self, trigs) - logr_n += numpy.log(dq_rate) - return logr_n + # Modify the sngls so that the pivot ifo snglstats are zero + sngl_dict = {sngl[0]: sngl[1]["snglstat"] for sngl in s} + sngl_dict[limifo] = numpy.zeros(len(s[0][1])) - def single(self, trigs): - # make sure every trig has a dq state - try: - ifo = trigs.ifo - except AttributeError: - ifo = trigs.get('ifo', None) - if ifo is None: - ifo = self.ifos[0] - assert ifo in self.ifos + # Noise rate calculated as normal. Because of the modification + # above, this is the rank_stat_coinc noise rate calculation + # minus the pivot ifo's snglstat + ln_noise_rate = coinc_rate.combination_noise_lograte( + sngl_dict, kwargs["time_addition"] + ) + ln_noise_rate -= self.benchmark_lograte - singles = ExpFitFgBgNormStatistic.single(self, trigs) + # Basic option is not to have any signal-based assumptions, + # so this is zero to begin with + ln_s = 0 - if self.low_latency: - # trigs should already have a dq state assigned - singles['dq_state'] = trigs['dq_state'][:] - else: - singles['dq_state'] = self.find_dq_state_by_time( - ifo, trigs['end_time'][:] + if self.kwargs["phasetd"]: + if not self.has_hist: + self.get_hist() + # Assume best-case scenario and use maximum signal rate + ln_s = numpy.log( + self.hist_max * (self.min_snr / self.ref_snr) ** -4.0 ) - return singles + # Shared info is the same as in the coinc calculation + ln_s += self.logsignalrate_shared(s) -class DQExpFitFgBgKDEStatistic(DQExpFitFgBgNormStatistic): + # Combine the signal and noise rates + loglr = ln_s - ln_noise_rate + + # From this combined rate, what is the minimum snglstat value + # in the pivot IFO needed to reach the threshold? + return loglr - thresh + + +class ExpFitCombinedSNR(ExpFitStatistic): """ - The ExpFitFgBgKDEStatistic with DQ-based reranking. 
+ Reworking of ExpFitStatistic designed to resemble network SNR - This is the same as the DQExpFitFgBgNormStatistic except the signal - rate is adjusted according to the KDE statistic files + Use a monotonic function of the negative log noise rate density which + approximates combined (new)snr for coincs with similar newsnr in each ifo """ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): @@ -2513,78 +1757,134 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs): ---------- sngl_ranking: str The name of the ranking to use for the single-detector triggers. + files: list of strs, needed here A list containing the filenames of hdf format files used to help construct the coincident statistics. The files must have a 'stat' attribute which is used to associate them with the appropriate statistic class. + ifos: list of strs, not used here The list of detector names """ - DQExpFitFgBgNormStatistic.__init__(self, sngl_ranking, files=files, - ifos=ifos, **kwargs) - self.kde_names = [] - ExpFitFgBgKDEStatistic.find_kdes(self) - self.kde_by_tid = {} - for kname in self.kde_names: - ExpFitFgBgKDEStatistic.assign_kdes(self, kname) + ExpFitStatistic.__init__( + self, sngl_ranking, files=files, ifos=ifos, **kwargs + ) + # for low-mass templates the exponential slope alpha \approx 6 + self.alpharef = 6.0 + self.single_increasing = True + self.single_dtype = numpy.float32 - def update_file(self, key): - """ - Update file used in this statistic. - If others are used (i.e. this statistic is inherited), they will - need updated separately - """ - # Inherit from DQExpFitFgBgNormStatistic and ExpFitFgBgKDEStatistic - uf_dq = DQExpFitFgBgNormStatistic.update_file(self, key) - uf_kde = ExpFitFgBgKDEStatistic.update_file(self, key) - return uf_dq or uf_kde + # Modifier to get a sensible value of the fit slope below threshold + self.alphabelow = float( + self.kwargs.get("alpha_below_thresh", numpy.inf) + ) - def kde_ratio(self): + def single(self, trigs): """ - Inherited, see docstring for ExpFitFgBgKDEStatistic.kde_signalrate + Calculate the necessary single detector information + + Parameters + ---------- + trigs: dict of numpy.ndarrays, h5py group or similar dict-like object + Object holding single detector trigger information. + + Returns + ------- + numpy.ndarray + The array of single detector values """ - return ExpFitFgBgKDEStatistic.kde_ratio(self) + logr_n = self.lognoiserate(trigs) + _, _, thresh = self.find_fits(trigs) + # shift by log of reference slope alpha + logr_n += -1.0 * numpy.log(self.alpharef) + # add threshold and rescale by reference slope + stat = thresh - (logr_n / self.alpharef) + return numpy.array(stat, ndmin=1, dtype=numpy.float32) - def logsignalrate(self, stats, shift, to_shift): + def rank_stat_single(self, single_info): """ - Inherited, see docstring for ExpFitFgBgKDEStatistic.logsignalrate + Calculate the statistic for single detector candidates + + Parameters + ---------- + single_info: tuple + Tuple containing two values. The first is the ifo (str) and the + second is the single detector triggers. 
+ + Returns + ------- + numpy.ndarray + The array of single detector statistics """ - return ExpFitFgBgKDEStatistic.logsignalrate(self, stats, shift, - to_shift) + if self.single_increasing: + sngl_multiifo = single_info[1] + else: + sngl_multiifo = -1.0 * single_info[1] + return sngl_multiifo - def rank_stat_single(self, single_info, - **kwargs): # pylint:disable=unused-argument + def rank_stat_coinc( + self, s, slide, step, to_shift, **kwargs + ): # pylint:disable=unused-argument """ - Inherited, see docstring for ExpFitFgBgKDEStatistic.rank_stat_single + Calculate the coincident detection statistic. + + Parameters + ---------- + sngls_list: list + List of (ifo, single detector statistic) tuples + slide: (unused in this statistic) + step: (unused in this statistic) + to_shift: list + List of integers indicating what multiples of the time shift will + be applied (unused in this statistic) + + Returns + ------- + numpy.ndarray + Array of coincident ranking statistic values """ - return ExpFitFgBgKDEStatistic.rank_stat_single( - self, - single_info, - **kwargs) + # scale by 1/sqrt(number of ifos) to resemble network SNR + return sum(sngl[1] for sngl in s) / (len(s) ** 0.5) - def coinc_lim_for_thresh(self, s, thresh, limifo, **kwargs): + def coinc_lim_for_thresh( + self, s, thresh, limifo, **kwargs + ): # pylint:disable=unused-argument """ - Inherited, see docstring for - ExpFitFgBgKDEStatistic.coinc_lim_for_thresh + Optimization function to identify coincs too quiet to be of interest + + Calculate the required single detector statistic to exceed + the threshold for each of the input triggers. + + Parameters + ---------- + s: list + List of (ifo, single detector statistic) tuples for all detectors + except limifo. + thresh: float + The threshold on the coincident statistic. + limifo: string + The ifo for which the limit is to be found. + + Returns + ------- + numpy.ndarray + Array of limits on the limifo single statistic to + exceed thresh. 
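# Sanity sketch (made-up values) for the rank_stat_coinc combination above:
# dividing the sum of single-detector values by sqrt(number of detectors)
# makes the result behave like a network SNR when each detector contributes
# a similar value.
import numpy

s_toy = [("H1", numpy.array([8.0])), ("L1", numpy.array([8.0]))]
combined = sum(sngl[1] for sngl in s_toy) / (len(s_toy) ** 0.5)
# combined ~ 11.31, matching the quadrature sum sqrt(8**2 + 8**2) of two
# equally loud triggers
assert numpy.isclose(combined[0], numpy.sqrt(2.0) * 8.0)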
""" - return ExpFitFgBgKDEStatistic.coinc_lim_for_thresh( - self, s, thresh, limifo, **kwargs) + # Safety against subclassing and not rethinking this + allowed_names = ["ExpFitCombinedSNR"] + self._check_coinc_lim_subclass(allowed_names) + + return thresh * ((len(s) + 1) ** 0.5) - sum(sngl[1] for sngl in s) statistic_dict = { - 'quadsum': QuadratureSumStatistic, - 'single_ranking_only': QuadratureSumStatistic, - 'phasetd': PhaseTDStatistic, - 'exp_fit_stat': ExpFitStatistic, - 'exp_fit_csnr': ExpFitCombinedSNR, - 'phasetd_exp_fit_stat': PhaseTDExpFitStatistic, - 'dq_phasetd_exp_fit_fgbg_norm': DQExpFitFgBgNormStatistic, - 'exp_fit_bg_rate': ExpFitBgRateStatistic, - 'phasetd_exp_fit_fgbg_norm': ExpFitFgBgNormStatistic, - 'phasetd_exp_fit_fgbg_bbh_norm': ExpFitFgBgNormBBHStatistic, - 'phasetd_exp_fit_fgbg_kde': ExpFitFgBgKDEStatistic, - 'dq_phasetd_exp_fit_fgbg_kde': DQExpFitFgBgKDEStatistic, + "quadsum": QuadratureSumStatistic, + "single_ranking_only": QuadratureSumStatistic, + "phasetd": PhaseTDStatistic, + "exp_fit_csnr": ExpFitCombinedSNR, + "exp_fit": ExpFitStatistic, } @@ -2610,7 +1910,7 @@ def get_statistic(stat): try: return statistic_dict[stat] except KeyError: - raise RuntimeError('%s is not an available detection statistic' % stat) + raise RuntimeError("%s is not an available detection statistic" % stat) def insert_statistic_option_group(parser, default_ranking_statistic=None): @@ -2642,38 +1942,47 @@ def insert_statistic_option_group(parser, default_ranking_statistic=None): default=default_ranking_statistic, choices=statistic_dict.keys(), required=True if default_ranking_statistic is None else False, - help="The coinc ranking statistic to calculate" + help="The coinc ranking statistic to calculate", ) statistic_opt_group.add_argument( "--sngl-ranking", choices=ranking.sngls_ranking_function_dict.keys(), required=True, - help="The single-detector trigger ranking to use." + help="The single-detector trigger ranking to use.", ) statistic_opt_group.add_argument( "--statistic-files", - nargs='*', - action='append', + nargs="*", + action="append", default=[], - help="Files containing ranking statistic info" + help="Files containing ranking statistic info", ) statistic_opt_group.add_argument( "--statistic-keywords", - nargs='*', + nargs="*", default=[], help="Provide additional key-word arguments to be sent to " - "the statistic class when it is initialized. Should " - "be given in format --statistic-keywords " - "KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ..." + "the statistic class when it is initialized. Should " + "be given in format --statistic-keywords " + "KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ...", + ) + + statistic_opt_group.add_argument( + "--statistic-features", + nargs="*", + default=[], + choices=_allowed_statistic_features, + help="Provide additional arguments to include particular " + "features in the ranking statistic.", ) return statistic_opt_group -def parse_statistic_keywords_opt(stat_kwarg_list): +def parse_statistic_feature_options(stat_features, stat_kwarg_list): """ Parse the list of statistic keywords into an appropriate dictionary. 
@@ -2691,14 +2000,27 @@ def parse_statistic_keywords_opt(stat_kwarg_list): Statistic keywords in dict format """ stat_kwarg_dict = {} + + # Check that the statistic keywords are allowed + for feature in stat_features: + if feature not in _allowed_statistic_features: + err_msg = f"--statistic-feature {feature} not recognised" + raise NotImplementedError(err_msg) + + # Set values for each feature key to a boolean of whether we want them + for feature in _allowed_statistic_features: + stat_kwarg_dict[feature] = feature in stat_features + for inputstr in stat_kwarg_list: try: - key, value = inputstr.split(':') + key, value = inputstr.split(":") stat_kwarg_dict[key] = value except ValueError: - err_txt = "--statistic-keywords must take input in the " \ - "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \ - "Received {}".format(' '.join(stat_kwarg_list)) + err_txt = ( + "--statistic-keywords must take input in the " + "form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " + "Received {}".format(" ".join(stat_kwarg_list)) + ) raise ValueError(err_txt) return stat_kwarg_dict @@ -2736,13 +2058,13 @@ def get_statistic_from_opts(opts, ifos): isinstance(opts.statistic_files[0], list): opts.statistic_files = sum(opts.statistic_files, []) - extra_kwargs = parse_statistic_keywords_opt(opts.statistic_keywords) + extra_kwargs = parse_statistic_feature_options( + opts.statistic_features, + opts.statistic_keywords, + ) stat_class = get_statistic(opts.ranking_statistic)( - opts.sngl_ranking, - opts.statistic_files, - ifos=ifos, - **extra_kwargs + opts.sngl_ranking, opts.statistic_files, ifos=ifos, **extra_kwargs ) return stat_class diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py index e5d61afe4a1..9c7a07130c1 100644 --- a/pycbc/io/hdf.py +++ b/pycbc/io/hdf.py @@ -576,7 +576,7 @@ def __init__(self, trig_file, detector, bank_file=None, veto_file=None, logger.info("Applying threshold of %.3f on %s", filter_threshold, filter_rank) fcn_dsets = (ranking.sngls_ranking_function_dict[filter_rank], - ranking.required_datasets[filter_rank]) + ranking.reqd_datasets[filter_rank]) idx, _ = self.trigs_f.select( lambda rank: rank > filter_threshold, filter_rank, @@ -724,7 +724,7 @@ def mask_to_n_loudest_clustered_events(self, rank_method, statistic_threshold=None, n_loudest=10, cluster_window=10, - statistic_kwargs=None): + ): """Edits the mask property of the class to point to the N loudest single detector events as ranked by ranking statistic. @@ -733,12 +733,9 @@ def mask_to_n_loudest_clustered_events(self, rank_method, statistic using statistic_threshold """ - if statistic_kwargs is None: - statistic_kwargs = {} sds = rank_method.single(self.trig_dict()) stat = rank_method.rank_stat_single( (self.ifo, sds), - **statistic_kwargs ) if len(stat) == 0: # No triggers at all, so just return here diff --git a/test/test_live_coinc_compare.py b/test/test_live_coinc_compare.py index 531bb8d16f0..4039541fcbb 100644 --- a/test/test_live_coinc_compare.py +++ b/test/test_live_coinc_compare.py @@ -73,6 +73,7 @@ def setUp(self, *args): ranking_statistic="phasetd", statistic_files=[stat_file_paths], statistic_keywords=None, + statistic_features=None, timeslide_interval=0.1, background_ifar_limit=100, store_background=True,
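# Usage sketch of the reworked option parsing; the module path
# (pycbc.events.stat) and the feature/keyword strings below are assumptions
# for illustration.
from pycbc.events import stat

extra_kwargs = stat.parse_statistic_feature_options(
    ["phasetd"],                  # from --statistic-features
    ["alpha_below_thresh:6"],     # from --statistic-keywords
)
# extra_kwargs contains a boolean entry for every allowed feature (True only
# for "phasetd" here) plus the keyword value kept as a string; these are the
# **extra_kwargs forwarded to the statistic class by get_statistic_from_opts.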