Commit: Bring up to date with recent changes
GarethCabournDavies committed Nov 8, 2024 · 1 parent e9f3c1f · commit 13489d7
Showing 2 changed files with 139 additions and 57 deletions.
10 changes: 7 additions & 3 deletions bin/all_sky_search/pycbc_sngls_findtrigs
@@ -161,6 +161,11 @@ for i, tnum in enumerate(template_ids):
"Calculating statistic in template %d out of %d",
i, len(template_ids),
)
else:
logging.debug(
"Calculating statistic in template %d out of %d",
i, len(template_ids),
)
tids_uncut = trigs.set_template(tnum)

trigger_keep_ids = cuts.apply_trigger_cuts(trigs, trigger_cut_dict,
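
The new `else` branch mirrors the INFO-level progress message above it at DEBUG level, so per-template detail is only emitted when verbose logging is enabled. A minimal sketch of the pattern, assuming the INFO branch fires on a fixed cadence (the actual condition sits above this hunk):

```python
import logging

template_ids = range(1000)  # stand-in for the real template list
for i, tnum in enumerate(template_ids):
    if i % 100 == 0:  # assumed cadence; the real test is not shown in this hunk
        logging.info(
            "Calculating statistic in template %d out of %d",
            i, len(template_ids),
        )
    else:
        logging.debug(
            "Calculating statistic in template %d out of %d",
            i, len(template_ids),
        )
```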
@@ -177,8 +182,7 @@ for i, tnum in enumerate(template_ids):

# Stat class instance to calculate the ranking statistic
sds = rank_method.single(trigs)[trigger_keep_ids]
stat_t = rank_method.rank_stat_single((ifo, sds),
**extra_kwargs)
stat_t = rank_method.rank_stat_single((ifo, sds))
trigger_times = trigs['end_time'][:][trigger_keep_ids]
if args.cluster_window is not None:
cid = coinc.cluster_over_time(stat_t, trigger_times,
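
`coinc.cluster_over_time` keeps only the loudest trigger within a sliding time window. A simplified sketch of the idea (not PyCBC's exact algorithm), assuming non-empty input arrays:

```python
import numpy as np

def cluster_over_time_sketch(stat, times, window):
    """Return indices of the loudest trigger in each time cluster.

    Simplified rule: a trigger joins the current cluster if it lies
    within `window` of the cluster's current loudest trigger.
    """
    order = np.argsort(times)
    keep = []
    best = order[0]
    for idx in order[1:]:
        if times[idx] - times[best] <= window:
            if stat[idx] > stat[best]:
                best = idx  # louder trigger takes over the cluster
        else:
            keep.append(best)  # close the cluster, start a new one
            best = idx
    keep.append(best)
    return np.array(keep, dtype=int)
```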
@@ -195,7 +199,7 @@ for i, tnum in enumerate(template_ids):
# Perform decimation
dec_facs = np.ones_like(template_ids_all)
stat_all = np.array(stat_all)
template_ids_all = np.array(template_ids_all)
template_ids_all = np.array(template_ids_all, dtype=int)
trigger_ids_all = np.array(trigger_ids_all)
trigger_times_all = np.array(trigger_times_all)

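The explicit `dtype=int` matters when `template_ids_all` is an empty list: NumPy defaults an empty array to float64, which cannot later be used as an index array. A quick illustration:

```python
import numpy as np

np.array([]).dtype             # float64 by default
np.array([], dtype=int).dtype  # integer: safe for fancy indexing
# np.ones(3)[np.array([])]     # would raise IndexError (non-integer index)
```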
186 changes: 132 additions & 54 deletions pycbc/events/stat.py
@@ -930,6 +930,8 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
self.fits_by_tid = {}
for i in self.bg_ifos:
self.fits_by_tid[i] = self.assign_fits(i)
if self.kwargs["normalize_fit_rate"]:
self.reassign_rate(i)

# These are important for the coinc_lim_for_thresh method
# Is the single threshold a < or > limit?
@@ -947,6 +949,11 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
self.kwargs.get("minimum_statistic_cutoff", -30.0)
)

# Modifier to get a sensible value of the fit slope below threshold
self.alphabelow = float(
self.kwargs.get("alpha_below_thresh", numpy.inf)
)

# This will be used to keep track of the template number being used
self.curr_tnum = None

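Statistic keyword values typically arrive as strings (the removed `except ValueError` handling for `'None'` further down confirms this), and `float('inf')` parses cleanly, so `numpy.inf` works both as the default and as an explicit "disabled" setting; `numpy.isinf` is then the cheap runtime test in `lognoiserate`. A small illustration:

```python
import numpy

kwargs = {"alpha_below_thresh": "6"}  # e.g. parsed from the command line
alphabelow = float(kwargs.get("alpha_below_thresh", numpy.inf))  # 6.0
numpy.isinf(alphabelow)  # False: below-threshold branch is active

alphabelow = float({}.get("alpha_below_thresh", numpy.inf))      # inf
numpy.isinf(alphabelow)  # True: branch is skipped entirely
```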
@@ -967,14 +974,20 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
# Reweight the noise rate by the dq reweighting factor
self.dq_rates_by_state = {}
self.dq_bin_by_tid = {}
self.dq_state_segments = {}
self.dq_state_segments = None
self.low_latency = False
self.single_dtype.append(('dq_state', int))

for ifo in self.ifos:
key = f"{ifo}-dq_stat_info"
if key in self.files.keys():
self.dq_rates_by_state[ifo] = self.assign_dq_rates(key)
self.dq_bin_by_tid[ifo] = self.assign_template_bins(key)
self.dq_state_segments[ifo] = self.setup_segments(key)
self.check_low_latency(key)
if not self.low_latency:
if self.dq_state_segments is None:
self.dq_state_segments = {}
self.dq_state_segments[ifo] = self.setup_segments(key)

if self.kwargs["chirp_mass"]:
# Reweight the signal rate by the chirp mass of the template
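
The chirp-mass block is collapsed in this view, but the quantity it relies on is standard; `mchirp_from_mass1_mass2` (used further down in `single`) implements the usual definition. A self-contained sketch:

```python
def mchirp_sketch(mass1, mass2):
    """Chirp mass: (m1*m2)**(3/5) / (m1+m2)**(1/5)."""
    return (mass1 * mass2) ** 0.6 / (mass1 + mass2) ** 0.2

mchirp_sketch(1.4, 1.4)  # ~1.22 solar masses for a canonical BNS
```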
Expand Down Expand Up @@ -1049,19 +1062,16 @@ def assign_dq_rates(self, key):

def setup_segments(self, key):
"""
Check if segments definitions are in stat file
If they are, we are running offline and need to store them
If they aren't, we are running online
Store segments from stat file
"""
ifo = key.split("-")[0]
with h5py.File(self.files[key], "r") as dq_file:
ifo_grp = dq_file[ifo]
dq_state_segs_dict = {}
for k in ifo_grp["dq_segments"].keys():
seg_dict = {}
seg_dict["start"] = ifo_grp[f"dq_segments/{k}/segment_starts"][
:
]
seg_dict["start"] = \
ifo_grp[f"dq_segments/{k}/segment_starts"][:]
seg_dict["end"] = ifo_grp[f"dq_segments/{k}/segment_ends"][:]
dq_state_segs_dict[k] = seg_dict

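`setup_segments` implies a specific layout for the offline `<IFO>-dq_stat_info` files. A reading sketch reconstructed from the dataset paths above (the group structure is an inference from this diff, not from documentation):

```python
import h5py

def read_dq_segments(path, ifo):
    """Read the assumed layout: <ifo>/dq_segments/<key>/segment_{starts,ends}."""
    segs = {}
    with h5py.File(path, "r") as dq_file:
        for k in dq_file[f"{ifo}/dq_segments"]:
            segs[k] = {
                "start": dq_file[f"{ifo}/dq_segments/{k}/segment_starts"][:],
                "end": dq_file[f"{ifo}/dq_segments/{k}/segment_ends"][:],
            }
    return segs
```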
@@ -1070,23 +1080,15 @@ def find_dq_noise_rate(self, trigs, dq_state):
def find_dq_noise_rate(self, trigs, dq_state):
"""Get dq values for a specific ifo and dq states"""

try:
ifo = trigs.ifo
except AttributeError:
ifo = trigs["ifo"]
assert len(numpy.unique(ifo)) == 1
# Should be exactly one ifo provided
ifo = ifo[0]

dq_val = numpy.zeros(len(dq_state))
dq_val = numpy.ones(len(dq_state))

if ifo in self.dq_rates_by_state:
if self.curr_ifo in self.dq_rates_by_state:
for i, st in enumerate(dq_state):
if isinstance(self.curr_tnum, numpy.ndarray):
bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum[i]]
bin_name = self.dq_bin_by_tid[self.curr_ifo][self.curr_tnum[i]]
else:
bin_name = self.dq_bin_by_tid[ifo][self.curr_tnum]
dq_val[i] = self.dq_rates_by_state[ifo][bin_name][st]
bin_name = self.dq_bin_by_tid[self.curr_ifo][self.curr_tnum]
dq_val[i] = self.dq_rates_by_state[self.curr_ifo][bin_name][st]
return dq_val

def find_dq_state_by_time(self, ifo, times):
@@ -1103,6 +1105,53 @@ def find_dq_state_by_time(self, ifo, times):
dq_state[inds] = int(k[9:])
return dq_state

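The `int(k[9:])` in `find_dq_state_by_time` implies the segment group keys are named `dq_state_<N>`: the prefix `dq_state_` is exactly nine characters. That naming is inferred from the slice, not stated in the diff:

```python
k = "dq_state_2"  # inferred key format
int(k[9:])        # -> 2, the integer dq state for those segments
```
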
def check_low_latency(self, key):
"""
Check if the statistic file indicates low latency mode.
Parameters
----------
key: str
Statistic file key string.
Returns
-------
None
"""
ifo = key.split('-')[0]
with h5py.File(self.files[key], 'r') as dq_file:
ifo_grp = dq_file[ifo]
if 'dq_segments' not in ifo_grp.keys():
# if segs are not in file, we must be in LL
if self.dq_state_segments is not None:
raise ValueError(
'Either all dq stat files must have segments or none'
)
self.low_latency = True
elif self.low_latency:
raise ValueError(
'Either all dq stat files must have segments or none'
)

def reassign_rate(self, ifo):
"""
Reassign the rate to be a number per unit time rather than an
arbitrarily normalised number.
Parameters
-----------
ifo: str
The ifo to consider.
"""
with h5py.File(self.files[f'{ifo}-fit_coeffs'], 'r') as coeff_file:
analysis_time = float(coeff_file.attrs['analysis_time'])
fbt = 'fit_by_template' in coeff_file

self.fits_by_tid[ifo]['smoothed_rate_above_thresh'] /= analysis_time
self.fits_by_tid[ifo]['smoothed_rate_in_template'] /= analysis_time
# The by-template fits may have been stored in the smoothed fits file
if fbt:
self.fits_by_tid[ifo]['fit_by_rate_above_thresh'] /= analysis_time
self.fits_by_tid[ifo]['fit_by_rate_in_template'] /= analysis_time

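`reassign_rate` turns the stored fit numbers into rates per unit time by dividing by the file's `analysis_time` attribute. With illustrative numbers (not from any real file):

```python
import numpy

smoothed_rate_above_thresh = numpy.array([120.0, 80.0])  # per-template counts
analysis_time = 86400.0                                  # seconds analysed
rate = smoothed_rate_above_thresh / analysis_time        # triggers per second
```
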
def assign_fits(self, ifo):
"""
Extract fits from single-detector rate fit files
@@ -1152,13 +1201,6 @@ def assign_fits(self, ifo):
"count_in_template"
][:][tid_sort].astype(float)

if self.kwargs["normalize_fit_rate"]:
analysis_time = float(coeff_file.attrs["analysis_time"])
fits_by_tid_dict["smoothed_rate_above_thresh"] /= analysis_time
fits_by_tid_dict["smoothed_rate_in_template"] /= analysis_time
if "fit_by_template" in coeff_file:
fits_by_tid_dict["fit_by_rate_above_thresh"] /= analysis_time
fits_by_tid_dict["fit_by_rate_in_template"] /= analysis_time

# Keep the fit threshold in fits_by_tid
fits_by_tid_dict["thresh"] = coeff_file.attrs["stat_threshold"]
@@ -1173,18 +1215,49 @@ def update_file(self, key):
If others are used (i.e. this statistic is inherited), they will
need to be updated separately
"""
# First - check if the phasetd file has been updated, this is
# inherited from the PhaseTDStatistic
if PhaseTDStatistic.update_file(self, key):
return True

if key.endswith('-fit_coeffs'):
# This is a ExpFitStatistic file which needs updating
# Which ifo is it?
ifo = key[:2]
self.fits_by_tid[ifo] = self.assign_fits(ifo)
if self.kwargs["normalize_fit_rate"]:
self.reassign_rate(ifo)
self.get_ref_vals(ifo)
logger.info(
"Updating %s statistic %s file",
''.join(self.ifos),
key
)
return True

# Is the key a KDE statistic file that we update here?
if key.endswith('kde_file'):
logger.info(
"Updating %s statistic %s file",
''.join(self.ifos),
key
)
kde_style = key.split('-')[0]
self.assign_kdes(kde_style)
return True

# We also need to check if the DQ files have updated
if key.endswith('dq_stat_info'):
ifo = key.split('-')[0]
logger.info(
"Updating %s statistic %s file",
ifo,
key
)
self.dq_rates_by_state[ifo] = self.assign_dq_rates(key)
self.dq_bin_by_tid[ifo] = self.assign_template_bins(key)
return True

return False

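`update_file` tries the inherited `PhaseTDStatistic` handler first, then dispatches on the key's suffix, returning True as soon as one handler claims the file; callers can therefore refresh whichever input changed without rebuilding the statistic. A hypothetical low-latency refresh helper (names illustrative):

```python
import logging

logger = logging.getLogger(__name__)

def refresh_inputs(stat_instance, changed_keys):
    """Ask a statistic object to re-read any regenerated input files."""
    for key in changed_keys:
        if not stat_instance.update_file(key):
            logger.warning("No handler refreshed %s", key)

# e.g. refresh_inputs(stat, ["H1-fit_coeffs", "L1-dq_stat_info"])
```
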
def get_ref_vals(self, ifo):
@@ -1311,38 +1384,16 @@ def lognoiserate(self, trigs):
+ numpy.log(ratei)
)

alphabelow = self.kwargs.get("alpha_below_thresh", 6.0)
try:
alphabelow = float(alphabelow)
if not numpy.isinf(self.alphabelow):
# Above the threshold we use the usual fit coefficient (alphai)
# below threshold use specified alphabelow
bt = sngl_stat < thresh
lognoiselbt = (
-alphabelow * (sngl_stat - thresh)
+ numpy.log(alphabelow)
-self.alphabelow * (sngl_stat - thresh)
+ numpy.log(self.alphabelow)
+ numpy.log(ratei)
)
lognoisel[bt] = lognoiselbt[bt]
except ValueError:
# The float conversion will raise this if passed e.g. 'None'
# In that case we simply use the fit value everywhere
pass

if self.kwargs["dq"]:
# Reweight the lognoiserate things by the dq reweighting factor
# make sure every trig has a dq state
try:
ifo = trigs.ifo
except AttributeError:
ifo = trigs["ifo"]
assert len(numpy.unique(ifo)) == 1
# Should be exactly one ifo provided
ifo = ifo[0]

dq_state = self.find_dq_state_by_time(ifo, trigs["end_time"][:])
dq_rate = self.find_dq_noise_rate(trigs, dq_state)
dq_rate = numpy.maximum(dq_rate, 1)
lognoisel += numpy.log(dq_rate)

return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32)

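The two-slope behaviour of `lognoiserate` in compact form, using made-up fit values: above the fit threshold the per-template slope `alphai` applies, below it the configured `alphabelow` is substituted, and `numpy.inf` (the new default) disables the substitution entirely:

```python
import numpy

sngl_stat = numpy.array([5.0, 6.5, 9.0])  # made-up ranking values
thresh, alphai, ratei = 6.0, 4.0, 0.01    # made-up fit parameters
alphabelow = 6.0

lognoisel = (-alphai * (sngl_stat - thresh)
             + numpy.log(alphai) + numpy.log(ratei))
if not numpy.isinf(alphabelow):
    bt = sngl_stat < thresh               # below-threshold triggers
    lognoiselbt = (-alphabelow * (sngl_stat - thresh)
                   + numpy.log(alphabelow) + numpy.log(ratei))
    lognoisel[bt] = lognoiselbt[bt]
```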
@@ -1364,11 +1415,20 @@ def single(self, trigs):
The array of single detector values
"""

try:
self.curr_ifo = trigs.ifo
except AttributeError:
self.curr_ifo = trigs.get('ifo', None)
if self.curr_ifo is None:
self.curr_ifo = self.ifos[0]
assert self.curr_ifo in self.ifos
# Should be exactly one ifo provided
assert len(numpy.unique(self.curr_ifo)) == 1

# single-ifo stat = log of noise rate
sngl_stat = self.lognoiserate(trigs)
# populate other fields to calculate phase/time/amp consistency
singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype)
singles["snglstat"] = sngl_stat
singles["coa_phase"] = trigs["coa_phase"][:]
singles["end_time"] = trigs["end_time"][:]
singles["snr"] = trigs["snr"][:]
@@ -1397,6 +1457,19 @@ def single(self, trigs):
mass2 = trigs['mass2']
self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2)

if self.kwargs["dq"]:
if self.low_latency:
# trigs should already have a dq state assigned
singles['dq_state'] = trigs['dq_state'][:]
else:
singles['dq_state'] = self.find_dq_state_by_time(
self.curr_ifo, trigs['end_time'][:]
)
dq_rate = self.find_dq_noise_rate(trigs, singles['dq_state'])
dq_rate = numpy.maximum(dq_rate, 1)
sngl_stat += numpy.log(dq_rate)

singles["snglstat"] = sngl_stat
return numpy.array(singles, ndmin=1)

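The `numpy.maximum(dq_rate, 1)` clamp means a data-quality state can only increase the assumed noise rate (a non-negative log correction), never reward a clean state. For example:

```python
import numpy

dq_rate = numpy.array([0.3, 1.0, 7.5])
numpy.log(numpy.maximum(dq_rate, 1))  # -> [0., 0., ~2.01]; never negative
```
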
def sensitive_volume_factor(self, sngls):
@@ -1702,6 +1775,11 @@ def __init__(self, sngl_ranking, files=None, ifos=None, **kwargs):
self.single_increasing = True
self.single_dtype = numpy.float32

# Modifier to get a sensible value of the fit slope below threshold
self.alphabelow = float(
self.kwargs.get("alpha_below_thresh", numpy.inf)
)

def single(self, trigs):
"""
Calculate the necessary single detector information
