gwastro · titodalcanton · Sep 28, 2023 · Sep 12, 2023 · Sep 12, 2023 · Sep 12, 2023
diff --git a/bin/live/pycbc_live_combine_single_fits b/bin/live/pycbc_live_combine_single_fits
@@ -15,8 +15,10 @@
 """Combine PyCBC Live single-detector trigger fitting parameters from several
 different files."""
 
-import h5py, numpy as np, argparse
+import argparse
 import logging
+import numpy as np
+import h5py
 import pycbc
 
 
@@ -45,66 +47,80 @@ if args.conservative_percentile < 50 or \
                  "otherwise it is either not a percentile, or not "
                  "conservative.")
 
-counts_all = {ifo: [] for ifo in args.ifos}
-alphas_all = {ifo: [] for ifo in args.ifos}
-analysis_dates = []
+logging.info("%d input files", len(args.trfits_files))
+
+# We only want to combine fit results if they were done with the same
+# configuration.  So start by finding the most recent fit file and reading its
+# configuration parameters.
 
-with h5py.File(args.trfits_files[0], 'r') as fit_f0:
-    # Store some attributes so we can check that all files are
-    # comparable
+logging.info("Determining the most recent configuration parameters")
 
-    # Keep the upper and lower bins
-    bl = fit_f0['bins_lower'][:]
-    bu = fit_f0['bins_upper'][:]
+latest_date = None
+for f in args.trfits_files:
+    with h5py.File(f, 'r') as fit_f:
+        if latest_date is None or fit_f.attrs['analysis_date'] > latest_date:
+            latest_date = fit_f.attrs['analysis_date']
+            bl = fit_f['bins_lower'][:]
+            bu = fit_f['bins_upper'][:]
+            sngl_rank = fit_f.attrs['sngl_ranking']
+            fit_thresh = fit_f.attrs['fit_threshold']
+            fit_func = fit_f.attrs['fit_function']
 
-    sngl_rank = fit_f0.attrs['sngl_ranking']
-    fit_thresh = fit_f0.attrs['fit_threshold']
-    fit_func = fit_f0.attrs['fit_function']
+# Now go back through the fit files and read the actual information.  Skip the
+# files that have fit parameters inconsistent with what we found earlier.
 
-live_times = {ifo: [] for ifo in args.ifos}
+logging.info("Reading individual fit results")
 
+live_times = {ifo: [] for ifo in args.ifos}
 trigger_file_starts = []
 trigger_file_ends = []
-
-n_files = len(args.trfits_files)
-logging.info("Checking through %d files", n_files)
+counts_all = {ifo: [] for ifo in args.ifos}
+alphas_all = {ifo: [] for ifo in args.ifos}
 
 for f in args.trfits_files:
-    fits_f = h5py.File(f, 'r')
-    # Check that the file uses the same setup as file 0, to make sure
-    # all coefficients are comparable
-
-    assert fits_f.attrs['sngl_ranking'] == sngl_rank
-    assert fits_f.attrs['fit_threshold'] == fit_thresh
-    assert fits_f.attrs['fit_function'] == fit_func
-    assert all(fits_f['bins_lower'][:] == bl)
-    assert all(fits_f['bins_upper'][:] == bu)
-
-    # Get the time of the first/last triggers in the trigger_fits file
-    gps_last = 0
-    gps_first = np.inf
-    for ifo in args.ifos:
-        if ifo not in fits_f:
+    with h5py.File(f, 'r') as fits_f:
+        # Check that the file uses the same setup as file 0, to make sure
+        # all coefficients are comparable
+        same_conf = (fits_f.attrs['sngl_ranking'] == sngl_rank
+                     and fits_f.attrs['fit_threshold'] == fit_thresh
+                     and fits_f.attrs['fit_function'] == fit_func
+                     and all(fits_f['bins_lower'][:] == bl)
+                     and all(fits_f['bins_upper'][:] == bu))
+        if not same_conf:
+            logging.warn(
+                "Found a change in the fit configuration, skipping %s",
+                f
+            )
             continue
-        trig_times = fits_f[ifo]['triggers']['end_time'][:]
-        gps_last = max(gps_last, trig_times.max())
-        gps_first = min(gps_first, trig_times.min())
-    trigger_file_starts.append(gps_first)
-    trigger_file_ends.append(gps_last)
-
-    for ifo in args.ifos:
-        if ifo not in fits_f:
-            live_times[ifo].append(0)
-            counts_all[ifo].append(-1 * np.ones_like(bl))
-            alphas_all[ifo].append(-1 * np.ones_like(bl))
-        else:
-            live_times[ifo].append(fits_f[ifo].attrs['live_time'])
-            counts_all[ifo].append(fits_f[ifo + '/counts'][:])
-            alphas_all[ifo].append(fits_f[ifo + '/fit_coeff'][:])
-            if any(np.isnan(fits_f[ifo + '/fit_coeff'][:])):
-                logging.info("nan in %s, %s", f, ifo)
-                logging.info(fits_f[ifo + '/fit_coeff'][:])
-    fits_f.close()
+
+        # We now determine the (approximate) start/end times of the
+        # trigger_fits file via the time of the first/last triggers in it.
+        # Ideally this would be recorded exactly in the file.
+        gps_last = 0
+        gps_first = np.inf
+        for ifo in args.ifos:
+            if ifo not in fits_f:
+                continue
+            trig_times = fits_f[ifo]['triggers']['end_time'][:]
+            gps_last = max(gps_last, trig_times.max())
+            gps_first = min(gps_first, trig_times.min())
+        trigger_file_starts.append(gps_first)
+        trigger_file_ends.append(gps_last)
+
+        # Read the fitting parameters
+        for ifo in args.ifos:
+            if ifo not in fits_f:
+                live_times[ifo].append(0)
+                counts_all[ifo].append(-1 * np.ones_like(bl))
+                alphas_all[ifo].append(-1 * np.ones_like(bl))
+            else:
+                ffi = fits_f[ifo]
+                live_times[ifo].append(ffi.attrs['live_time'])
+                counts_all[ifo].append(ffi['counts'][:])
+                alphas_all[ifo].append(ffi['fit_coeff'][:])
+                if any(np.isnan(ffi['fit_coeff'][:])):
+                    logging.warn("nan in %s, %s", f, ifo)
+                    logging.warn(ffi['fit_coeff'][:])
 
 # Set up the date array, this is stored as an offset from the first trigger time of
 # the first file to the last trigger of the file
@@ -115,7 +131,7 @@ ad_order = np.argsort(trigger_file_starts)
 start_time_n = trigger_file_starts[ad_order[0]]
 ad = trigger_file_ends[ad_order] - start_time_n
 
-# Get the counts and alphas
+# Swap the time and bin dimensions for counts and alphas
 counts_bin = {ifo: [c for c in zip(*counts_all[ifo])] for ifo in args.ifos}
 alphas_bin = {ifo: [a for a in zip(*alphas_all[ifo])] for ifo in args.ifos}
 
@@ -125,6 +141,7 @@ cons_alphas_out = {ifo: np.zeros(len(alphas_bin[ifo])) for ifo in args.ifos}
 cons_counts_out = {ifo: np.inf * np.ones(len(alphas_bin[ifo])) for ifo in args.ifos}
 
 logging.info("Writing results")
+
 fout = h5py.File(args.output, 'w')
 fout.attrs['fit_threshold'] = fit_thresh
 fout.attrs['conservative_percentile'] = args.conservative_percentile