Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to single-detector trigger fitting code for PyCBC Live #4486

Merged
merged 10 commits into from
Sep 28, 2023
121 changes: 69 additions & 52 deletions bin/live/pycbc_live_combine_single_fits
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
"""Combine PyCBC Live single-detector trigger fitting parameters from several
different files."""

import h5py, numpy as np, argparse
import argparse
import logging
import numpy as np
import h5py
import pycbc


Expand Down Expand Up @@ -45,66 +47,80 @@ if args.conservative_percentile < 50 or \
"otherwise it is either not a percentile, or not "
"conservative.")

counts_all = {ifo: [] for ifo in args.ifos}
alphas_all = {ifo: [] for ifo in args.ifos}
analysis_dates = []
logging.info("%d input files", len(args.trfits_files))

# We only want to combine fit results if they were done with the same
# configuration. So start by finding the most recent fit file and reading its
# configuration parameters.

with h5py.File(args.trfits_files[0], 'r') as fit_f0:
# Store some attributes so we can check that all files are
# comparable
logging.info("Determining the most recent configuration parameters")

# Keep the upper and lower bins
bl = fit_f0['bins_lower'][:]
bu = fit_f0['bins_upper'][:]
latest_date = None
for f in args.trfits_files:
with h5py.File(f, 'r') as fit_f:
if latest_date is None or fit_f.attrs['analysis_date'] > latest_date:
latest_date = fit_f.attrs['analysis_date']
bl = fit_f['bins_lower'][:]
bu = fit_f['bins_upper'][:]
sngl_rank = fit_f.attrs['sngl_ranking']
fit_thresh = fit_f.attrs['fit_threshold']
fit_func = fit_f.attrs['fit_function']

sngl_rank = fit_f0.attrs['sngl_ranking']
fit_thresh = fit_f0.attrs['fit_threshold']
fit_func = fit_f0.attrs['fit_function']
# Now go back through the fit files and read the actual information. Skip the
# files that have fit parameters inconsistent with what we found earlier.

live_times = {ifo: [] for ifo in args.ifos}
logging.info("Reading individual fit results")

live_times = {ifo: [] for ifo in args.ifos}
trigger_file_starts = []
trigger_file_ends = []

n_files = len(args.trfits_files)
logging.info("Checking through %d files", n_files)
counts_all = {ifo: [] for ifo in args.ifos}
alphas_all = {ifo: [] for ifo in args.ifos}

for f in args.trfits_files:
fits_f = h5py.File(f, 'r')
# Check that the file uses the same setup as file 0, to make sure
# all coefficients are comparable

assert fits_f.attrs['sngl_ranking'] == sngl_rank
assert fits_f.attrs['fit_threshold'] == fit_thresh
assert fits_f.attrs['fit_function'] == fit_func
assert all(fits_f['bins_lower'][:] == bl)
assert all(fits_f['bins_upper'][:] == bu)

# Get the time of the first/last triggers in the trigger_fits file
gps_last = 0
gps_first = np.inf
for ifo in args.ifos:
if ifo not in fits_f:
with h5py.File(f, 'r') as fits_f:
# Check that the file uses the same setup as file 0, to make sure
# all coefficients are comparable
same_conf = (fits_f.attrs['sngl_ranking'] == sngl_rank
and fits_f.attrs['fit_threshold'] == fit_thresh
and fits_f.attrs['fit_function'] == fit_func
and all(fits_f['bins_lower'][:] == bl)
and all(fits_f['bins_upper'][:] == bu))
if not same_conf:
logging.warn(
"Found a change in the fit configuration, skipping %s",
f
)
continue
trig_times = fits_f[ifo]['triggers']['end_time'][:]
gps_last = max(gps_last, trig_times.max())
gps_first = min(gps_first, trig_times.min())
trigger_file_starts.append(gps_first)
trigger_file_ends.append(gps_last)

for ifo in args.ifos:
if ifo not in fits_f:
live_times[ifo].append(0)
counts_all[ifo].append(-1 * np.ones_like(bl))
alphas_all[ifo].append(-1 * np.ones_like(bl))
else:
live_times[ifo].append(fits_f[ifo].attrs['live_time'])
counts_all[ifo].append(fits_f[ifo + '/counts'][:])
alphas_all[ifo].append(fits_f[ifo + '/fit_coeff'][:])
if any(np.isnan(fits_f[ifo + '/fit_coeff'][:])):
logging.info("nan in %s, %s", f, ifo)
logging.info(fits_f[ifo + '/fit_coeff'][:])
fits_f.close()

# We now determine the (approximate) start/end times of the
# trigger_fits file via the time of the first/last triggers in it.
# Ideally this would be recorded exactly in the file.
gps_last = 0
gps_first = np.inf
for ifo in args.ifos:
if ifo not in fits_f:
continue
trig_times = fits_f[ifo]['triggers']['end_time'][:]
gps_last = max(gps_last, trig_times.max())
gps_first = min(gps_first, trig_times.min())
trigger_file_starts.append(gps_first)
trigger_file_ends.append(gps_last)

# Read the fitting parameters
for ifo in args.ifos:
if ifo not in fits_f:
live_times[ifo].append(0)
counts_all[ifo].append(-1 * np.ones_like(bl))
alphas_all[ifo].append(-1 * np.ones_like(bl))
else:
ffi = fits_f[ifo]
live_times[ifo].append(ffi.attrs['live_time'])
counts_all[ifo].append(ffi['counts'][:])
alphas_all[ifo].append(ffi['fit_coeff'][:])
if any(np.isnan(ffi['fit_coeff'][:])):
logging.warn("nan in %s, %s", f, ifo)
logging.warn(ffi['fit_coeff'][:])

# Set up the date array, this is stored as an offset from the first trigger time of
# the first file to the last trigger of the file
Expand All @@ -115,7 +131,7 @@ ad_order = np.argsort(trigger_file_starts)
start_time_n = trigger_file_starts[ad_order[0]]
ad = trigger_file_ends[ad_order] - start_time_n

# Get the counts and alphas
# Swap the time and bin dimensions for counts and alphas
counts_bin = {ifo: [c for c in zip(*counts_all[ifo])] for ifo in args.ifos}
alphas_bin = {ifo: [a for a in zip(*alphas_all[ifo])] for ifo in args.ifos}

Expand All @@ -125,6 +141,7 @@ cons_alphas_out = {ifo: np.zeros(len(alphas_bin[ifo])) for ifo in args.ifos}
cons_counts_out = {ifo: np.inf * np.ones(len(alphas_bin[ifo])) for ifo in args.ifos}

logging.info("Writing results")

fout = h5py.File(args.output, 'w')
fout.attrs['fit_threshold'] = fit_thresh
fout.attrs['conservative_percentile'] = args.conservative_percentile
Expand Down
Loading