
[pycbc live] Allowing the use of template fits in the pycbc live ranking statistic #4527

Merged: 34 commits merged on Apr 8, 2024

Changes from all commits (34 commits)
379da30
Adding all template_fit changes to this branch
ArthurTolley Oct 4, 2023
84f28a7
Merge branch 'gwastro:master' into template_fits
ArthurTolley Oct 12, 2023
e2b157d
Updating errors to print errors
ArthurTolley Oct 16, 2023
211eb61
removing assertion and comment
ArthurTolley Oct 17, 2023
0a39883
updating comment
ArthurTolley Oct 17, 2023
ffc484c
Adding coinc_threshold to test case
ArthurTolley Oct 19, 2023
4c63d57
Reworking error handling and vectorizing mchirp calc
ArthurTolley Oct 24, 2023
bdb8060
Merge branch 'gwastro:master' into template_fits
ArthurTolley Dec 6, 2023
e6d264f
Merge branch 'gwastro:master' into template_fits
ArthurTolley Dec 13, 2023
47c91bd
adding try except for code also used by coinc_findtrigs
ArthurTolley Dec 15, 2023
d6d020c
forgot the pass in the except
ArthurTolley Dec 15, 2023
c95d19a
Merge branch 'gwastro:master' into template_fits
ArthurTolley Feb 1, 2024
357742b
Add mass1 and mass2 parameters to triggers for test
ArthurTolley Feb 1, 2024
73a73c3
Moving curr_mchirp to correct stat
ArthurTolley Feb 1, 2024
4add7a9
Revert "Moving curr_mchirp to correct stat"
ArthurTolley Feb 1, 2024
6715e2f
adding changes properly
ArthurTolley Feb 1, 2024
120f8d9
if mchirp change
ArthurTolley Feb 2, 2024
ceea096
removing len(i1) check
ArthurTolley Feb 2, 2024
7f6166f
Merge branch 'gwastro:master' into template_fits
ArthurTolley Feb 2, 2024
5136b97
Changing coinc-threshold to coinc-window-pad
ArthurTolley Feb 5, 2024
0e747d6
Checking ifo from class instance not triggers
ArthurTolley Feb 5, 2024
402a449
removing erroneous string
ArthurTolley Feb 5, 2024
631dc22
Merge branch 'gwastro:master' into template_fits
ArthurTolley Feb 8, 2024
21ce202
Merge branch 'gwastro:master' into template_fits
ArthurTolley Feb 11, 2024
84c63b0
only create Detector objects once!
ArthurTolley Mar 1, 2024
d00efc3
Merge branch 'gwastro:master' into template_fits
ArthurTolley Mar 1, 2024
c367a6c
check if dets is given in kwargs, it isn't given in offline
ArthurTolley Mar 8, 2024
befd1c4
Merge branch 'gwastro:master' into template_fits
ArthurTolley Mar 10, 2024
fa5abe5
missed a dets reference
ArthurTolley Mar 10, 2024
861ce2b
removing whitespace
ArthurTolley Mar 11, 2024
640a68b
codeclimate, line length fix
ArthurTolley Mar 11, 2024
173835d
Apply suggestions from code review
GarethCabournDavies Apr 5, 2024
7a0a0b0
Apply GCD suggestions
GarethCabournDavies Apr 5, 2024
7169170
specify an older branch of BBHx in tox.ini (#4672)
WuShichao Mar 25, 2024
4 changes: 4 additions & 0 deletions bin/pycbc_live
@@ -985,6 +985,10 @@ parser.add_argument('--gracedb-labels', metavar='LABEL', nargs='+',
parser.add_argument('--ifar-upload-threshold', type=float, required=True,
help='Inverse-FAR threshold for uploading candidate '
'triggers to GraceDB, in years.')
parser.add_argument('--coinc-window-pad', type=float,
help="Amount of time allowed to form a coincidence in "
"addition to the time of flight in seconds.",
default=0.002)
parser.add_argument('--file-prefix', default='Live')

parser.add_argument('--round-start-time', type=int, metavar='X',
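For reference, here is a minimal, editor-added sketch (not part of the PR) of how a value like --coinc-window-pad combines with the inter-detector light travel time to set the coincidence window; the H1/L1 detector pair and the stripped-down parser are illustrative assumptions:

import argparse
from pycbc.detector import Detector

# Stripped-down stand-in for the option added above
parser = argparse.ArgumentParser()
parser.add_argument('--coinc-window-pad', type=float, default=0.002,
                    help='Time allowed to form a coincidence in addition '
                         'to the time of flight, in seconds.')
args = parser.parse_args(['--coinc-window-pad', '0.005'])

# Coincidence window = light travel time between the detectors + the pad
h1, l1 = Detector('H1'), Detector('L1')
time_window = h1.light_travel_time_to_detector(l1) + args.coinc_window_pad
print('coincidence window: %.1f ms' % (time_window * 1e3))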
26 changes: 21 additions & 5 deletions pycbc/events/coinc.py
@@ -27,6 +27,7 @@

import numpy, logging, pycbc.pnutils, pycbc.conversions, copy, lal
from pycbc.detector import Detector, ppdets
from pycbc.conversions import mchirp_from_mass1_mass2
from .eventmgr_cython import coincbuffer_expireelements
from .eventmgr_cython import coincbuffer_numgreater
from .eventmgr_cython import timecoincidence_constructidxs
@@ -822,7 +823,7 @@ def __init__(self, num_templates, analysis_block, background_statistic,
sngl_ranking, stat_files, ifos,
ifar_limit=100,
timeslide_interval=.035,
coinc_threshold=.002,
coinc_window_pad=.002,
return_background=False,
**kwargs):
"""
@@ -847,7 +848,7 @@ def __init__(self, num_templates, analysis_block, background_statistic,
calculate.
timeslide_interval: float
The time in seconds between consecutive timeslide offsets.
coinc_threshold: float
coinc_window_pad: float
Amount of time allowed to form a coincidence in addition to the
time of flight in seconds.
return_background: boolean
@@ -871,6 +872,7 @@ def __init__(self, num_templates, analysis_block, background_statistic,

self.timeslide_interval = timeslide_interval
self.return_background = return_background
self.coinc_window_pad = coinc_window_pad

self.ifos = ifos
if len(self.ifos) != 2:
@@ -879,8 +881,10 @@ def __init__(self, num_templates, analysis_block, background_statistic,
self.lookback_time = (ifar_limit * lal.YRJUL_SI * timeslide_interval) ** 0.5
self.buffer_size = int(numpy.ceil(self.lookback_time / analysis_block))

det0, det1 = Detector(ifos[0]), Detector(ifos[1])
self.time_window = det0.light_travel_time_to_detector(det1) + coinc_threshold
self.dets = {ifo: Detector(ifo) for ifo in ifos}

self.time_window = self.dets[ifos[0]].light_travel_time_to_detector(
self.dets[ifos[1]]) + coinc_window_pad
self.coincs = CoincExpireBuffer(self.buffer_size, self.ifos)

self.singles = {}
@@ -965,6 +969,7 @@ def from_cli(cls, args, num_templates, analysis_chunk, ifos):
ifar_limit=args.background_ifar_limit,
timeslide_interval=args.timeslide_interval,
ifos=ifos,
coinc_window_pad=args.coinc_window_pad,
**kwargs)

@staticmethod
@@ -1099,9 +1104,11 @@ def _add_singles_to_buffer(self, results, ifos):

if len(trigs['snr'] > 0):
trigsc = copy.copy(trigs)
trigsc['ifo'] = ifo
trigsc['chisq'] = trigs['chisq'] * trigs['chisq_dof']
trigsc['chisq_dof'] = (trigs['chisq_dof'] + 2) / 2
single_stat = self.stat_calculator.single(trigsc)
del trigsc['ifo']
else:
single_stat = numpy.array([], ndmin=1,
dtype=self.stat_calculator.single_dtype)
@@ -1167,11 +1174,16 @@ def _find_coincs(self, results, valid_ifos):
continue
# Find newly added triggers in fixed_ifo
trigs = results[fixed_ifo]
# Calculate mchirp as a vectorized operation
mchirps = mchirp_from_mass1_mass2(
trigs['mass1'], trigs['mass2']
)
# Loop over them one trigger at a time
for i in range(len(trigs['end_time'])):
trig_stat = trigs['stat'][i]
trig_time = trigs['end_time'][i]
template = trigs['template_id'][i]
mchirp = mchirps[i]

# Get current shift_ifo triggers in the same template
times = self.singles[shift_ifo].data(template)['end_time']
@@ -1208,11 +1220,15 @@ def _find_coincs(self, results, valid_ifos):
# ranking statistic values.
sngls_list = [[fixed_ifo, self.trig_stat_memory[:len(i1)]],
[shift_ifo, stats[i1]]]

c = self.stat_calculator.rank_stat_coinc(
sngls_list,
slide,
self.timeslide_interval,
shift_vec
shift_vec,
time_addition=self.coinc_window_pad,
mchirp=mchirp,
dets=self.dets
)

# Store data about new triggers: slide index, stat value and
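A standalone, editor-added sketch (with made-up trigger values) of the vectorized chirp-mass pattern now used in _find_coincs: one array call outside the trigger loop instead of a per-trigger conversion inside it:

import numpy
from pycbc.conversions import mchirp_from_mass1_mass2

# Fake trigger arrays standing in for results[fixed_ifo]
trigs = {
    'mass1': numpy.array([35.0, 10.0, 1.6]),
    'mass2': numpy.array([30.0, 8.0, 1.4]),
    'end_time': numpy.array([1000.1, 1000.7, 1001.3]),
}

# Calculate mchirp as a single vectorized operation ...
mchirps = mchirp_from_mass1_mass2(trigs['mass1'], trigs['mass2'])

# ... then index it inside the per-trigger loop
for i in range(len(trigs['end_time'])):
    mchirp = mchirps[i]  # value later handed to rank_stat_coinc(..., mchirp=mchirp)
    print(i, mchirp)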
19 changes: 13 additions & 6 deletions pycbc/events/coinc_rate.py
@@ -92,7 +92,7 @@ def combination_noise_rate(rates, slop):
return numpy.exp(combination_noise_lograte(log_rates, slop))


def combination_noise_lograte(log_rates, slop):
def combination_noise_lograte(log_rates, slop, dets=None):
"""
Calculate the expected rate of noise coincidences for a combination of
detectors given log of single detector noise rates
@@ -105,20 +105,25 @@ def combination_noise_lograte(log_rates, slop):
slop: float
time added to maximum time-of-flight between detectors to account
for timing error
dets: dict
Key: ifo string, Value: pycbc.detector.Detector object
If not provided, will be created from ifo strings

Returns
-------
numpy array
Expected log coincidence rate in the combination, units Hz
"""
# multiply product of trigger rates by the overlap time
allowed_area = multiifo_noise_coincident_area(list(log_rates), slop)
allowed_area = multiifo_noise_coincident_area(list(log_rates),
slop,
dets=dets)
# list(dict.values()) is python-3-proof
rateprod = numpy.sum(list(log_rates.values()), axis=0)
return numpy.log(allowed_area) + rateprod


def multiifo_noise_coincident_area(ifos, slop):
def multiifo_noise_coincident_area(ifos, slop, dets=None):
"""
Calculate the total extent of time offset between 2 detectors,
or area of the 2d space of time offsets for 3 detectors, for
@@ -131,16 +136,18 @@ def multiifo_noise_coincident_area(ifos, slop):
list of interferometers
slop: float
extra time to add to maximum time-of-flight for timing error
dets: dict
Key: ifo string, Value: pycbc.detector.Detector object
If not provided, will be created from ifo strings

Returns
-------
allowed_area: float
area in units of seconds^(n_ifos-1) that coincident values can fall in
"""
# set up detector objects
dets = {}
for ifo in ifos:
dets[ifo] = pycbc.detector.Detector(ifo)
if dets is None:
dets = {ifo: pycbc.detector.Detector(ifo) for ifo in ifos}
n_ifos = len(ifos)

if n_ifos == 2:
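Assuming the signatures introduced in this file, a short editor-added sketch of the intended usage: build the Detector objects once and pass them in, rather than letting multiifo_noise_coincident_area recreate them on every call:

from pycbc.detector import Detector
from pycbc.events import coinc_rate

ifos = ['H1', 'L1']
slop = 0.002  # extra time beyond the maximum time of flight, in seconds

# Built once, e.g. in the live background estimator's __init__ ...
dets = {ifo: Detector(ifo) for ifo in ifos}

# ... and reused on every call
area = coinc_rate.multiifo_noise_coincident_area(ifos, slop, dets=dets)

# Omitting dets keeps the old behaviour: Detectors are created internally
area_fallback = coinc_rate.multiifo_noise_coincident_area(ifos, slop)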
116 changes: 84 additions & 32 deletions pycbc/events/stat.py
@@ -845,13 +845,20 @@ def find_fits(self, trigs):
The thresh fit value(s)
"""
try:
tnum = trigs.template_num # exists if accessed via coinc_findtrigs
# Exists where trigs is a class with the template num attribute
tnum = trigs.template_num
except AttributeError:
# Exists where trigs is dict-like
tnum = trigs['template_id']

try:
ifo = trigs.ifo
except AttributeError:
tnum = trigs['template_id'] # exists for SingleDetTriggers
assert len(self.ifos) == 1
# Should be exactly one ifo provided
ifo = self.ifos[0]
ifo = trigs.get('ifo', None)
if ifo is None:
ifo = self.ifos[0]
assert ifo in self.ifos
Comment on lines +857 to +860

@titodalcanton (Contributor), Apr 2, 2024:

I cannot judge if this is correct; I do not understand what this bit is doing just by looking at it (read: this bit needs comments). It also seems strange that the assertion is inside the except block. Is that intentional?

Contributor:

I share Tito's concerns.

I can't figure out what setup would ever cause the if ifo is None check to be True, or whether taking ifo = self.ifos[0] makes sense in this context. I think comments should explain what situation each part of this is meant to handle (i.e. online vs offline, singles vs coincs, etc.).

I think the if statement and assert can move outside the except block without any change of functionality.

Contributor:

I echo that this needs comments.

If trigs is dict-like you'll get to the except; then, if ifo is not a key in the trigs object, the if ifo is None branch could be met.

I am fairly sure that self.ifos[0] is not the right thing to get here, though, as that will always get the same ifo, even when you're using the triggers from a different detector.

Contributor:

@GarethCabournDavies when do we expect trigs to be dict-like vs not? I've lost track.

Contributor:

Dict-like will be when it is called using:

  • a dictionary keyed on the dataset names, as in pycbc_fit_sngls_by_template
  • a SingleDetTriggers object, as in pycbc_page_snglinfo and pycbc_sngl_minifollowup
  • the h5py file directly, as in pycbc_fit_sngls_binned (it shouldn't really do this, and should use SingleDetTriggers)

Not dict-like will be when using ReadByTemplate, which is done in pycbc_coinc_findtrigs and pycbc_sngls_findtrigs.

Contributor:

I was hoping there was some clean division, like always dict-like in PyCBC Live and never in offline, but I guess that's not the case. It might be nice to make that more consistent in the future, but that is not a task for this PR. From what I can see, I do think it is the case that Live always uses the dict-like path?

Contributor:

I think it is only used in Live in the single_stat = self.stat_calculator.single(trigsc) line above, and trigsc is dict-like, so it seems this is safe.
# fits_by_tid is a dictionary of dictionaries of arrays
# indexed by ifo / coefficient name / template_id
alphai = self.fits_by_tid[ifo]['smoothed_fit_coeff'][tnum]
@@ -1481,10 +1488,9 @@ def single(self, trigs):
# exists if accessed via coinc_findtrigs
self.curr_tnum = trigs.template_num
except AttributeError:
# exists for SingleDetTriggers
# exists for SingleDetTriggers & pycbc_live get_coinc
self.curr_tnum = trigs['template_id']
# Should only be one ifo fit file provided
assert len(self.ifos) == 1

# Store benchmark log volume as single-ifo information since the coinc
# method does not have access to template id
singles['benchmark_logvol'] = self.benchmark_logvol[self.curr_tnum]
@@ -1542,8 +1548,23 @@ def rank_stat_coinc(self, s, slide, step, to_shift,
"""

sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s}
ln_noise_rate = coinc_rate.combination_noise_lograte(
sngl_rates, kwargs['time_addition'])
# Find total volume of phase-time-amplitude space occupied by
# noise coincs
if 'dets' in kwargs:
ln_noise_rate = coinc_rate.combination_noise_lograte(
sngl_rates, kwargs['time_addition'],
kwargs['dets'])

# Extent of time-difference space occupied
noise_twindow = coinc_rate.multiifo_noise_coincident_area(
self.hist_ifos, kwargs['time_addition'],
kwargs['dets'])
else:
ln_noise_rate = coinc_rate.combination_noise_lograte(
sngl_rates, kwargs['time_addition'])
noise_twindow = coinc_rate.multiifo_noise_coincident_area(
self.hist_ifos, kwargs['time_addition'])

ln_noise_rate -= self.benchmark_lograte

# Network sensitivity for a given coinc type is approximately
@@ -1565,11 +1586,6 @@ def rank_stat_coinc(self, s, slide, step, to_shift,
stat = {ifo: st for ifo, st in s}
logr_s = self.logsignalrate(stat, slide * step, to_shift)

# Find total volume of phase-time-amplitude space occupied by noise
# coincs
# Extent of time-difference space occupied
noise_twindow = coinc_rate.multiifo_noise_coincident_area(
self.hist_ifos, kwargs['time_addition'])
# Volume is the allowed time difference window, multiplied by 2pi for
# each phase difference dimension and by allowed range of SNR ratio
# for each SNR ratio dimension : there are (n_ifos - 1) dimensions
@@ -1741,6 +1757,39 @@ def logsignalrate(self, stats, shift, to_shift):
logr_s += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.))
return logr_s

def single(self, trigs):
"""
Calculate the necessary single detector information

In this case the ranking rescaled (see the lognoiserate method here)
with the phase, end time, sigma, SNR, template_id and the
benchmark_logvol values added in. This also stored the current chirp
mass for use when computing the coinc statistic values.

Parameters
----------
trigs: dict of numpy.ndarrays, h5py group or similar dict-like object
Object holding single detector trigger information.

Returns
-------
numpy.ndarray
The array of single detector values
"""
from pycbc.conversions import mchirp_from_mass1_mass2
try:
mass1 = trigs.param['mass1']
mass2 = trigs.param['mass2']
except AttributeError:
mass1 = trigs['mass1']
mass2 = trigs['mass2']
self.curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2)

if self.mcm is not None:
# Careful - input might be a str, so cast to float
self.curr_mchirp = min(self.curr_mchirp, float(self.mcm))
return ExpFitFgBgNormStatistic.single(self, trigs)

def rank_stat_single(self, single_info,
**kwargs): # pylint:disable=unused-argument
"""
@@ -1767,32 +1816,35 @@ def rank_stat_single(self, single_info,
rank_sngl += numpy.log((self.curr_mchirp / 20.) ** (11. / 3.))
return rank_sngl

def single(self, trigs):
def rank_stat_coinc(self, sngls_list, slide, step, to_shift, **kwargs):
"""
Calculate the necessary single detector information

In this case the ranking rescaled (see the lognoiserate method here)
with the phase, end time, sigma, SNR, template_id and the
benchmark_logvol values added in. This also stored the current chirp
mass for use when computing the coinc statistic values.
Calculate the coincident detection statistic.

Parameters
----------
trigs: dict of numpy.ndarrays, h5py group or similar dict-like object
Object holding single detector trigger information.
sngls_list: list
List of (ifo, single detector statistic) tuples
slide: (unused in this statistic)
step: (unused in this statistic)
to_shift: list
List of integers indicating what multiples of the time shift will
be applied (unused in this statistic)

Returns
-------
numpy.ndarray
The array of single detector values
Array of coincident ranking statistic values
"""
from pycbc.conversions import mchirp_from_mass1_mass2
self.curr_mchirp = mchirp_from_mass1_mass2(trigs.param['mass1'],
trigs.param['mass2'])
if self.mcm is not None:
# Careful - input might be a str, so cast to float
self.curr_mchirp = min(self.curr_mchirp, float(self.mcm))
return ExpFitFgBgNormStatistic.single(self, trigs)

if 'mchirp' in kwargs:
self.curr_mchirp = kwargs['mchirp']

return ExpFitFgBgNormStatistic.rank_stat_coinc(self,
sngls_list,
slide,
step,
to_shift,
**kwargs)

def coinc_lim_for_thresh(self, s, thresh, limifo,
**kwargs): # pylint:disable=unused-argument
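A rough, editor-added illustration of how the chirp mass now flows through the BBH statistic: single() computes it from the trigger masses and caps it if a maximum is configured, and the new rank_stat_coinc() override lets the live coincidence code pass a per-candidate value through kwargs. The scalar masses and the string-valued cap below are illustrative assumptions:

import numpy
from pycbc.conversions import mchirp_from_mass1_mass2

# Illustrative trigger masses and a configured chirp-mass cap (may be a str)
mass1, mass2 = 40.0, 35.0
mcm = '35.0'

# As in single(): compute the chirp mass and cap it if a maximum is set
curr_mchirp = mchirp_from_mass1_mass2(mass1, mass2)
if mcm is not None:
    # Careful - input might be a str, so cast to float
    curr_mchirp = min(curr_mchirp, float(mcm))

# As in the new rank_stat_coinc(): a value supplied by the caller via kwargs
# (here standing in for the live coinc code passing mchirp=mchirps[i])
kwargs = {'mchirp': 25.0}
if 'mchirp' in kwargs:
    curr_mchirp = kwargs['mchirp']

# The (mchirp / 20)**(11/3) reweighting applied to the signal rate
log_reweight = numpy.log((curr_mchirp / 20.) ** (11. / 3.))
print(curr_mchirp, log_reweight)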
7 changes: 5 additions & 2 deletions test/test_live_coinc_compare.py
Original file line number Diff line number Diff line change
@@ -47,7 +47,9 @@ def get_trigs(self):
0,
self.num_templates,
size=self.num_trigs
).astype(np.int32)
).astype(np.int32),
"mass1": np.random.uniform(2.0, 100.0, size=self.num_trigs).astype(np.float32),
"mass2": np.random.uniform(2.0, 100.0, size=self.num_trigs).astype(np.float32)
}
self.start_time += self.analysis_chunk
return trigs
@@ -73,7 +75,8 @@ def setUp(self, *args):
statistic_keywords=None,
timeslide_interval=0.1,
background_ifar_limit=100,
store_background=True
store_background=True,
coinc_window_pad=0.002
)

# number of templates in the bank
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -25,7 +25,7 @@ deps =
{[base]deps}
pytest
; Needed for `BBHx` package to work with PyCBC
git+https://github.com/mikekatz04/BBHx.git; sys_platform == 'linux'
git+https://github.com/mikekatz04/BBHx.git@4fff509; sys_platform == 'linux'
git+https://github.com/gwastro/BBHX-waveform-model.git; sys_platform == 'linux'
conda_deps=
mysqlclient
@@ -66,7 +66,7 @@ commands = bash tools/pycbc_test_suite.sh
deps =
{[base]deps}
; Needed for `BBHx` package to work with PyCBC
git+https://github.com/mikekatz04/BBHx.git; sys_platform == 'linux'
git+https://github.com/mikekatz04/BBHx.git@4fff509; sys_platform == 'linux'
git+https://github.com/gwastro/BBHX-waveform-model.git; sys_platform == 'linux'
conda_deps=
mysqlclient