From 13c6b084c8d9d387f140fd17e8e2fea77a60c990 Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Mon, 11 Nov 2024 12:02:53 -0800 Subject: [PATCH 01/10] Passing segments files and vdf to all results scripts from pycbc_pygrb_results_workflow --- bin/pygrb/pycbc_pygrb_results_workflow | 43 +++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/bin/pygrb/pycbc_pygrb_results_workflow b/bin/pygrb/pycbc_pygrb_results_workflow index a29b4392e61..8db9df5f891 100644 --- a/bin/pygrb/pycbc_pygrb_results_workflow +++ b/bin/pygrb/pycbc_pygrb_results_workflow @@ -78,7 +78,7 @@ def labels_in_files_metadata(labels, rundir, file_paths): # Function to retrieve the segments plot (produced in the preprocessing stage) # that ensures its copy is saved in the output directory before returning to # the original working directory. Appropriate meta data is added to the plot. -def display_seg_plot(output_dir, segments_dir): +def display_seg_plot(output_dir, segment_dir): """Return the File of the segments plot (which was already produced during the pre-processing) after adding the appropriate metadata to it. 
""" @@ -90,7 +90,7 @@ def display_seg_plot(output_dir, segments_dir): 'GRB' + wflow.cp.get('workflow', 'trigger-name') + '_segments.png' ) segments_plot = _workflow.resolve_url_to_file( - os.path.join(args.segment_dir, segments_plot_name) + os.path.join(segment_dir, segments_plot_name) ) segments_plot_path = segments_plot.storage_path im = Image.open(segments_plot_path) @@ -134,7 +134,6 @@ parser.add_argument( "-i", "--inj-files", action="store", - default=None, nargs="+", help="Location(s) of input injection results file(s)", ) @@ -142,15 +141,18 @@ parser.add_argument( "-b", "--bank-file", action="store", - default=None, help="The location of the full template bank file", ) parser.add_argument( "--segment-dir", action="store", - default=None, help="The location of the segment files", ) +parser.add_argument( + "--veto-file", + help="File containing segments used to veto injections", +) + _workflow.add_workflow_command_line_group(parser) _workflow.add_workflow_settings_cli(parser, include_subdax_opts=True) args = parser.parse_args() @@ -218,6 +220,12 @@ if not set(inj_sets).issubset(eff_secs): inj_sets = [i.upper() for i in inj_sets] inj_files = labels_in_files_metadata(inj_sets, start_rundir, args.inj_files) +# File instance of the veto file +veto_file = args.veto_file +if veto_file: + veto_file = os.path.join(start_rundir, args.veto_file) + veto_file = _workflow.resolve_url_to_file(veto_file) + # IFOs actually used: determined by data availability ifos = extract_ifos(offsource_file.storage_path) wflow.ifos = ifos @@ -226,8 +234,6 @@ plotting_nodes = [] html_nodes = [] # Convert the segments files to a FileList -seg_filenames = ['bufferSeg.txt', 'offSourceSeg.txt', 'onSourceSeg.txt'] -seg_files = [os.path.join(args.segment_dir, f) for f in seg_filenames] seg_files = _workflow.build_segment_filelist(args.segment_dir) # Logfile of this workflow @@ -320,6 +326,8 @@ for snr_type in timeseries: trig_file=offsource_file, inj_file=inj_file, ifo=ifo, + 
seg_files=seg_files, + veto_file=veto_file, tags=tags, ) plotting_nodes.append(plot_node) @@ -359,6 +367,8 @@ for veto in vetoes: trig_file=offsource_file, inj_file=inj_file, ifo=ifo_arg, + seg_files=seg_files, + veto_file=veto_file, tags=tags, ) plotting_nodes.append(plot_node) @@ -392,6 +402,8 @@ if wflow.cp.has_section('pygrb_plot_coh_ifosnr'): trig_file=offsource_file, inj_file=inj_file, ifo=ifo, + seg_files=seg_files, + veto_file=veto_file, tags=tags, ) plotting_nodes.append(plot_node) @@ -436,6 +448,8 @@ for nstat in nstats: out_dir, trig_file=offsource_file, inj_file=inj_file, + seg_files=seg_files, + veto_file=veto_file, tags=tags, ) plotting_nodes.append(plot_node) @@ -483,6 +497,8 @@ for inj_set in inj_sets: trig_file=offsource_file, ifo=ifo, inj_file=inj_file, + seg_files=seg_files, + veto_file=veto_file, tags=tags, ) plotting_nodes.append(plot_node) @@ -498,6 +514,7 @@ for inj_set in inj_sets: offsource_file, seg_files, inj_file=inj_file, + veto_file=veto_file, tags=inj_file.tags, ) html_nodes.append(html_node) @@ -520,6 +537,8 @@ for inj_set in inj_sets: offsource_file, 'daxes', out_dir, + seg_files=seg_files, + veto_file=veto_file, tags=inj_file.tags + ['loudest_quiet_found_injs'], ) logging.info('Leaving minifollowups') @@ -540,6 +559,7 @@ for stat in stats: trig_file=offsource_file, inj_file=inj_file, seg_files=seg_files, + veto_file=veto_file, tags=[stat], ) plotting_nodes.append(plot_node) @@ -570,6 +590,8 @@ else: offsource_file, 'daxes', out_dir, + seg_files=seg_files, + veto_file=veto_file, tags=['loudest_offsource_events'], ) logging.info('Leaving minifollowups') @@ -603,6 +625,7 @@ for inj_set in inj_sets: inj_file=inj_file, bank_file=bank_file, seg_files=seg_files, + veto_file=veto_file, tags=tags, plot_bkgd=True, ) @@ -631,6 +654,7 @@ for i, offtrial in enumerate(offtrials): inj_file=inj_file, bank_file=bank_file, seg_files=seg_files, + veto_file=veto_file, tags=tags, plot_bkgd=False, ) @@ -741,6 +765,8 @@ for snr_type in ['reweighted', 
'coherent']: 'pygrb_plot_snr_timeseries', out_dir, trig_file=all_times_file, + seg_files=seg_files, + veto_file=veto_file, tags=[snr_type, 'alltimes'], ) plotting_nodes.append(plot_node) @@ -763,6 +789,8 @@ else: onsource_file, 'daxes', out_dir, + seg_files=seg_files, + veto_file=veto_file, tags=['loudest_onsource_event'], ) logging.info("Leaving onsource minifollowups") @@ -785,6 +813,7 @@ for inj_set in inj_sets: inj_file=inj_file, bank_file=bank_file, seg_files=seg_files, + veto_file=veto_file, tags=tags, plot_bkgd=False, ) From fc7c0d8e120bb6b4c2437d6187181f3c7165b56d Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Mon, 11 Nov 2024 12:49:09 -0800 Subject: [PATCH 02/10] Generalised functions involved in loading trigger/injection data in pygrb --- pycbc/results/pygrb_postprocessing_utils.py | 44 ++++++++++++++++++--- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index 2869e62db6b..553bb7b19b9 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -287,13 +287,20 @@ def _dataset_iterator(g, prefix=''): # ============================================================================= # Functions to load triggers # ============================================================================= -def load_triggers(input_file, ifos, vetoes, rw_snr_threshold=None, - slide_id=None): +def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, + slide_id=None): """Loads triggers from PyGRB output file, returning a dictionary""" + logging.info("Loading triggers.") trigs = HFile(input_file, 'r') rw_snr = trigs['network/reweighted_snr'][:] net_ids = trigs['network/event_id'][:] + # Output the number of items loaded only upon a request by the user who + # should not use data_tag='trigs'or 'injs' when processing the onsource + if data_tag=='trigs': + logging.info(f"{len(rw_snr)} triggers loaded.") + 
elif data_tag=='injs': + logging.info(f"{len(rw_snr)} injections loaded.") ifo_ids = {} for ifo in ifos: ifo_ids[ifo] = trigs[ifo+'/event_id'][:] @@ -307,6 +314,16 @@ def load_triggers(input_file, ifos, vetoes, rw_snr_threshold=None, above_thresh = rw_snr > 0 num_orig_pts = len(above_thresh) + # Output the number of items surviging vetoes with the same logic as above + msg = "" + if data_tag=='trigs': + msg += f"{sum(above_thresh)} triggers surviving reweighted SNR cut " + elif data_tag=='injs': + msg = f"{sum(above_thresh)} injections surviving reweighted SNR cut " + if msg: + msg += f"at {rw_snr_threshold}." + logging.info(msg) + # Do not assume that IFO and network datasets are sorted the same way: # find where each surviving network/event_id is placed in the IFO/event_id ifo_ids_above_thresh_locations = {} @@ -315,10 +332,10 @@ def load_triggers(input_file, ifos, vetoes, rw_snr_threshold=None, numpy.array([numpy.where(ifo_ids[ifo] == net_id)[0][0] for net_id in net_ids[above_thresh]]) - # Apply the cut on all the data by remove points with reweighted SNR = 0 + # Apply the cut on all the data by removing points with reweighted SNR = 0 trigs_dict = {} with HFile(input_file, "r") as trigs: - for (path, dset) in dataset_iterator(trigs): + for (path, dset) in _dataset_iterator(trigs): # The dataset contains information other than trig/inj properties: # just copy it if len(dset) != num_orig_pts: @@ -336,12 +353,27 @@ def load_triggers(input_file, ifos, vetoes, rw_snr_threshold=None, trigs_dict[path] = dset[above_thresh] if trigs_dict[path].size == trigs['network/slide_id'][:].size: - trigs_dict[path] = slide_filter(trigs, trigs_dict[path], - slide_id=slide_id) + trigs_dict[path] = _slide_filter(trigs, trigs_dict[path], + slide_id=slide_id) return trigs_dict +# Wrapper function to load trigger/injection data +def load_data(input_file, ifos, rw_snr_threshold=None, + data_tag=None, slide_id=None): + """Load data from a trigger/injection file""" + + trigs_or_injs = None 
+ if input_file: + trigs_or_injs = \ + _load_triggers(input_file, ifos, data_tag=data_tag, + rw_snr_threshold=rw_snr_threshold, + slide_id=slide_id) + + return trigs_or_injs + + # ============================================================================= # Detector utils: # * Function to calculate the antenna response F+^2 + Fx^2 From cdabedd03e897965d67b601a816de0bcf92f6cf3 Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Mon, 11 Nov 2024 12:49:30 -0800 Subject: [PATCH 03/10] Updating pycbc_pygrb_plot_skygrid accordingly --- bin/pygrb/pycbc_pygrb_plot_skygrid | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/bin/pygrb/pycbc_pygrb_plot_skygrid b/bin/pygrb/pycbc_pygrb_plot_skygrid index 3670d08aa34..37c8d812f41 100644 --- a/bin/pygrb/pycbc_pygrb_plot_skygrid +++ b/bin/pygrb/pycbc_pygrb_plot_skygrid @@ -42,18 +42,6 @@ __version__ = pycbc.version.git_verbose_msg __date__ = pycbc.version.date __program__ = "pycbc_pygrb_plot_skygrid" -# ============================================================================= -# Functions -# ============================================================================= -# Load trigger data -def load_data(input_file, ifos, vetoes, injections=False): - """Load data from a trigger/injection file""" - - logging.info("Loading triggers...") - - trigs = ppu.load_triggers(input_file, ifos, vetoes) - - return trigs # ============================================================================= # Main script starts here @@ -80,12 +68,11 @@ for outdir in outdirs: if not os.path.isdir(outdir): os.makedirs(outdir) -# Extract IFOs and vetoes -ifos, vetoes = ppu.extract_ifos_and_vetoes(trig_file, opts.veto_files, - opts.veto_category) +# Extract IFOs +ifos = ppu.extract_ifos(trig_file) -# Load trigger data -trig_data = load_data(trig_file, ifos, vetoes) +# Load trigger data: the sky-grid points are not time-slide in the plot +trig_data = ppu.load_data(trig_file, ifos, data_tag=None, slide_id=0) # # 
Generate sky grid plot From 8271be9697045ea9817950a1b4601b25683c21dd Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Mon, 11 Nov 2024 13:28:24 -0800 Subject: [PATCH 04/10] Typo in comment --- bin/pygrb/pycbc_pygrb_plot_skygrid | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/pygrb/pycbc_pygrb_plot_skygrid b/bin/pygrb/pycbc_pygrb_plot_skygrid index 37c8d812f41..c59b31c1ec8 100644 --- a/bin/pygrb/pycbc_pygrb_plot_skygrid +++ b/bin/pygrb/pycbc_pygrb_plot_skygrid @@ -71,7 +71,7 @@ for outdir in outdirs: # Extract IFOs ifos = ppu.extract_ifos(trig_file) -# Load trigger data: the sky-grid points are not time-slide in the plot +# Load trigger data: the sky-grid points are not time-slid in the plot trig_data = ppu.load_data(trig_file, ifos, data_tag=None, slide_id=0) # From 0d2974e7c0e7bedff9334a4aa6c6ef726cbda122 Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Mon, 11 Nov 2024 13:29:49 -0800 Subject: [PATCH 05/10] No f-string in logging --- pycbc/results/pygrb_postprocessing_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index 553bb7b19b9..b351cb1b755 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -298,9 +298,9 @@ def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, # Output the number of items loaded only upon a request by the user who # should not use data_tag='trigs'or 'injs' when processing the onsource if data_tag=='trigs': - logging.info(f"{len(rw_snr)} triggers loaded.") + logging.info("%d triggers loaded.", len(rw_snr)) elif data_tag=='injs': - logging.info(f"{len(rw_snr)} injections loaded.") + logging.info("%d injections loaded.", len(rw_snr)) ifo_ids = {} for ifo in ifos: ifo_ids[ifo] = trigs[ifo+'/event_id'][:] From cdd203b095fb5adeb0ad2a1f15acc22ed7223116 Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Tue, 
12 Nov 2024 00:54:43 -0800 Subject: [PATCH 06/10] Improved a logging message and 2 code comments --- pycbc/results/pygrb_postprocessing_utils.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index b351cb1b755..be0a0b5f744 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -295,8 +295,9 @@ def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, trigs = HFile(input_file, 'r') rw_snr = trigs['network/reweighted_snr'][:] net_ids = trigs['network/event_id'][:] - # Output the number of items loaded only upon a request by the user who - # should not use data_tag='trigs'or 'injs' when processing the onsource + # Output the number of items loaded only upon a request by the user who is + # expected not to set data_tag to 'trigs'or 'injs' when processing the + # onsource if data_tag=='trigs': logging.info("%d triggers loaded.", len(rw_snr)) elif data_tag=='injs': @@ -317,11 +318,11 @@ def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, # Output the number of items surviging vetoes with the same logic as above msg = "" if data_tag=='trigs': - msg += f"{sum(above_thresh)} triggers surviving reweighted SNR cut " + msg += f"{sum(above_thresh)} triggers " elif data_tag=='injs': - msg = f"{sum(above_thresh)} injections surviving reweighted SNR cut " + msg = f"{sum(above_thresh)} injections " if msg: - msg += f"at {rw_snr_threshold}." + msg += f"surviving reweighted SNR cut at {rw_snr_threshold}." 
logging.info(msg) # Do not assume that IFO and network datasets are sorted the same way: @@ -362,7 +363,9 @@ def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, # Wrapper function to load trigger/injection data def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None, slide_id=None): - """Load data from a trigger/injection file""" + """Load data from a trigger/injection file. data_tag enables logging + information about the number of triggers/injections found, so the user + should not set it to 'trigs'/'injs' when processing the onsource.""" trigs_or_injs = None if input_file: From eb06b9a7a0b3ce001cf5e2d64a4c34a8ad385e3d Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Wed, 13 Nov 2024 05:06:33 -0800 Subject: [PATCH 07/10] Renaming variable that was forgotten --- pycbc/workflow/jobsetup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycbc/workflow/jobsetup.py b/pycbc/workflow/jobsetup.py index 34b20bfdbdc..61bcc09a416 100644 --- a/pycbc/workflow/jobsetup.py +++ b/pycbc/workflow/jobsetup.py @@ -311,7 +311,7 @@ def multi_ifo_coherent_job_setup(workflow, out_files, curr_exe_job, tag.append(split_bank.tag_str) node = curr_exe_job.create_node(data_seg, job_valid_seg, parent=split_bank, inj_file=inj_file, tags=tag, - dfParents=frame_files, bankVetoBank=bank_veto, + dfParents=input_files, bankVetoBank=bank_veto, ipn_file=ipn_sky_points) workflow.add_node(node) split_bank_counter += 1 From 4b92073105e86824c1b818e1f232945d2f0e242a Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Wed, 13 Nov 2024 09:23:52 -0800 Subject: [PATCH 08/10] Removed redundant variable --- pycbc/workflow/jobsetup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pycbc/workflow/jobsetup.py b/pycbc/workflow/jobsetup.py index 61bcc09a416..2c90ec19138 100644 --- a/pycbc/workflow/jobsetup.py +++ b/pycbc/workflow/jobsetup.py @@ -276,7 +276,6 @@ def multi_ifo_coherent_job_setup(workflow, out_files, curr_exe_job, data_seg, 
job_valid_seg = curr_exe_job.get_valid_times() curr_out_files = FileList([]) ipn_sky_points = None - veto_file = None bank_veto = None input_files = FileList(datafind_outs) for f in datafind_outs: @@ -284,7 +283,6 @@ def multi_ifo_coherent_job_setup(workflow, out_files, curr_exe_job, ipn_sky_points = f input_files.remove(f) elif 'vetoes' in f.description: - veto_file = f input_files.remove(f) elif 'INPUT_BANK_VETO_BANK' in f.description: bank_veto = f From 1358cd541e273107d7739c3fa6d82163395f70b9 Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Thu, 14 Nov 2024 01:39:24 -0800 Subject: [PATCH 09/10] Blending _load_triggers into load_data --- pycbc/results/pygrb_postprocessing_utils.py | 32 +++++++++------------ 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index be0a0b5f744..4a72c71cf4c 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -285,16 +285,22 @@ def _dataset_iterator(g, prefix=''): # ============================================================================= -# Functions to load triggers +# Function to load trigger/injection data # ============================================================================= -def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, - slide_id=None): - """Loads triggers from PyGRB output file, returning a dictionary""" +def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None, + slide_id=None): + """Load data from a trigger/injection PyGRB output file, returning a + dictionary. If the input_file is None, None is returned. 
data_tag enables + logging information about the number of triggers/injections found, so the + user should not set it to 'trigs'/'injs' when processing the onsource.""" + + if input_file is None: + return None - logging.info("Loading triggers.") trigs = HFile(input_file, 'r') rw_snr = trigs['network/reweighted_snr'][:] net_ids = trigs['network/event_id'][:] + # Output the number of items loaded only upon a request by the user who is # expected not to set data_tag to 'trigs'or 'injs' when processing the # onsource @@ -302,6 +308,8 @@ def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, logging.info("%d triggers loaded.", len(rw_snr)) elif data_tag=='injs': logging.info("%d injections loaded.", len(rw_snr)) + else: + logging.info("Loading triggers.") ifo_ids = {} for ifo in ifos: ifo_ids[ifo] = trigs[ifo+'/event_id'][:] @@ -360,21 +368,7 @@ def _load_triggers(input_file, ifos, data_tag=None, rw_snr_threshold=None, return trigs_dict -# Wrapper function to load trigger/injection data -def load_data(input_file, ifos, rw_snr_threshold=None, - data_tag=None, slide_id=None): - """Load data from a trigger/injection file. 
data_tag enables logging - information about the number of triggers/injections found, so the user - should not set it to 'trigs'/'injs' when processing the onsource.""" - - trigs_or_injs = None - if input_file: - trigs_or_injs = \ - _load_triggers(input_file, ifos, data_tag=data_tag, - rw_snr_threshold=rw_snr_threshold, - slide_id=slide_id) - return trigs_or_injs # ============================================================================= From 1316d65c3713a01800cd3f60e71cb16ecf7a3362 Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Thu, 14 Nov 2024 01:40:18 -0800 Subject: [PATCH 10/10] Blending _load_triggers into load_data --- pycbc/results/pygrb_postprocessing_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index 4a72c71cf4c..b29bd1d1ddd 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -294,7 +294,7 @@ def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None, logging information about the number of triggers/injections found, so the user should not set it to 'trigs'/'injs' when processing the onsource.""" - if input_file is None: + if not input_file: return None trigs = HFile(input_file, 'r')