From 9fab51cca00fe2433c2c7a804e4de22f30c55638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Schl=C3=BCter?= Date: Wed, 20 Nov 2024 13:06:22 +0100 Subject: [PATCH] Refactor output_writer_hdf5.py. Mostly WS --- NuRadioMC/simulation/output_writer_hdf5.py | 61 ++++++++++++---------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/NuRadioMC/simulation/output_writer_hdf5.py b/NuRadioMC/simulation/output_writer_hdf5.py index 6da48566b..aec83e6bb 100644 --- a/NuRadioMC/simulation/output_writer_hdf5.py +++ b/NuRadioMC/simulation/output_writer_hdf5.py @@ -39,7 +39,7 @@ def __init__( # self._n_showers = len(self._fin['shower_ids']) # self._hdf5_keys = ['event_group_ids', 'xx', 'yy', 'zz', 'vertex_times', - # 'azimuths', 'zemiths', 'energies', + # 'azimuths', 'zeniths', 'energies', # 'shower_energies', ''n_interaction', # 'shower_ids', 'event_ids', 'triggered', 'n_samples', # 'dt', 'Tnoise', 'Vrms', 'bandwidth', 'trigger_names'] @@ -98,37 +98,40 @@ def add_event_group(self, event_buffer): """ logger.debug("adding event group to output file") - - - # first add atributes to the output file. Attributes of all events should be the same, - # raising an error if not. + # first add attributes to the output file. Attributes of all events should be the same, + # raising an error if not. 
trigger_names = [] extent_array_by = 0 for sid in event_buffer: for eid in event_buffer[sid]: evt = event_buffer[sid][eid] + + # save event attributes for enum_entry in genattrs: - if(evt.has_generator_info(enum_entry)): + if evt.has_generator_info(enum_entry): if enum_entry.name not in self._mout_attributes: self._mout_attributes[enum_entry.name] = evt.get_generator_info(enum_entry) else: # if the attribute is already present, we need to check if it is the same for all events assert all(np.atleast_1d(self._mout_attributes[enum_entry.name] == evt.get_generator_info(enum_entry))) + + # save station attributes for stn in evt.get_stations(): - # save station attributes - tmp_keys = [[chp.Vrms_NuRadioMC_simulation, "Vrms"], [chp.bandwidth_NuRadioMC_simulation, "bandwidth"]] - for (key_cp, key_hdf5) in tmp_keys: - channel_values = [] - for channel in stn.iter_channels(sorted=True): - channel_values.append(channel[key_cp]) + station_key_pairs = [[chp.Vrms_NuRadioMC_simulation, "Vrms"], [chp.bandwidth_NuRadioMC_simulation, "bandwidth"]] + for (key_cp, key_hdf5) in station_key_pairs: + channel_values = [channel[key_cp] for channel in stn.iter_channels(sorted=True)] + if key_hdf5 not in self._mout_groups_attributes[sid]: self._mout_groups_attributes[sid][key_hdf5] = np.array(channel_values) else: - assert all(np.atleast_1d(self._mout_groups_attributes[sid][key_hdf5] == np.array(channel_values))), f"station {sid} key {key_hdf5} is {self._mout_groups_attributes[sid][key_hdf5]}, but current channel is {np.array(channel_values)}" + assert all(np.atleast_1d(self._mout_groups_attributes[sid][key_hdf5] == np.array(channel_values))), \ + f"station {sid} key {key_hdf5} is {self._mout_groups_attributes[sid][key_hdf5]}, but current channel is {np.array(channel_values)}" + for trigger in stn.get_triggers().values(): if trigger.get_name() not in trigger_names: trigger_names.append(trigger.get_name()) logger.debug(f"extending data structure by trigger {trigger.get_name()} to output 
file") extent_array_by += 1 + # the available triggers are not available from the start because a certain trigger # might only trigger a later event. Therefore we need to extend the array # if we find a new trigger @@ -140,6 +143,7 @@ def add_event_group(self, event_buffer): for i in range(len(self._mout[key])): logger.debug(f"extending data structure by {extent_array_by} to output file for key {key}") self._mout[key][i] = self._mout[key][i] + [False] * extent_array_by + for station_id in self._station_ids: sg = self._mout_groups[station_id] if keys[0] in sg: @@ -156,7 +160,7 @@ def add_event_group(self, event_buffer): evt = event_buffer[sid][eid] if self._particle_mode: for shower in evt.get_sim_showers(): - if not shower.get_id() in shower_ids: + if shower.get_id() not in shower_ids: logger.debug(f"adding shower {shower.get_id()} to output file") # shower ids might not be in increasing order. We need to sort the hdf5 output later shower_ids.append(shower.get_id()) @@ -185,7 +189,9 @@ def add_event_group(self, event_buffer): self.__first_event = False else: # emitters have different properties, so we need to treat them differently than showers for emitter in evt.get_sim_emitters(): - if not emitter.get_id() in shower_ids: # the key "shower_ids" is also used for emitters and identifies the emitter id. This is done because it is the only way to have the same input files for both shower/particle and emitter simulations. + # the key "shower_ids" is also used for emitters and identifies the emitter id. This is done because it is + # the only way to have the same input files for both shower/particle and emitter simulations. + if emitter.get_id() not in shower_ids: logger.debug(f"adding shower {emitter.get_id()} to output file") # shower ids might not be in increasing order. 
We need to sort the hdf5 output later shower_ids.append(emitter.get_id()) @@ -204,8 +210,9 @@ def add_event_group(self, event_buffer): self.__add_parameter(self._mout, keyname, emitter[key], self.__first_event) self.__first_event = False + # now save station data - stn = evt.get_station() # there can only ever be one station per event! If there are more than one station, this line will crash. + stn = evt.get_station() # there can only ever be one station per event! If there are more than one station, this line will crash. sg = self._mout_groups[sid] self.__add_parameter(sg, 'event_group_ids', evt.get_run_number()) self.__add_parameter(sg, 'event_ids', evt.get_id()) @@ -232,8 +239,8 @@ def add_event_group(self, event_buffer): self.__add_parameter(sg, 'triggered', stn.has_triggered()) - # depending on the simulation mode we have either showers or emitters but we can - # treat them the same way as long as we only call common member functions such as + # depending on the simulation mode we have either showers or emitters but we can + # treat them the same way as long as we only call common member functions such as # `get_id()` iterable = None if self._particle_mode: @@ -250,9 +257,9 @@ def add_event_group(self, event_buffer): # we need to save data per shower, channel and ray tracing solution. Due to the simple table structure # of the hdf5 files we need to preserve the ordering of the showers and channels. As the order in the # NuRadio data structure is different, we need to go through some effort to get the right order. - # The shower ids will be sorted at the very end. - # The channel ids already have the correct ordering. - # The ray tracing solutions are also ordered, because the efield object contains the correct ray tracing solution id. + # The shower ids will be sorted at the very end. + # The channel ids already have the correct ordering. + # The ray tracing solutions are also ordered, because the efield object contains the correct ray tracing solution id. 
channel_rt_data = {} keys_channel_rt_data = ['travel_times', 'travel_distances'] if self._mout_attributes['config']['speedup']['amp_per_ray_solution']: @@ -266,7 +273,7 @@ def add_event_group(self, event_buffer): # important: we need to loop over the channels of the station object, not # the channels present in the sim_station object. This is because the sim # channel object only contains the channels that have a signal, i.e., a ray - # tracing solution and a strong enough Askaryan signal. But we want to loop over all + # tracing solution and a strong enough Askaryan signal. But we want to loop over all # channels of the station, because we want to save the data for all channels, not only # the ones that have a signal. This also preserves the order of the channels. for iCh, channel in enumerate(stn.iter_channels(sorted=True)): @@ -310,9 +317,9 @@ def add_event_group(self, event_buffer): trigger_times = np.ones((len(shower_ids_stn), len(self._mout_attributes['trigger_names'])), dtype=float) * np.nan for eid in event_buffer[sid]: evt = event_buffer[sid][eid] - stn = evt.get_station() # there can only ever be one station per event! If there are more than one station, this line will crash. - # depending on the simulation mode we have either showers or emitters but we can - # treat them the same way as long as we only call common member functions such as + stn = evt.get_station() # there can only ever be one station per event! If there are more than one station, this line will crash. 
+ # depending on the simulation mode we have either showers or emitters but we can + # treat them the same way as long as we only call common member functions such as # `get_id()` iterable = None if self._particle_mode: @@ -372,7 +379,7 @@ def add_event_group(self, event_buffer): if self._particle_mode: # we also want to save the first interaction even if it didn't contribute to any trigger - # this is important to know the initial neutrino properties (only relevant for the simulation of + # this is important to know the initial neutrino properties (only relevant for the simulation of # secondary interactions) stn_buffer = event_buffer[self._station_ids[0]] evt = stn_buffer[list(stn_buffer.keys())[0]] @@ -513,7 +520,7 @@ def calculate_Veff(self): float: The calculated effective volume (Veff) """ # calculate effective - try: # sometimes not all relevant attributes are set, e.g. for emitter simulations. + try: # sometimes not all relevant attributes are set, e.g. for emitter simulations. triggered = remove_duplicate_triggers(self._mout['triggered'], self._mout['event_group_ids']) n_triggered = np.sum(triggered) n_triggered_weighted = np.sum(np.array(self._mout['weights'])[triggered])