Skip to content

Commit

Permalink
Implemented filter by resolution and n_obs in cctbx.xfel.merge
Browse files Browse the repository at this point in the history
  • Loading branch information
David Moreau authored and phyy-nx committed Dec 2, 2024
1 parent b37f114 commit 4ca6c71
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 49 deletions.
152 changes: 105 additions & 47 deletions xfel/merging/application/filter/experiment_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,11 @@ class experiment_filter(worker):
def __init__(self, params, mpi_helper=None, mpi_logger=None):
super(experiment_filter, self).__init__(params=params, mpi_helper=mpi_helper, mpi_logger=mpi_logger)

def validate(self):
if 'unit_cell' not in self.params.filter.algorithm: # so far only "unit_cell" algorithm is supported
return
assert self.params.filter.unit_cell.value.target_space_group is not None, \
'Space group is required for unit cell filtering'

def __repr__(self):
return 'Filter experiments'

def check_unit_cell(self, experiment):

experiment_unit_cell = experiment.crystal.get_unit_cell()

is_ok = experiment_unit_cell.is_similar_to(self.params.filter.unit_cell.value.target_unit_cell,
self.params.filter.unit_cell.value.relative_length_tolerance,
self.params.filter.unit_cell.value.absolute_angle_tolerance)
Expand Down Expand Up @@ -49,7 +41,6 @@ def remove_experiments(experiments, reflections, experiment_ids_to_remove):
experiments.select_on_experiment_identifiers([i for i in experiments.identifiers() if i not in experiment_ids_to_remove])
reflections.remove_on_experiment_identifiers(experiment_ids_to_remove)
reflections.reset_ids()

return experiments, reflections

def check_cluster(self, experiment):
Expand All @@ -69,18 +60,89 @@ def check_cluster(self, experiment):
return m_distance < self.params.filter.unit_cell.cluster.covariance.mahalanobis

def run(self, experiments, reflections):
if 'unit_cell' not in self.params.filter.algorithm: # so far only "unit_cell" algorithm is supported
filter_by_unit_cell = 'unit_cell' in self.params.filter.algorithm[0]
filter_by_n_obs = 'n_obs' in self.params.filter.algorithm[0]
filter_by_resolution = 'resolution' in self.params.filter.algorithm[0]
# only "unit_cell" "n_obs" and "resolution" algorithms are supported
if (not filter_by_unit_cell) and (not filter_by_n_obs) and (not filter_by_resolution):
return experiments, reflections
if filter_by_unit_cell:
assert self.params.filter.unit_cell.value.target_space_group is not None, \
'Space group is required for unit cell filtering'
if filter_by_n_obs:
check0 = self.params.filter.n_obs.min is not None
check1 = self.params.filter.n_obs.max is not None
assert check0 or check1, \
'Either min or max is required for n_obs filtering'
if filter_by_resolution:
assert self.params.filter.resolution.d_min is not None, \
'd_min is required for resolution filtering'

self.logger.log_step_time("FILTER_EXPERIMENTS")

# BEGIN BY-VALUE FILTER
experiment_ids_to_remove = []
if filter_by_unit_cell:
experiment_ids_to_remove_unit_cell, removed_for_unit_cell, removed_for_space_group = self.run_filter_by_unit_cell(experiments, reflections)
experiment_ids_to_remove += experiment_ids_to_remove_unit_cell
else:
removed_for_unit_cell = 0
removed_for_space_group = 0
if filter_by_n_obs:
experiment_ids_to_remove_n_obs, removed_for_n_obs = self.run_filter_by_n_obs(experiments, reflections)
experiment_ids_to_remove += experiment_ids_to_remove_n_obs
else:
removed_for_n_obs = 0
if filter_by_resolution:
experiment_ids_to_remove_resolution, removed_for_resolution = self.run_filter_by_resolution(experiments, reflections)
experiment_ids_to_remove += experiment_ids_to_remove_resolution
else:
removed_for_resolution = 0
experiment_ids_to_remove = list(set(experiment_ids_to_remove))

input_len_expts = len(experiments)
input_len_refls = len(reflections)
new_experiments, new_reflections = experiment_filter.remove_experiments(experiments, reflections, experiment_ids_to_remove)

removed_reflections = input_len_refls - len(new_reflections)
#assert removed_for_space_group + removed_for_unit_cell == input_len_expts - len(new_experiments)

self.logger.log("Experiments rejected because of unit cell dimensions: %d"%removed_for_unit_cell)
self.logger.log("Experiments rejected because of space group %d"%removed_for_space_group)
self.logger.log("Experiments rejected because of n_obs %d"%removed_for_n_obs)
self.logger.log("Experiments rejected because of resolution %d"%removed_for_resolution)
self.logger.log("Reflections rejected because of rejected experiments: %d"%removed_reflections)

# MPI-reduce total counts
comm = self.mpi_helper.comm
MPI = self.mpi_helper.MPI
total_removed_for_unit_cell = comm.reduce(removed_for_unit_cell, MPI.SUM, 0)
total_removed_for_space_group = comm.reduce(removed_for_space_group, MPI.SUM, 0)
total_removed_for_n_obs = comm.reduce(removed_for_n_obs, MPI.SUM, 0)
total_removed_for_resolution = comm.reduce(removed_for_resolution, MPI.SUM, 0)
total_reflections_removed = comm.reduce(removed_reflections, MPI.SUM, 0)

# rank 0: log total counts
if self.mpi_helper.rank == 0:
self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_removed_for_unit_cell)
self.logger.main_log("Total experiments rejected because of space group %d"%total_removed_for_space_group)
self.logger.main_log("Total experiments rejected because of n_obs %d"%total_removed_for_n_obs)
self.logger.main_log("Total experiments rejected because of resolution %d"%total_removed_for_resolution)
self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_removed)

self.logger.log_step_time("FILTER_EXPERIMENTS", True)

# Do we have any data left?
from xfel.merging.application.utils.data_counter import data_counter
data_counter(self.params).count(new_experiments, new_reflections)
return new_experiments, new_reflections

def filter_by_unit_cell(self, experiments, reflections):
experiment_ids_to_remove = []
removed_for_unit_cell = 0
removed_for_space_group = 0

# BEGIN BY-VALUE FILTER
if self.params.filter.unit_cell.algorithm == "value":
# If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets.
# If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets.
if self.params.filter.unit_cell.value.target_unit_cell in (Auto, None):
if self.params.scaling.unit_cell is None:
try:
Expand All @@ -102,12 +164,10 @@ def run(self, experiments, reflections):
elif not self.check_unit_cell(experiment):
experiment_ids_to_remove.append(experiment.identifier)
removed_for_unit_cell += 1
# END BY-VALUE FILTER
# END BY-VALUE FILTER
elif self.params.filter.unit_cell.algorithm == "cluster":

from uc_metrics.clustering.util import get_population_permutation # implicit import
import pickle

class Empty: pass
if self.mpi_helper.rank == 0:
with open(self.params.filter.unit_cell.cluster.covariance.file,'rb') as F:
Expand Down Expand Up @@ -136,38 +196,36 @@ class Empty: pass
if not self.check_cluster(experiment):
experiment_ids_to_remove.append(experiment.identifier)
removed_for_unit_cell += 1
# END OF COVARIANCE FILTER
input_len_expts = len(experiments)
input_len_refls = len(reflections)
new_experiments, new_reflections = experiment_filter.remove_experiments(experiments, reflections, experiment_ids_to_remove)

removed_reflections = input_len_refls - len(new_reflections)
assert removed_for_space_group + removed_for_unit_cell == input_len_expts - len(new_experiments)

self.logger.log("Experiments rejected because of unit cell dimensions: %d"%removed_for_unit_cell)
self.logger.log("Experiments rejected because of space group %d"%removed_for_space_group)
self.logger.log("Reflections rejected because of rejected experiments: %d"%removed_reflections)

# MPI-reduce total counts
comm = self.mpi_helper.comm
MPI = self.mpi_helper.MPI
total_removed_for_unit_cell = comm.reduce(removed_for_unit_cell, MPI.SUM, 0)
total_removed_for_space_group = comm.reduce(removed_for_space_group, MPI.SUM, 0)
total_reflections_removed = comm.reduce(removed_reflections, MPI.SUM, 0)

# rank 0: log total counts
if self.mpi_helper.rank == 0:
self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_removed_for_unit_cell)
self.logger.main_log("Total experiments rejected because of space group %d"%total_removed_for_space_group)
self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_removed)

self.logger.log_step_time("FILTER_EXPERIMENTS", True)

# Do we have any data left?
from xfel.merging.application.utils.data_counter import data_counter
data_counter(self.params).count(new_experiments, new_reflections)
# END OF COVARIANCE FILTER
return experiment_ids_to_remove, removed_for_unit_cell, removed_for_space_group

return new_experiments, new_reflections
def run_filter_by_n_obs(self, experiments, reflections):
experiment_ids_to_remove = []
removed_for_n_obs = 0
for expt_index, experiment in enumerate(experiments):
refls_expt = reflections.select(reflections['id'] == expt_index)
if self.params.filter.n_obs.max and len(refls_expt) > self.params.filter.n_obs.max:
experiment_ids_to_remove.append(experiment.identifier)
removed_for_n_obs += 1
if self.params.filter.n_obs.min and len(refls_expt) < self.params.filter.n_obs.min:
experiment_ids_to_remove.append(experiment.identifier)
removed_for_n_obs += 1
return experiment_ids_to_remove, removed_for_n_obs

def run_filter_by_resolution(self, experiments, reflections):
experiment_ids_to_remove = []
removed_for_resolution = 0
resolution = reflections.compute_d(experiments)
start = 0
for expt_index, expt in enumerate(experiments):
refls_expt = reflections.select(reflections['id'] == expt_index)
n_refls = refls_expt.size()
max_resolution = min(resolution[start: start + n_refls])
start += n_refls
if max_resolution > self.params.filter.resolution.d_min:
experiment_ids_to_remove.append(expt.identifier)
removed_for_resolution += 1
return experiment_ids_to_remove, removed_for_resolution

if __name__ == '__main__':
from xfel.merging.application.worker import exercise_worker
Expand Down
8 changes: 6 additions & 2 deletions xfel/merging/application/phil/phil.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,18 @@
.help = The filter section defines criteria to accept or reject whole experiments
.help = or to modify the entire experiment by a reindexing operator
.help = refer to the select section for filtering of individual reflections
.help = only unit_cell, resolution, and n_obs are implemented
{
algorithm = n_obs reindex resolution unit_cell report
.type = choice
.type = strings
.multiple = True
n_obs {
min = 15
min = None
.type = int
.help = Minimum number of observations for subsequent processing
max = None
.type = int
.help = Maximum number of observations for subsequent processing
}
reindex {
data_reindex_op = h,k,l
Expand Down

0 comments on commit 4ca6c71

Please sign in to comment.