From 01d01c3d9fb0e502f864643c33c2d664cc516220 Mon Sep 17 00:00:00 2001 From: hurricane642 Date: Thu, 24 Nov 2022 11:54:27 +0100 Subject: [PATCH 1/3] add first version of the complex transformer for property selection --- python/rascaline/transformer.py | 296 ++++++++++++++++++++++++++++++++ python/tests/transformer.py | 127 ++++++++++++++ 2 files changed, 423 insertions(+) create mode 100644 python/rascaline/transformer.py create mode 100644 python/tests/transformer.py diff --git a/python/rascaline/transformer.py b/python/rascaline/transformer.py new file mode 100644 index 000000000..bb1172b51 --- /dev/null +++ b/python/rascaline/transformer.py @@ -0,0 +1,296 @@ +# -*- coding: utf-8 -*- +import numpy as np +from equistore import Labels, TensorBlock, TensorMap + + +class Transformer: + """The 'Transformer' class makes it easy to create a representation matrix + when using some other matrix as a reference. A classic use case is to create + a TensorMap representation for a dataset, then perform transformations + within that TensorMap (e.g., keys_to_features or keys_to_properties), and + select the most useful features in the transformed TensorMap. + The 'Transformer' allows a set of these features to be used to calculate + a new TensorMap, thus saving computation time and maintaining a single + representation for all representations. + + Parameters: + ----------- + :param selectors: The selector to be used when selecting the features. + Currently the same selector is used for different blocks. + #TODO: It should be possible for the user to pass a list of selectors + for blocks. + :param transformation: the type of transformation to be performed. + Two options are possible - 'keys_to_features' and 'keys_to_properties'. + #TODO: provide the ability to pass a list of conversions that will occur + one after the other. + :param moved_keys: Those keys which will be moved during the transformation. + This variable can accept knowledge of type str (one key), list (a list + of keys) and Labels (in addition to the name of the keys, pass a list + of keys to be moved). + """ + + def __init__(self, selector, transformation=None, moved_keys=None): + # + self.selector = selector + self.transformation = transformation + self.moved_keys = moved_keys + if ( + (self.transformation is not None) + and (self.transformation != "keys_to_samples") + and (self.transformation != "keys_to_properties") + ): + raise ValueError( + "`transformation` parameter should be either `keys_to_samples`," + f" either `keys_to_properties`, got {self.transformation}" + ) + if (self.transformation is None) and (self.moved_keys is not None): + raise ValueError("unable to shift keys: unknown transformation type") + + def _copy(self, tensor_map): + """This function that allows you to create a copy of 'TensorMap'. + It may be worth adding this function to the equistore. + """ + blocks = [] + for _, block in tensor_map: + blocks.append(block.copy()) + return TensorMap(tensor_map.keys, blocks) + + def keys_definition(self): + """This is another internal function that performs two main tasks. + First, it converts all moved_keys to the same format. What is + meant is that further we need the names of the keys we are going + to move, as well as the 'TensorMap' keys, which will be passed + to the compute function as a reference at the end. This function + stores the names of the moved keys in the 'moved_keys_names' array, + and stores the keys of the final TensorMap reference in 'final_keys'. + """ + # the first 2 cases are simple - we either copy the moved_keys directly, + # or create an array based on them, and simply take all the keys passed + # in the fit TensorMap step as the final keys. + if isinstance(self.moved_keys, str): + self.moved_keys_names = [self.moved_keys.copy()] + self.final_keys = self._old_keys + elif isinstance(self.moved_keys, list): + self.moved_keys_names = self.moved_keys.copy() + self.final_keys = self._old_keys + else: + # The third case is a little more complicated. + # First, we save the names of the moved keys, + # taking them from Labels 'moved_keys'. + self.moved_keys_names = self.moved_keys.names + names = [] + new_keys = [] + # Let's write down the order of the keys we will have during the + # course of the algorithm in the 'names' + names.extend(self.tensor_map.keys.names) + names.extend(self.moved_keys_names) + # Now let's generate reference TensorMap keys. They will consist of + # two parts - those keys that were left after transformation, and + # those keys that were in the values of the variable moved_keys. + # Go through them and create all possible combinations of these + # parts. + for key in self.tensor_map.keys: + for value in self.moved_keys: + clue = [k.copy() for k in key] + clue.extend(value) + new_keys.append(clue) + # The keys have been listed in random order, let's arrange them and + # store the values in 'final_keys'. + indices = [] + for key in self._old_keys_names: + indices.append(names.index(key)) + ordered_keys = [] + for el in new_keys: + key = [el[i] for i in indices] + ordered_keys.append(key) + self.final_keys = Labels( + names=self._old_keys_names, values=np.array(ordered_keys) + ) + + def _mover(self, tensor_map): + # Internal function that does the transformation of the reference + # Tensormap. + self._old_keys = tensor_map.keys + self._old_keys_names = tensor_map.keys.names + tensor_copy = self._copy(tensor_map) + if self.transformation is not None: + if self.transformation == "keys_to_samples": + tensor_copy.keys_to_samples(self.moved_keys) + elif self.transformation == "keys_to_properties": + tensor_copy.keys_to_properties(self.moved_keys) + return tensor_copy + + def properties_selection(self): + # This function selects properties according to a preset algorithm + # within each 'TensorMap' block + blocks = [] + for _, block in self.tensor_map: + mask = self.selector.fit(block.values).get_support() + selected_properties = block.properties[mask] + blocks.append( + TensorBlock( + # Since the resulting 'TensorMap' will then be used as a + # reference, the only thing we are interested in each + # block is the name of the properties. + values=np.empty((1, len(selected_properties))), + samples=Labels.single(), + components=[], + properties=selected_properties, + ) + ) + + self.selected_tensor = TensorMap(self.tensor_map.keys, blocks) + + def fit(self, tensor_map): + """The fit function tells the transformer which attributes to use when + creating new representations. + + Parameters: + ----------- + :param tensor_map: reference TensorMap, with which transformations are + carried out and in which properties are selected. + """ + self.tensor_map = self._mover(tensor_map) + self.keys_definition() + self.properties_selection() + + def transform(self, frames, calculator): + """A function that creates a TensorMap representation based on the + passed frames as well as a previously performed fit. + + Parameters: + ----------- + :param frames: list with the frames to be processed during this function. + :param calculator: calculator that will compute the representation of + the transferred frames. + """ + if self.transformation is None: + # trivial case - nothing happened, do the usual calculation. + descriptor = calculator.compute( + frames, selected_properties=self.selected_tensor + ) + return descriptor + elif self.transformation == "keys_to_samples": + # In the second case the situation is a bit more complicated. + # Suppose we originally had a set of key names {'a', 'b', 'c'}. + # We moved key 'c' to samples. We are left with blocks with keys + # {'a', 'b'}. Let's start going through all the final keys. We take + # key {a_1, b_1, c_1}. Its corresponding features are in the + # {a_1, b_1} block. Accordingly, all we need to do is tear off what + # we have moved from the keys, take the properties from the + # resulting block and save them. + blocks = [] + idx = [] + # save the positions of the moved keys. + for key in self.moved_keys_names: + idx.append(self.final_keys.names.index(key)) + for obj in self.final_keys: + # separate the moved keys, obtain a block based on the remainder + obt_key = tuple(item for i, item in enumerate(obj) if i not in idx) + if len(obt_key) == 0: + obt_key = (0,) + block = self.selected_tensor.block( + self.tensor_map.keys.position(obt_key) + ) + blocks.append( + TensorBlock( + values=np.empty((1, len(block.properties))), + samples=Labels.single(), + components=[], + properties=block.properties, + ) + ) + properties_tensor = TensorMap(self.final_keys, blocks) + # Do the final computation + descriptor = calculator.compute( + frames, + selected_properties=properties_tensor, + selected_keys=self.final_keys, + ) + return descriptor + elif self.transformation == "keys_to_properties": + # The third case is the most complicated. Again, let's start with a + # TensorMap with {'a', 'b', 'c'} keys. Suppose we move the 'c' keys + # to properties. We take the final key {a_1, b_1, c_1}. Its + # corresponding properties lie in the block {a_1, b_1}. But we do + # not need all the properties, we need only those properties that + # include c_1 in the label. We need to take all these properties, + # separate c_1 from them and save them in the corresponding block. + + # save positions of the moved keys in the properties array + pos_in_prop = [] + for key in self.moved_keys_names: + pos_in_prop.append(self.tensor_map.property_names.index(key)) + idx = [] + property_names = [] + # save property names, which were originaly, before the `move` + for i, key in enumerate(self.tensor_map.property_names): + if i not in pos_in_prop: + property_names.append(key) + # determine the positions of the moved keys in the final keys + for key in self.moved_keys_names: + idx.append(self.final_keys.names.index(key)) + # in this dictionary we write a list of properties, which we will + # save for each block. + properties_dict = {} + for obj in self.final_keys: + obj_tuple = tuple(item for item in obj) + properties_dict[obj_tuple] = [] + # running through all the keys of the transformed tensor + for obj in self.tensor_map.keys: + # obtain block by the position of key + block = self.selected_tensor.block(self.tensor_map.keys.position(obj)) + # go through all properties (each one consists of a set of values) + for prop in block.properties: + # this array stores the part of properties that was previously + # keys + add_key = [] + # and here are those who always have been properties + property_initial = [] + for i, item in enumerate(prop): + if i in pos_in_prop: + add_key.append(item) + else: + property_initial.append(item) + obt_key = [] + add_key_ind = 0 + key_ind = 0 + # put the key together from the two pieces - the one you + # moved and the one you have left + for i in range(len(self.final_keys.names)): + if i in idx: + obt_key.append(add_key[add_key_ind]) + add_key_ind += 1 + else: + obt_key.append(obj[key_ind]) + key_ind += 1 + obt_key = tuple(obt_key) + # add the original properties in our dictionary + properties_dict[obt_key].append(property_initial) + blocks = [] + # go through the original keys to create a tensor for selection + for key in self.final_keys: + key = tuple(key) + # In theory, we may find that we have not selected any property + # that is correspond to this block - take this into account. + if properties_dict[key] != []: + values = np.array(properties_dict[key]) + else: + values = np.empty((0, len(property_names)), dtype=int) + properties = Labels(names=property_names, values=values) + # create the block for each key + blocks.append( + TensorBlock( + values=np.empty((1, len(properties))), + samples=Labels.single(), + components=[], + properties=properties, + ) + ) + properties_tensor = TensorMap(self.final_keys, blocks) + descriptor = calculator.compute( + frames, + selected_properties=properties_tensor, + selected_keys=self.final_keys, + ) + return descriptor diff --git a/python/tests/transformer.py b/python/tests/transformer.py new file mode 100644 index 000000000..c1950bd22 --- /dev/null +++ b/python/tests/transformer.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +import unittest + +import numpy as np +from equistore import TensorBlock, TensorMap +from skcosmo.feature_selection import FPS as FPS_f + +from rascaline.calculators import DummyCalculator +from rascaline.transformer import Transformer + +from test_systems import TestSystem + + +class TestTransformer(unittest.TestCase): + def test_keys_to_samples(self): + system = TestSystem() + calculator = DummyCalculator(cutoff=3.2, delta=2, name="") + descriptor = calculator.compute(system) + tr = Transformer( + selector=FPS_f(n_to_select=2), + transformation="keys_to_samples", + moved_keys=["species_center"], + ) + tr.fit(descriptor) + result = tr.transform(system, calculator) + result.keys_to_samples("species_center") + desc = calculator.compute(system) + desc.keys_to_samples("species_center") + blocks = [] + for _, block in desc: + fps = FPS_f(n_to_select=2) + mask = fps.fit(block.values).get_support() + selected_properties = block.properties[mask] + blocks.append( + TensorBlock( + values=block.values[:, mask], + samples=block.samples, + components=block.components, + properties=selected_properties, + ) + ) + selected_desc = TensorMap(desc.keys, blocks) + for i in range(len(selected_desc.keys)): + self.assertTrue( + np.array_equal(selected_desc.block(i).values, result.block(i).values) + ) + + def test_keys_to_properties(self): + system = TestSystem() + calculator = DummyCalculator(cutoff=3.2, delta=2, name="") + descriptor = calculator.compute(system) + tr = Transformer( + selector=FPS_f(n_to_select=2), + transformation="keys_to_properties", + moved_keys=["species_center"], + ) + tr.fit(descriptor) + result = tr.transform(system, calculator) + desc = calculator.compute(system) + desc.keys_to_properties("species_center") + result.keys_to_properties("species_center") + blocks = [] + for _, block in desc: + # create a separate FPS selector for each block + fps = FPS_f(n_to_select=2) + mask = fps.fit(block.values).get_support() + selected_properties = block.properties[mask] + # put the selected features in a format rascaline can use + blocks.append( + TensorBlock( + # values, samples and component carry no information here + values=block.values[:, mask], + samples=block.samples, + components=block.components, + properties=selected_properties, + ) + ) + selected_desc = TensorMap(desc.keys, blocks) + for i in range(len(selected_desc.keys)): + self.assertTrue( + np.array_equal(selected_desc.block(i).values, result.block(i).values) + ) + + # This test uses a function which is not yet implemented in rascaline, + # so it is temporarily commented out + # def test_keys_to_properties_labels(self): + # system = TestSystem() + # lab = Labels( + # names=['species_center'], + # values=np.array([[0], [1]]) + # ) + # calculator = DummyCalculator(cutoff=3.2, delta=2, name="") + # descriptor = calculator.compute(system) + # tr = Transformer(selector = FPS_f(n_to_select=2), + # transformation='keys_to_properties', + # moved_keys=lab) + # tr.fit(descriptor) + # result = tr.transform(system, calculator) + # desc = calculator.compute(system) + # desc.keys_to_properties(lab) + # result.keys_to_properties(lab) + # blocks=[] + # for _, block in desc: + # # create a separate FPS selector for each block + # fps = FPS_f(n_to_select=2) + # mask = fps.fit(block.values).get_support() + # selected_properties = block.properties[mask] + # # put the selected features in a format rascaline can use + # blocks.append( + # TensorBlock( + # # values, samples and component carry no information here + # values=block.values[:, mask], + # samples=block.samples, + # components=block.components, + # properties=selected_properties, + # ) + # ) + # selected_desc = TensorMap(desc.keys, blocks) + # print(selected_desc.block(0).values, result.block(0).values) + # for i in range(len(selected_desc.keys)): + # self.assertTrue(np.array_equal(selected_desc.block(i).values, + # result.block(i).values)) + # + + +if __name__ == "__main__": + unittest.main() From 0865e5079c16e9857f0a117817e4bca352d18ec3 Mon Sep 17 00:00:00 2001 From: hurricane642 Date: Mon, 19 Dec 2022 11:28:03 -0800 Subject: [PATCH 2/3] changes in naming --- ...{transformer.py => properties_selector.py} | 187 +++++++++++------- ...{transformer.py => properties_selector.py} | 51 +++-- setup.cfg | 1 + tox.ini | 2 +- 4 files changed, 143 insertions(+), 98 deletions(-) rename python/rascaline/{transformer.py => properties_selector.py} (65%) rename python/tests/{transformer.py => properties_selector.py} (78%) diff --git a/python/rascaline/transformer.py b/python/rascaline/properties_selector.py similarity index 65% rename from python/rascaline/transformer.py rename to python/rascaline/properties_selector.py index bb1172b51..4c65d7ce8 100644 --- a/python/rascaline/transformer.py +++ b/python/rascaline/properties_selector.py @@ -3,13 +3,13 @@ from equistore import Labels, TensorBlock, TensorMap -class Transformer: - """The 'Transformer' class makes it easy to create a representation matrix +class PropertiesSelector: + """The 'PropertiesSelector' class makes it easy to create a representation matrix when using some other matrix as a reference. A classic use case is to create a TensorMap representation for a dataset, then perform transformations within that TensorMap (e.g., keys_to_features or keys_to_properties), and select the most useful features in the transformed TensorMap. - The 'Transformer' allows a set of these features to be used to calculate + The 'PropertiesSelector' allows a set of these features to be used to calculate a new TensorMap, thus saving computation time and maintaining a single representation for all representations. @@ -23,27 +23,59 @@ class Transformer: Two options are possible - 'keys_to_features' and 'keys_to_properties'. #TODO: provide the ability to pass a list of conversions that will occur one after the other. - :param moved_keys: Those keys which will be moved during the transformation. - This variable can accept knowledge of type str (one key), list (a list - of keys) and Labels (in addition to the name of the keys, pass a list - of keys to be moved). + :param calculator: an instance of the calculator that will calculate the + descriptor within this instance of the class. + :param keys_to_move: Those keys which will be moved during the transformation. + This variable can be anything supported by the + :py:class:`equistore.TensorMap.keys_to_properties` or + :py:class:`equistore.TensorMap.keys_to_samples` functions, i.e. one + string, a list of strings or an instance of :py:class:`equistore.Labels` + :param use_native_system: If ``True`` (this is the default), copy data + from the ``systems`` into Rust ``SimpleSystem``. This can be a lot + faster than having to cross the FFI boundary often when accessing + the neighbor list. Otherwise the Python neighbor list is used. + + :param gradients: List of gradients to compute. If this is ``None`` or + an empty list ``[]``, no gradients are computed. Gradients are + stored inside the different blocks, and can be accessed with + ``descriptor.block(...).gradient()``, where + ```` is ``"positions"`` or ``"cell"``. The following + gradients are available: """ - def __init__(self, selector, transformation=None, moved_keys=None): + def __init__( + self, + selector, + calculator, + transformation=None, + keys_to_move=None, + gradients=None, + use_native_system=True, + ): # - self.selector = selector - self.transformation = transformation - self.moved_keys = moved_keys + self._selector = selector + self._transformation = transformation + self._moved_keys = keys_to_move + self.calculator = calculator + self.calculator_grad = gradients + self.calculator_use_native_system = use_native_system + self.transformed_leys = None + self._moved_keys_names = None + self._initial_keys = None + self.tensor_map = None + self._initial_keys_names = None + self.selected_tensor = None + if ( - (self.transformation is not None) - and (self.transformation != "keys_to_samples") - and (self.transformation != "keys_to_properties") + (self._transformation is not None) + and (self._transformation != "keys_to_samples") + and (self._transformation != "keys_to_properties") ): raise ValueError( "`transformation` parameter should be either `keys_to_samples`," - f" either `keys_to_properties`, got {self.transformation}" + f" either `keys_to_properties`, got {self._transformation}" ) - if (self.transformation is None) and (self.moved_keys is not None): + if (self._transformation is None) and (self._moved_keys is not None): raise ValueError("unable to shift keys: unknown transformation type") def _copy(self, tensor_map): @@ -55,77 +87,79 @@ def _copy(self, tensor_map): blocks.append(block.copy()) return TensorMap(tensor_map.keys, blocks) - def keys_definition(self): + def _keys_definition(self): """This is another internal function that performs two main tasks. First, it converts all moved_keys to the same format. What is meant is that further we need the names of the keys we are going to move, as well as the 'TensorMap' keys, which will be passed to the compute function as a reference at the end. This function stores the names of the moved keys in the 'moved_keys_names' array, - and stores the keys of the final TensorMap reference in 'final_keys'. + and stores the keys of the final TensorMap reference in 'transformed_leys'. """ # the first 2 cases are simple - we either copy the moved_keys directly, # or create an array based on them, and simply take all the keys passed # in the fit TensorMap step as the final keys. - if isinstance(self.moved_keys, str): - self.moved_keys_names = [self.moved_keys.copy()] - self.final_keys = self._old_keys - elif isinstance(self.moved_keys, list): - self.moved_keys_names = self.moved_keys.copy() - self.final_keys = self._old_keys + if isinstance(self._moved_keys, str): + self._moved_keys_names = [self.moved_keys] + self.transformed_leys = self._initial_keys + elif isinstance(self._moved_keys, list): + self._moved_keys_names = self._moved_keys.copy() + self.transformed_leys = self._initial_keys else: + assert isinstance(self._moved_keys, Labels) + # The third case is a little more complicated. # First, we save the names of the moved keys, # taking them from Labels 'moved_keys'. - self.moved_keys_names = self.moved_keys.names + self._moved_keys_names = self._moved_keys.names names = [] new_keys = [] # Let's write down the order of the keys we will have during the # course of the algorithm in the 'names' names.extend(self.tensor_map.keys.names) - names.extend(self.moved_keys_names) + names.extend(self._moved_keys_names) # Now let's generate reference TensorMap keys. They will consist of # two parts - those keys that were left after transformation, and # those keys that were in the values of the variable moved_keys. # Go through them and create all possible combinations of these # parts. for key in self.tensor_map.keys: - for value in self.moved_keys: + for value in self._moved_keys: clue = [k.copy() for k in key] clue.extend(value) new_keys.append(clue) # The keys have been listed in random order, let's arrange them and - # store the values in 'final_keys'. + # store the values in 'transformed_leys'. indices = [] - for key in self._old_keys_names: + for key in self._initial_keys_names: indices.append(names.index(key)) ordered_keys = [] for el in new_keys: key = [el[i] for i in indices] ordered_keys.append(key) - self.final_keys = Labels( - names=self._old_keys_names, values=np.array(ordered_keys) + self.transformed_leys = Labels( + names=self._initial_keys_names, values=np.array(ordered_keys) ) def _mover(self, tensor_map): # Internal function that does the transformation of the reference # Tensormap. - self._old_keys = tensor_map.keys - self._old_keys_names = tensor_map.keys.names + self._initial_keys = tensor_map.keys + self._initial_keys_names = tensor_map.keys.names tensor_copy = self._copy(tensor_map) - if self.transformation is not None: - if self.transformation == "keys_to_samples": - tensor_copy.keys_to_samples(self.moved_keys) - elif self.transformation == "keys_to_properties": - tensor_copy.keys_to_properties(self.moved_keys) + if self._transformation is not None: + if self._transformation == "keys_to_samples": + tensor_copy.keys_to_samples(self._moved_keys) + elif self._transformation == "keys_to_properties": + tensor_copy.keys_to_properties(self._moved_keys) return tensor_copy - def properties_selection(self): + def _properties_selection(self): # This function selects properties according to a preset algorithm # within each 'TensorMap' block blocks = [] for _, block in self.tensor_map: - mask = self.selector.fit(block.values).get_support() + mask = self._selector.fit(block.values).get_support() selected_properties = block.properties[mask] blocks.append( TensorBlock( @@ -141,36 +175,43 @@ def properties_selection(self): self.selected_tensor = TensorMap(self.tensor_map.keys, blocks) - def fit(self, tensor_map): + def fit(self, reference_frames): """The fit function tells the transformer which attributes to use when creating new representations. Parameters: ----------- - :param tensor_map: reference TensorMap, with which transformations are - carried out and in which properties are selected. + :param reference_frames: reference frames, with which representation + and then transformations are carried out and in which properties + are selected. """ + tensor_map = self.calculator.compute( + systems=reference_frames, + gradients=self.calculator_grad, + use_native_system=self.calculator_use_native_system, + ) self.tensor_map = self._mover(tensor_map) - self.keys_definition() - self.properties_selection() + self._keys_definition() + self._properties_selection() - def transform(self, frames, calculator): + def transform(self, frames): """A function that creates a TensorMap representation based on the passed frames as well as a previously performed fit. Parameters: ----------- - :param frames: list with the frames to be processed during this function. - :param calculator: calculator that will compute the representation of - the transferred frames. + :param frames: list with the frames to be processed during this + function. """ - if self.transformation is None: + if self._transformation is None: # trivial case - nothing happened, do the usual calculation. - descriptor = calculator.compute( - frames, selected_properties=self.selected_tensor + descriptor = self.calculator.compute( + systems=frames, + gradients=self.calculator_grad, + use_native_system=self.calculator_use_native_system, ) return descriptor - elif self.transformation == "keys_to_samples": + elif self._transformation == "keys_to_samples": # In the second case the situation is a bit more complicated. # Suppose we originally had a set of key names {'a', 'b', 'c'}. # We moved key 'c' to samples. We are left with blocks with keys @@ -182,9 +223,9 @@ def transform(self, frames, calculator): blocks = [] idx = [] # save the positions of the moved keys. - for key in self.moved_keys_names: - idx.append(self.final_keys.names.index(key)) - for obj in self.final_keys: + for key in self._moved_keys_names: + idx.append(self.transformed_leys.names.index(key)) + for obj in self.transformed_leys: # separate the moved keys, obtain a block based on the remainder obt_key = tuple(item for i, item in enumerate(obj) if i not in idx) if len(obt_key) == 0: @@ -200,15 +241,17 @@ def transform(self, frames, calculator): properties=block.properties, ) ) - properties_tensor = TensorMap(self.final_keys, blocks) + properties_tensor = TensorMap(self.transformed_leys, blocks) # Do the final computation - descriptor = calculator.compute( - frames, + descriptor = self.calculator.compute( + systems=frames, + gradients=self.calculator_grad, + use_native_system=self.calculator_use_native_system, selected_properties=properties_tensor, - selected_keys=self.final_keys, + selected_keys=self.transformed_leys, ) return descriptor - elif self.transformation == "keys_to_properties": + elif self._transformation == "keys_to_properties": # The third case is the most complicated. Again, let's start with a # TensorMap with {'a', 'b', 'c'} keys. Suppose we move the 'c' keys # to properties. We take the final key {a_1, b_1, c_1}. Its @@ -219,7 +262,7 @@ def transform(self, frames, calculator): # save positions of the moved keys in the properties array pos_in_prop = [] - for key in self.moved_keys_names: + for key in self._moved_keys_names: pos_in_prop.append(self.tensor_map.property_names.index(key)) idx = [] property_names = [] @@ -228,12 +271,12 @@ def transform(self, frames, calculator): if i not in pos_in_prop: property_names.append(key) # determine the positions of the moved keys in the final keys - for key in self.moved_keys_names: - idx.append(self.final_keys.names.index(key)) + for key in self._moved_keys_names: + idx.append(self.transformed_leys.names.index(key)) # in this dictionary we write a list of properties, which we will # save for each block. properties_dict = {} - for obj in self.final_keys: + for obj in self.transformed_leys: obj_tuple = tuple(item for item in obj) properties_dict[obj_tuple] = [] # running through all the keys of the transformed tensor @@ -257,7 +300,7 @@ def transform(self, frames, calculator): key_ind = 0 # put the key together from the two pieces - the one you # moved and the one you have left - for i in range(len(self.final_keys.names)): + for i in range(len(self.transformed_leys.names)): if i in idx: obt_key.append(add_key[add_key_ind]) add_key_ind += 1 @@ -269,7 +312,7 @@ def transform(self, frames, calculator): properties_dict[obt_key].append(property_initial) blocks = [] # go through the original keys to create a tensor for selection - for key in self.final_keys: + for key in self.transformed_leys: key = tuple(key) # In theory, we may find that we have not selected any property # that is correspond to this block - take this into account. @@ -287,10 +330,12 @@ def transform(self, frames, calculator): properties=properties, ) ) - properties_tensor = TensorMap(self.final_keys, blocks) - descriptor = calculator.compute( - frames, + properties_tensor = TensorMap(self.transformed_leys, blocks) + descriptor = self.calculator.compute( + systems=frames, + gradients=self.calculator_grad, + use_native_system=self.calculator_use_native_system, selected_properties=properties_tensor, - selected_keys=self.final_keys, + selected_keys=self.transformed_leys, ) return descriptor diff --git a/python/tests/transformer.py b/python/tests/properties_selector.py similarity index 78% rename from python/tests/transformer.py rename to python/tests/properties_selector.py index c1950bd22..eb1181f4c 100644 --- a/python/tests/transformer.py +++ b/python/tests/properties_selector.py @@ -3,32 +3,32 @@ import numpy as np from equistore import TensorBlock, TensorMap -from skcosmo.feature_selection import FPS as FPS_f +from skcosmo.feature_selection import FPS as FPS from rascaline.calculators import DummyCalculator -from rascaline.transformer import Transformer +from rascaline.properties_selector import PropertiesSelector from test_systems import TestSystem -class TestTransformer(unittest.TestCase): +class TestPropertiesSelector(unittest.TestCase): def test_keys_to_samples(self): system = TestSystem() calculator = DummyCalculator(cutoff=3.2, delta=2, name="") - descriptor = calculator.compute(system) - tr = Transformer( - selector=FPS_f(n_to_select=2), + tr = PropertiesSelector( + selector=FPS(n_to_select=2), + calculator=calculator, transformation="keys_to_samples", - moved_keys=["species_center"], + keys_to_move=["species_center"], ) - tr.fit(descriptor) - result = tr.transform(system, calculator) + tr.fit(system) + result = tr.transform(system) result.keys_to_samples("species_center") desc = calculator.compute(system) desc.keys_to_samples("species_center") blocks = [] for _, block in desc: - fps = FPS_f(n_to_select=2) + fps = FPS(n_to_select=2) mask = fps.fit(block.values).get_support() selected_properties = block.properties[mask] blocks.append( @@ -48,21 +48,21 @@ def test_keys_to_samples(self): def test_keys_to_properties(self): system = TestSystem() calculator = DummyCalculator(cutoff=3.2, delta=2, name="") - descriptor = calculator.compute(system) - tr = Transformer( - selector=FPS_f(n_to_select=2), + tr = PropertiesSelector( + selector=FPS(n_to_select=2), + calculator=calculator, transformation="keys_to_properties", - moved_keys=["species_center"], + keys_to_move=["species_center"], ) - tr.fit(descriptor) - result = tr.transform(system, calculator) + tr.fit(system) + result = tr.transform(system) desc = calculator.compute(system) desc.keys_to_properties("species_center") result.keys_to_properties("species_center") blocks = [] for _, block in desc: # create a separate FPS selector for each block - fps = FPS_f(n_to_select=2) + fps = FPS(n_to_select=2) mask = fps.fit(block.values).get_support() selected_properties = block.properties[mask] # put the selected features in a format rascaline can use @@ -81,8 +81,8 @@ def test_keys_to_properties(self): np.array_equal(selected_desc.block(i).values, result.block(i).values) ) - # This test uses a function which is not yet implemented in rascaline, - # so it is temporarily commented out + # # This test uses a function which is not yet implemented in rascaline, + # # so it is temporarily commented out # def test_keys_to_properties_labels(self): # system = TestSystem() # lab = Labels( @@ -90,19 +90,19 @@ def test_keys_to_properties(self): # values=np.array([[0], [1]]) # ) # calculator = DummyCalculator(cutoff=3.2, delta=2, name="") - # descriptor = calculator.compute(system) - # tr = Transformer(selector = FPS_f(n_to_select=2), + # tr = PropertiesSelector(selector=FPS(n_to_select=2), + # calculator=calculator, # transformation='keys_to_properties', - # moved_keys=lab) - # tr.fit(descriptor) - # result = tr.transform(system, calculator) + # keys_to_move=lab) + # tr.fit(system) + # result = tr.transform(system) # desc = calculator.compute(system) # desc.keys_to_properties(lab) # result.keys_to_properties(lab) # blocks=[] # for _, block in desc: # # create a separate FPS selector for each block - # fps = FPS_f(n_to_select=2) + # fps = FPS(n_to_select=2) # mask = fps.fit(block.values).get_support() # selected_properties = block.properties[mask] # # put the selected features in a format rascaline can use @@ -120,7 +120,6 @@ def test_keys_to_properties(self): # for i in range(len(selected_desc.keys)): # self.assertTrue(np.array_equal(selected_desc.block(i).values, # result.block(i).values)) - # if __name__ == "__main__": diff --git a/setup.cfg b/setup.cfg index c97c8d355..19cefcbf0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,6 +21,7 @@ package_dir = install_requires = numpy equistore @ https://github.com/lab-cosmo/equistore/archive/6ca7fa3.zip + skcosmo [options.packages.find] where = python diff --git a/tox.ini b/tox.ini index f8180b8e0..6f834aeb0 100644 --- a/tox.ini +++ b/tox.ini @@ -26,7 +26,7 @@ deps = extra_deps = ase chemfiles - + skcosmo [testenv:lint] # lint the Python code with flake8 (code linter), black (code formatter), and From 73e5d071efd1fa7e43d19e531f81e0ae37fd336c Mon Sep 17 00:00:00 2001 From: hurricane642 Date: Thu, 22 Dec 2022 18:16:23 -0800 Subject: [PATCH 3/3] add check of skmatter installation --- python/tests/properties_selector.py | 9 ++++++++- setup.cfg | 1 - 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/python/tests/properties_selector.py b/python/tests/properties_selector.py index eb1181f4c..b00778843 100644 --- a/python/tests/properties_selector.py +++ b/python/tests/properties_selector.py @@ -3,7 +3,6 @@ import numpy as np from equistore import TensorBlock, TensorMap -from skcosmo.feature_selection import FPS as FPS from rascaline.calculators import DummyCalculator from rascaline.properties_selector import PropertiesSelector @@ -11,6 +10,14 @@ from test_systems import TestSystem +HAVE_SKMAT = True +try: + from skcosmo.feature_selection import FPS as FPS +except ImportError: + HAVE_SKMAT = False + + +@unittest.skipIf(not HAVE_SKMAT, "skmatter is not installed") class TestPropertiesSelector(unittest.TestCase): def test_keys_to_samples(self): system = TestSystem() diff --git a/setup.cfg b/setup.cfg index 19cefcbf0..c97c8d355 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,7 +21,6 @@ package_dir = install_requires = numpy equistore @ https://github.com/lab-cosmo/equistore/archive/6ca7fa3.zip - skcosmo [options.packages.find] where = python