diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..e8abe0c7 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,24 @@ +# Check that package builds +name: Build Checks + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: "3.10" + + - name: Install dependencies + run: >- + python -m pip install --user --upgrade setuptools wheel + - name: Build + run: >- + python setup.py sdist bdist_wheel diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 5a6d24ad..e4044c6a 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -5,8 +5,10 @@ name: PyPi Release on: push: - pull_request: - workflow_dispatch: + tags: + - hippynn-* + release: + types: [published] jobs: build: @@ -31,4 +33,4 @@ jobs: python setup.py sdist bdist_wheel - name: Publish distribution 📦 to PyPI if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/hippynn/databases/database.py b/hippynn/databases/database.py index b454fc1b..e9375b49 100644 --- a/hippynn/databases/database.py +++ b/hippynn/databases/database.py @@ -191,7 +191,6 @@ def make_generator(self, split_type, evaluation_mode, batch_size=None, subsample if not self.splitting_completed: raise ValueError("Database has not yet been split.") - if split_type not in self.splits: raise ValueError(f"Split {split_type} Invalid. Current splits:{list(self.splits.keys())}") @@ -226,6 +225,51 @@ def make_generator(self, split_type, evaluation_mode, batch_size=None, subsample ) return generator + + def trim_all_arrays(self,index): + """ + To be used in conjuction with remove_high_property + """ + for key in self.arr_dict: + self.arr_dict[key] = self.arr_dict[key][index] + + def remove_high_property(self,key,perAtom,species_key=None,cut=None,std_factor=10): + """ + This function removes outlier data from the dataset + Must be called before splitting + "key": the property key in the dataset to check for high values + "perAtom": True if the property is defined per atom in axis 1, otherwise property is treated as full system + "std_factor": systems with values larger than this multiplier time the standard deviation of all data will be reomved. None to skip this step + "cut_factor": systems with values larger than this number are reomved. None to skip this step. This step is done first. + """ + if perAtom: + if species_key==None: + raise RuntimeError("species_key must be defined to trim a per atom quantity") + atom_ind = self.arr_dict[species_key] > 0 + ndim = len(self.arr_dict[key].shape) + if cut!=None: + if perAtom: + Kmean = np.mean(self.arr_dict[key][atom_ind]) + else: + Kmean = np.mean(self.arr_dict[key]) + failArr = np.abs(self.arr_dict[key]-Kmean)>cut + #This does nothing with ndim=1 + trimArr = np.sum(failArr,axis=tuple(range(1,ndim)))==0 + self.trim_all_arrays(trimArr) + + if std_factor!=None: + if perAtom: + atom_ind = self.arr_dict[species_key] > 0 + Kmean = np.mean(self.arr_dict[key][atom_ind]) + std_cut = np.std(self.arr_dict[key][atom_ind]) * std_factor + else: + Kmean = np.mean(self.arr_dict[key]) + std_cut = np.std(self.arr_dict[key]) * std_factor + failArr = np.abs(self.arr_dict[key]-Kmean)>std_cut + #This does nothing with ndim=1 + trimArr = np.sum(failArr,axis=tuple(range(1,ndim)))==0 + self.trim_all_arrays(trimArr) + def compute_index_mask(indices, index_pool): diff --git a/hippynn/graphs/nodes/physics.py b/hippynn/graphs/nodes/physics.py index 8851f6a3..28cfae6e 100644 --- a/hippynn/graphs/nodes/physics.py +++ b/hippynn/graphs/nodes/physics.py @@ -7,7 +7,7 @@ from .base.node_functions import NodeNotFound from .indexers import AtomIndexer, PaddingIndexer, acquire_encoding_padding from .pairs import OpenPairIndexer -from .tags import Encoder, PairIndexer, Charges +from .tags import Encoder, PairIndexer, Charges, Energies from .inputs import PositionsNode, SpeciesNode from ..indextypes import IdxType, index_type_coercion, elementwise_compare_reduce @@ -120,20 +120,21 @@ def expansion1(self, charges, species, *, purpose, **kwargs): @_parent_expander.match(Charges, _BaseNode, SpeciesNode) def expansion2(self, charges, pos_or_pair, species, *, purpose, **kwargs): encoder, pidxer = acquire_encoding_padding(species, species_set=None, purpose=purpose) - return charges, pos_or_pair, encoder, pidxer + return charges, pos_or_pair, pidxer - @_parent_expander.match(Charges, PositionsNode, Encoder, PaddingIndexer) - def expansion3(self, charges, positions, encoder, pidxer, *, cutoff_distance, **kwargs): + @_parent_expander.match(Charges, PositionsNode, PaddingIndexer) + def expansion3(self, charges, positions, pidxer, *, cutoff_distance, **kwargs): try: - pairfinder = find_unique_relative((charges, positions, encoder, pidxer), PairIndexer) + pairfinder = find_unique_relative((charges, positions, pidxer), PairIndexer) except NodeNotFound: warnings.warn("Boundary conditions not specified, Building open boundary conditions.") + encoder = find_unique_relative(pidxer, Encoder) pairfinder = OpenPairIndexer("PairIndexer", (positions, encoder, pidxer), dist_hard_max=cutoff_distance) return charges, pairfinder, pidxer @_parent_expander.match(Charges, PairIndexer, AtomIndexer) - def expansion4(self, charges, pairfinder, pidxer, **kwargs): - self._validate_pairfinder(pairfinder, None) + def expansion4(self, charges, pairfinder, pidxer, *, cutoff_distance, **kwargs): + self._validate_pairfinder(pairfinder, cutoff_distance) pf = pairfinder return charges, pf.pair_dist, pf.pair_first, pf.pair_second, pidxer.mol_index, pidxer.n_molecules @@ -142,10 +143,10 @@ def expansion4(self, charges, pairfinder, pidxer, **kwargs): _parent_expander.require_idx_states(IdxType.Atoms, *(None,) * 5) -class CoulombEnergyNode(ChargePairSetup, AutoKw, MultiNode): +class CoulombEnergyNode(ChargePairSetup, Energies, AutoKw, MultiNode): _input_names = "charges", "pair_dist", "pair_first", "pair_second", "mol_index", "n_molecules" - _output_names = "mol_energies", "atom_voltages" - _output_index_states = IdxType.Molecules, IdxType.Atoms + _output_names = "mol_energies", "atom_energies", "atom_voltages" + _output_index_states = IdxType.Molecules, IdxType.Atoms, IdxType.Atoms _main_output = "mol_energies" _auto_module_class = physics_layers.CoulombEnergy @@ -159,7 +160,7 @@ def _validate_pairfinder(pairfinder, cutoff_distance): if pairfinder.torch_module.hard_dist_cutoff is not None: raise ValueError( - "dist_hard_max is set to a finite value,\n" + "hard_dist_cutoff is set to a finite value,\n" "coulomb energy requires summing over the entire set of pairs" ) @@ -174,10 +175,11 @@ def __init__(self, name, parents, energy_conversion, module="auto"): super().__init__(name, parents, module=module) -class ScreenedCoulombEnergyNode(ChargePairSetup, AutoKw, SingleNode): +class ScreenedCoulombEnergyNode(ChargePairSetup, Energies, AutoKw, MultiNode): _input_names = "charges", "pair_dist", "pair_first", "pair_second", "mol_index", "n_molecules" - _output_names = "mol_energies", "atom_voltages" - _index_state = IdxType.Molecules + _output_names = "mol_energies", "atom_energies", "atom_voltages" + _output_index_states = IdxType.Molecules, IdxType.Atoms, IdxType.Atoms + _main_output = "mol_energies" _auto_module_class = physics_layers.ScreenedCoulombEnergy @staticmethod @@ -306,3 +308,41 @@ def expansion1(self, features, species, **kwargs): def __init__(self, name, parents, module="auto", **kwargs): parents = self.expand_parents(parents) super().__init__(name, parents, module=module, **kwargs) + + + +class CombineEnergyNode(Energies, AutoKw, ExpandParents, MultiNode): + """ + Combines Local atom energies from different Energy Nodes. + """ + _input_names = "input_atom_energy_1", "input_atom_energy_2", "mol_index", "n_molecules" + _output_names = "mol_energy", "atom_energies" + _main_output = "mol_energy" + _output_index_states = IdxType.Molecules, IdxType.Atoms, + _auto_module_class = physics_layers.CombineEnergy + + @_parent_expander.match(_BaseNode, Energies) + def expansion0(self, energy_1, energy_2, **kwargs): + return energy_1, energy_2.atom_energies + + @_parent_expander.match(Energies, _BaseNode) + def expansion0(self, energy_1, energy_2, **kwargs): + return energy_1.atom_energies, energy_2 + + @_parent_expander.match(_BaseNode, _BaseNode) + def expansion1(self, energy_1, energy_2, **kwargs): + pdindexer = find_unique_relative([energy_1, energy_2], AtomIndexer, why_desc="Generating CombineEnergies") + return energy_1, energy_2, pdindexer + + @_parent_expander.match(_BaseNode, _BaseNode, PaddingIndexer) + def expansion2(self, energy_1, energy_2, pdindexer, **kwargs): + return energy_1, energy_2, pdindexer.mol_index, pdindexer.n_molecules + + _parent_expander.assertlen(4) + _parent_expander.require_idx_states(IdxType.Atoms, IdxType.Atoms, None, None) + + def __init__(self, name, parents, module="auto", module_kwargs=None, **kwargs): + self.module_kwargs = {} if module_kwargs is None else module_kwargs + parents = self.expand_parents(parents, **kwargs) + super().__init__(name, parents=parents, module=module, **kwargs) + diff --git a/hippynn/graphs/nodes/targets.py b/hippynn/graphs/nodes/targets.py index 00cdc545..b666e97d 100644 --- a/hippynn/graphs/nodes/targets.py +++ b/hippynn/graphs/nodes/targets.py @@ -4,6 +4,7 @@ from .base import MultiNode, AutoKw, ExpandParents, find_unique_relative, _BaseNode from .indexers import PaddingIndexer from .tags import AtomIndexer, Network, PairIndexer, HAtomRegressor, Charges, Energies +from .indexers import PaddingIndexer from ..indextypes import IdxType, index_type_coercion from ...layers import targets as target_modules @@ -85,7 +86,7 @@ class HBondNode(ExpandParents, AutoKw, MultiNode): _output_index_states = IdxType.Pair, IdxType.Pair _input_names = "features", "pair_first", "pair_second", "pair_dist" _main_output = "bonds" - + @_parent_expander.matchlen(1) def expand0(self, features, *, purpose, **kwargs): pairfinder = find_unique_relative(features, PairIndexer, why_desc=purpose) diff --git a/hippynn/interfaces/ase_interface/calculator.py b/hippynn/interfaces/ase_interface/calculator.py index 586fdf14..b4d1f671 100644 --- a/hippynn/interfaces/ase_interface/calculator.py +++ b/hippynn/interfaces/ase_interface/calculator.py @@ -4,7 +4,6 @@ import warnings import torch -from ase.calculators import interface from ase.calculators.calculator import compare_atoms, PropertyNotImplementedError, Calculator # Calculator is required to allow HIPNN to be used with ASE Mixing Calculators from hippynn.graphs import find_relatives, find_unique_relative, get_subgraph, copy_subgraph, replace_node, GraphModule diff --git a/hippynn/interfaces/lammps_interface/mliap_interface.py b/hippynn/interfaces/lammps_interface/mliap_interface.py index db180cef..fb2858ea 100644 --- a/hippynn/interfaces/lammps_interface/mliap_interface.py +++ b/hippynn/interfaces/lammps_interface/mliap_interface.py @@ -19,6 +19,7 @@ from hippynn.graphs.nodes.tags import Encoder, PairIndexer from hippynn.graphs.nodes.physics import GradientNode, VecMag from hippynn.graphs.nodes.inputs import SpeciesNode +from hippynn.graphs.nodes.pairs import PairFilter class MLIAPInterface(MLIAPUnified): """ @@ -40,7 +41,7 @@ def __init__(self, energy_node, element_types, ndescriptors=1, # Build the calculator self.rcutfac, self.species_set, self.graph = setup_LAMMPS_graph(energy_node) self.nparams = sum(p.nelement() for p in self.graph.parameters()) - self.graph.to(torch.float64) + self.graph.to(torch.float32) def compute_gradients(self, data): pass @@ -61,7 +62,7 @@ def compute_forces(self, data): z_vals = self.species_set[elems+1] pair_i = self.as_tensor(data.pair_i).type(torch.int64) pair_j = self.as_tensor(data.pair_j).type(torch.int64) - rij = self.as_tensor(data.rij).type(torch.float64) + rij = self.as_tensor(data.rij).type(torch.float32) nlocal = self.as_tensor(data.nlistatoms) # note your sign for rij might need to be +1 or -1, depending on how your implementation works @@ -125,6 +126,7 @@ def setup_LAMMPS_graph(energy): species_set = torch.as_tensor(encoder.species_set).to(torch.int64) min_radius = max(p.dist_hard_max for p in pair_indexers) + ############################################################### # Set up graph to accept external pair indices and shifts @@ -139,13 +141,27 @@ def setup_LAMMPS_graph(energy): pair_dist = VecMag("(LAMMPS)pair_dist", in_pair_coord) new_inputs = [species,in_pair_first,in_pair_second,in_pair_coord,in_nlocal] - + + # Construct Filters and replace the existing pair indexers with the + # corresponding new (filtered) node that accepts external pairs of atoms for pi in pair_indexers: - replace_node(pi.pair_first, in_pair_first, disconnect_old=False) - replace_node(pi.pair_second, in_pair_second, disconnect_old=False) - replace_node(pi.pair_coord, in_pair_coord, disconnect_old=False) - replace_node(pi.pair_dist, pair_dist, disconnect_old=False) - pi.disconnect() + if pi.dist_hard_max == min_radius: + replace_node(pi.pair_first, in_pair_first, disconnect_old=False) + replace_node(pi.pair_second, in_pair_second, disconnect_old=False) + replace_node(pi.pair_coord, in_pair_coord, disconnect_old=False) + replace_node(pi.pair_dist, pair_dist, disconnect_old=False) + pi.disconnect() + else: + mapped_node = PairFilter( + "DistanceFilter-LAMMPS", + (pair_dist, in_pair_first, in_pair_second, in_pair_coord), + dist_hard_max=pi.dist_hard_max, + ) + replace_node(pi.pair_first, mapped_node.pair_first, disconnect_old=False) + replace_node(pi.pair_second, mapped_node.pair_second, disconnect_old=False) + replace_node(pi.pair_coord, mapped_node.pair_coord, disconnect_old=False) + replace_node(pi.pair_dist, mapped_node.pair_dist, disconnect_old=False) + pi.disconnect() energy, *new_required = new_required try: diff --git a/hippynn/layers/physics.py b/hippynn/layers/physics.py index 3917e370..fc5ef09e 100644 --- a/hippynn/layers/physics.py +++ b/hippynn/layers/physics.py @@ -74,6 +74,13 @@ def forward(self, charges, positions, mol_index, n_molecules): class CoulombEnergy(torch.nn.Module): + """ Computes the Coulomb Energy of the molecule/configuration. + + Coulomb energies is defined for pairs of atoms. Here, we adopt the + convention that the Coulomby energy for a pair of atoms is evenly + partitioned to both atoms as the 'per-atom energies'. Therefore, the + atom energies sum to the molecular energy; similar to the HEnergy. + """ def __init__(self, energy_conversion_factor): super().__init__() self.register_buffer("energy_conversion_factor", torch.tensor(energy_conversion_factor)) @@ -84,12 +91,18 @@ def forward(self, charges, pair_dist, pair_first, pair_second, mol_index, n_mole n_atoms, _ = charges.shape voltage_atom = torch.zeros((n_atoms, 1), device=charges.device, dtype=charges.dtype) voltage_atom.index_add_(0, pair_first, voltage_pairs) - coulomb_atom = voltage_atom * charges - coulomb_molecules = 0.5 * self.summer(coulomb_atom, mol_index, n_molecules) - return coulomb_molecules, voltage_atom + coulomb_atoms = 0.5*voltage_atom * charges + coulomb_molecule = self.summer(coulomb_atoms, mol_index, n_molecules) + return coulomb_molecule, coulomb_atoms, voltage_atom class ScreenedCoulombEnergy(CoulombEnergy): + """ Computes the Coulomb Energy of the molecule/configuration. + + The convention for the atom energies is the same as CoulombEnergy + and the HEnergy. + """ + def __init__(self, energy_conversion_factor, screening, radius=None): super().__init__(energy_conversion_factor) if screening is None: @@ -106,24 +119,23 @@ def __init__(self, energy_conversion_factor, screening, radius=None): self.bond_summer = pairs.MolPairSummer() def forward(self, charges, pair_dist, pair_first, pair_second, mol_index, n_molecules): - - # Calculation of pair terms - base_coulomb = charges[pair_first] * charges[pair_second] / pair_dist.unsqueeze(1) screening = self.screening(pair_dist, self.radius).unsqueeze(1) screening = torch.where((pair_dist < self.radius).unsqueeze(1), screening, torch.zeros_like(screening)) - coulomb_pairs = base_coulomb * screening - - # Add pair contributions to system - coulomb_molecule = self.bond_summer(coulomb_pairs, mol_index, n_molecules, pair_first) - # Finally, account for symmetry pairs and energy conversion factor. - coulomb_molecule = 0.5 * self.energy_conversion_factor * coulomb_molecule + # Voltage pairs for per-atom energy + voltage_pairs = self.energy_conversion_factor * (charges[pair_second] / pair_dist.unsqueeze(1)) + voltage_pairs = voltage_pairs * screening + n_atoms, _ = charges.shape + voltage_atom = torch.zeros((n_atoms, 1), device=charges.device, dtype=charges.dtype) + voltage_atom.index_add_(0, pair_first, voltage_pairs) + coulomb_atoms = 0.5 * voltage_atom * charges + coulomb_molecule = self.summer(coulomb_atoms, mol_index, n_molecules) - return coulomb_molecule + return coulomb_molecule, coulomb_atoms, voltage_atom class CombineScreenings(torch.nn.Module): - """ Returns products of different screenings for Screened Coulomb Interactions. + """ Returns products of different screenings for Screened Coulomb Interactions. """ def __init__(self, screening_list): super().__init__() @@ -131,12 +143,12 @@ def __init__(self, screening_list): def forward(self, pair_dist, radius): """ Product of different screenings applied to pair_dist upto radius. - + :param pair_dist: torch.tensor, dtype=float64: 'Neighborlist' distances for coulomb energies. - :param radius: Maximum radius that Screened-Coulomb is evaluated upto. + :param radius: Maximum radius that Screened-Coulomb is evaluated upto. :return screening: Weights for screening for all pair_dist. """ - screening = None + screening = None for s in self.SL: if screening is None: @@ -236,4 +248,29 @@ def forward(self, features, species): class VecMag(torch.nn.Module): def forward(self, vector_feature): - return torch.norm(vector_feature, dim=1).unsqueeze(1) + return torch.norm(vector_feature, dim=1) + + + + +class CombineEnergy(torch.nn.Module): + """ + Combines the energies (molecular and atom energies) from two different + nodes, e.g. HEnergy, Coulomb, or ScreenedCoulomb Energy Nodes. + """ + def __init__(self): + super().__init__() + self.summer = indexers.MolSummer() + + def forward(self, atom_energy_1, atom_energy_2, mol_index, n_molecules): + """ + :param: atom_energy_1 per-atom energy from first node. + :param: atom_energy_2 per atom energy from second node. + :param: mol_index the molecular index for atoms in the batch + :param: total number of molecules in the batch + :return: Total Energy + """ + total_atom_energy = atom_energy_1 + atom_energy_2 + mol_energy = self.summer(total_atom_energy, mol_index, n_molecules) + + return mol_energy, total_atom_energy diff --git a/hippynn/layers/targets.py b/hippynn/layers/targets.py index 4b8128d0..98077c74 100644 --- a/hippynn/layers/targets.py +++ b/hippynn/layers/targets.py @@ -67,7 +67,7 @@ def forward(self, all_features, mol_index, n_molecules): total_hier = torch.zeros_like(total_energies) mol_hier = torch.zeros_like(total_energies) total_atom_hier = torch.zeros_like(total_atomen) - batch_hier = torch.zeros(1,dtype=total_energies.dtype,device=total_energies.dtype) + batch_hier = torch.zeros(1,dtype=total_energies.dtype,device=total_energies.device) return total_energies, total_atomen, partial_sums, total_hier, total_atom_hier, mol_hier, batch_hier