add multithreads to train
VScoldness committed Jan 8, 2024
1 parent 69df0c5 commit bdc42de
Showing 2 changed files with 78 additions and 62 deletions.
7 changes: 5 additions & 2 deletions pyxtal_ff/__init__.py
@@ -128,6 +128,8 @@ def __init__(self, descriptors=None, model=None, logo=True):
- kernel: str (*GPR)
The kernel specifying the covariance function of the GPR.
The current development allows "RBF" and "DotProduct".
- n_thread: int (*NN)
    The number of threads used to evaluate the batch loss.
(*) required.
(*NN) for Neural Network algorithm only.
@@ -203,7 +205,7 @@ def __init__(self, descriptors=None, model=None, logo=True):
'random_seed', 'force_coefficient', 'unit', 'softmax_beta',
'restart', 'optimizer', 'path', 'order', 'd_max',
'epoch', 'device', 'alpha', 'batch_size', 'noise', 'kernel',
'norm', 'stress_coefficient', 'stress_group', 'memory']
'norm', 'stress_coefficient', 'stress_group', 'memory', 'n_thread']
for key in model.keys():
if key not in keywords:
msg = f"Don't recognize {key} in model. "+\
@@ -375,7 +377,8 @@ def _MODEL(self, model):
unit=_model['unit'],
restart=_model['restart'],
path=_model['path'],
memory=_model['memory'])
memory=_model['memory'],
n_thread=_model['n_thread'])
self.optimizer = _model['optimizer']

elif self.algorithm == 'PR':
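A hedged usage sketch for the new option, assuming the usual PyXtal_FF training entry point (PyXtal_FF(descriptors=..., model=...) followed by run(mode='train', ...)); the descriptor settings, 'system', and the dataset path are illustrative, and only 'n_thread' is new in this commit:

from pyxtal_ff import PyXtal_FF

descriptors = {'type': 'Bispectrum', 'Rc': 4.9}  # illustrative descriptor settings
model = {'system': ['Si'],           # assumed keys from the existing PyXtal_FF docs
         'algorithm': 'NN',          # n_thread is wired into the NN model only
         'hiddenlayers': [16, 16],
         'epoch': 100,
         'n_thread': 4}              # new in this commit: threads for the batch loss

ff = PyXtal_FF(descriptors=descriptors, model=model)
ff.run(mode='train', TrainData='Si_train.json')  # hypothetical dataset file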
133 changes: 73 additions & 60 deletions pyxtal_ff/models/neuralnetwork.py
@@ -23,65 +23,65 @@

from pyxtal_ff.models.optimizers.regressor import Regressor
from pyxtal_ff.utilities.elements import Element
from tqdm import tqdm

eV2GPa = 160.21766

class NeuralNetwork():
""" Atom-centered Neural Network model. The inputs are atom-centered
descriptors: BehlerParrinello or Bispectrum. The forward propagation of
the Neural Network predicts energy per atom, and the derivative of the
forward propagation predicts force.
A machine learning interatomic potential can be developed by optimizing the
weights of the Neural Network for a given system.
Parameters
----------
elements: list
A list of atomic species in the crystal system.
hiddenlayers: list or dict
[3, 3] contains 2 layers with 3 nodes each. Each atomic species in the
crystal system is assigned its own neural network architecture.
activation: str
The activation function for the neural network model.
Options: tanh, sigmoid, and linear.
random_seed: int
Random seed for generating the initial random weights.
batch_size: int
The number of structures in a batch per optimization step.
epoch: int
The number of times all of the training vectors
are used once to update the weights.
device: str
The device used to train: 'cpu' or 'cuda'.
force_coefficient: float
This parameter is used as the penalty parameter to scale
the force contribution relative to the energy.
stress_coefficient: float
This parameter is used as the balance parameter scaling
the stress contribution relative to the energy.
stress_group: list of strings
Only the intended group will be considered in stress training,
i.e. ['Elastic'].
alpha: float
L2 penalty (regularization) parameter.
softmax_beta: float
The parameter used for the Softmax energy penalty function.
unit: str
The unit of energy ('eV' or 'Ha').
restart: str
Continuing Neural Network training from where it was left off.
path: str
A path to the directory where everything is saved.
memory: str
    There are two options: 'in' or 'out'. 'in' will load all
    descriptors into memory, while 'out' will read them from disk as needed.
n_thread: int
    The number of threads used to evaluate the batch loss.
"""
def __init__(self, elements, hiddenlayers, activation, random_seed,
batch_size, epoch, device, alpha, softmax_beta, unit,
force_coefficient, stress_coefficient, stress_group,
restart, path, memory):
restart, path, memory, n_thread: int = 1):

self.elements = sorted(elements)

@@ -159,7 +159,7 @@ def __init__(self, elements, hiddenlayers, activation, random_seed,
self.restart = restart
self.path = path
self.memory = memory

self.n_thread = n_thread
self.drange = None


@@ -220,7 +220,8 @@ def train(self, TrainData, optimizer):
print(f"Optimizer : {optimizer['method']}")
print(f"Force_coefficient : {self.force_coefficient}")
print(f"Stress_coefficient : {self.stress_coefficient}")
print(f"Batch_size : {self.batch_size}\n")
print(f"Batch_size : {self.batch_size}")
print(f"n_thread : {self.n_thread}")

# Run Neural Network Potential Training
t0 = time.time()
@@ -467,13 +468,9 @@ def preprocess(self, TrainData):
def calculate_loss(self, models, batch):
""" Calculate the total loss and MAE for energy and forces
for a batch of structures per one optimization step. """

output = []
# output = [cur_energy_loss, cur_force_loss, cur_stress_loss, cur_energy_mae, cur_force_mae, cur_stress_mae, n_atoms, cur_count]
for data in tqdm(batch):
cur_output = self.single_loss(models, data)
output.append(cur_output)

output = self.single_thread_loss(models, batch) if self.n_thread == 1 else self.multi_thread_loss(models, batch)
# output = [[cur_energy_loss, cur_force_loss, cur_stress_loss, cur_energy_mae, cur_force_mae, cur_stress_mae, n_atoms, cur_count], ...]
output = self._sum_together(output)

energy_loss = output[0] / (2. * len(batch))
@@ -497,6 +494,23 @@ def calculate_loss(self, models, batch):

return energy_loss+force_loss+stress_loss, energy_mae, force_mae, stress_mae

def single_thread_loss(self, models, batch):
    # Serial path: evaluate the per-structure loss one structure at a time.
    output = []
    for data in batch:
        output.append(self.single_loss(models, data))
    return output

def multi_thread_loss(self, models, batch):
    import concurrent.futures

    # Fan out one task per structure and gather results as they finish.
    # Completion order is arbitrary, which is safe because the per-structure
    # terms are summed afterwards in _sum_together.
    with concurrent.futures.ThreadPoolExecutor(max_workers=self.n_thread) as executor:
        futures = [executor.submit(self.single_loss, models, data) for data in batch]
        output = [f.result() for f in concurrent.futures.as_completed(futures)]
    return output

@staticmethod
def _sum_together(output: list[list[float]]) -> list[float]:
res = [0 for _ in range(len(output[0]))]
@@ -546,7 +560,6 @@ def single_loss(self, models, data):
cur_stress_loss = sf.item()*self.stress_coefficient * ((_stress - stress) ** 2).sum()
cur_stress_mae = sf.item()*F.l1_loss(_stress, stress) * 6
cur_count = 6

return [cur_energy_loss, cur_force_loss, cur_stress_loss, cur_energy_mae, cur_force_mae, cur_stress_mae, n_atoms, cur_count]


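The new multi_thread_loss is a standard fan-out/fan-in pattern: submit one task per structure, then gather results as they complete. Threads can pay off here because PyTorch kernels largely release the GIL, so per-structure loss evaluations overlap in native code, and as_completed() returning results in arbitrary order is harmless because _sum_together reduces them with an order-independent sum. A minimal standalone sketch of that pattern, with illustrative names, independent of PyXtal_FF:

import concurrent.futures

def fan_out_fan_in(fn, items, n_thread=4):
    # Apply fn to every item on a small thread pool and gather the results.
    # Completion order is arbitrary, so the caller's reduction must be
    # order-independent (a plain sum, as in _sum_together, qualifies).
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_thread) as executor:
        futures = [executor.submit(fn, item) for item in items]
        return [f.result() for f in concurrent.futures.as_completed(futures)]

# Toy stand-in for single_loss: square each "structure", then sum.
print(sum(fan_out_fan_in(lambda x: x * x, range(8))))  # -> 140

For pure-Python workloads the GIL would serialize the pool; the speedup depends on single_loss spending most of its time in GIL-releasing native code.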
