diff --git a/README.md b/README.md index 4dd32a4..08d994d 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,11 @@ * [Funding support](#fund) -# ALIGNN (Introduction) +# ALIGNN & ALIGNN-FF (Introduction) The Atomistic Line Graph Neural Network (https://www.nature.com/articles/s41524-021-00650-1) introduces a new graph convolution layer that explicitly models both two and three body interactions in atomistic systems. This is achieved by composing two edge-gated graph convolution layers, the first applied to the atomistic line graph *L(g)* (representing triplet interactions) and the second applied to the atomistic bond graph *g* (representing pair interactions). +A unified force-field model, ALIGNN-FF (https://pubs.rsc.org/en/content/articlehtml/2023/dd/d2dd00096b), was developed that can model both structurally and chemically diverse solids with any combination of 89 elements from the periodic table. + ![ALIGNN layer schematic](https://github.com/usnistgov/alignn/blob/develop/alignn/tex/schematic_lg.jpg) @@ -185,35 +187,41 @@ Atomisitic line graph neural network-based FF (ALIGNN-FF) can be used to model b [ASE calculator](https://wiki.fysik.dtu.dk/ase/ase/calculators/calculators.html) provides interface to various codes. An example for ALIGNN-FF is give below. Note that there are multiple pretrained ALIGNN-FF models available, here we use the deafult_path model. 
As more accurate models are developed, they will be made available as well: ``` -from alignn.ff.ff import AlignnAtomwiseCalculator,default_path -model_path = default_path() -calc = AlignnAtomwiseCalculator(path=model_path) - +from alignn.ff.ff import ( + AlignnAtomwiseCalculator, + default_path, + mptraj_path, + wt01_path, +) +import matplotlib.pyplot as plt from ase import Atom, Atoms +import time +from ase.build import bulk import numpy as np import matplotlib.pyplot as plt +from ase.build import make_supercell +%matplotlib inline -lattice_params = np.linspace(3.5, 3.8) +model_path = default_path() +calc = AlignnAtomwiseCalculator(path=model_path) + +t1 = time.time() +# a = 5.43 +lattice_params = np.linspace(5.2, 5.6) fcc_energies = [] ready = True for a in lattice_params: - atoms = Atoms([Atom('Cu', (0, 0, 0))], - cell=0.5 * a * np.array([[1.0, 1.0, 0.0], - [0.0, 1.0, 1.0], - [1.0, 0.0, 1.0]]), - pbc=True) - + atoms = bulk("Si", "diamond", a=a) atoms.set_tags(np.ones(len(atoms))) atoms.calc = calc e = atoms.get_potential_energy() fcc_energies.append(e) - -import matplotlib.pyplot as plt -%matplotlib inline -plt.plot(lattice_params, fcc_energies) -plt.title('1x1x1') -plt.xlabel('Lattice constant ($\AA$)') -plt.ylabel('Total energy (eV)') +t2 = time.time() +print("Time", t2 - t1) +plt.plot(lattice_params, fcc_energies, "-o") +plt.title("Si") +plt.xlabel("Lattice constant ($\AA$)") +plt.ylabel("Total energy (eV)") plt.show() ``` diff --git a/alignn/__init__.py b/alignn/__init__.py index b99e495..7d8d745 100644 --- a/alignn/__init__.py +++ b/alignn/__init__.py @@ -1,3 +1,3 @@ """Version number.""" -__version__ = "2024.5.27" +__version__ = "2024.8.27" diff --git a/alignn/ff/all_models_ff.json b/alignn/ff/all_models_ff.json index fba44d0..981f0da 100644 --- a/alignn/ff/all_models_ff.json +++ b/alignn/ff/all_models_ff.json @@ -1 +1,15 @@ -{"v5.27.2024": "https://figshare.com/ndownloader/files/47286127", "alignnff_fmult": 
"https://figshare.com/ndownloader/files/41583585", "alignnff_wt10": "https://figshare.com/ndownloader/files/41583594", "alignnff_fd": "https://figshare.com/ndownloader/files/41583582", "alignnff_wt01": "https://figshare.com/ndownloader/files/41583588", "alignnff_wt1": "https://figshare.com/ndownloader/files/41583591", "fmult_mlearn_only": "https://figshare.com/ndownloader/files/41583597", "aff_Oct23": "https://figshare.com/ndownloader/files/42880573", "revised": "https://figshare.com/ndownloader/files/41583600", "scf_fd_top_10_en_42_fmax_600_wt01": "https://figshare.com/ndownloader/files/41967375", "scf_fd_top_10_en_42_fmax_600_wt10": "https://figshare.com/ndownloader/files/41967372"} \ No newline at end of file +{ + "v8.29.2024_dft_3d": "https://figshare.com/ndownloader/files/48889834", + "v8.29.2024_mpf": "https://figshare.com/ndownloader/files/48889837", + "v5.27.2024": "https://figshare.com/ndownloader/files/47286127", + "alignnff_fmult": "https://figshare.com/ndownloader/files/41583585", + "alignnff_wt10": "https://figshare.com/ndownloader/files/41583594", + "alignnff_fd": "https://figshare.com/ndownloader/files/41583582", + "alignnff_wt01": "https://figshare.com/ndownloader/files/41583588", + "alignnff_wt1": "https://figshare.com/ndownloader/files/41583591", + "fmult_mlearn_only": "https://figshare.com/ndownloader/files/41583597", + "aff_Oct23": "https://figshare.com/ndownloader/files/42880573", + "revised": "https://figshare.com/ndownloader/files/41583600", + "scf_fd_top_10_en_42_fmax_600_wt01": "https://figshare.com/ndownloader/files/41967375", + "scf_fd_top_10_en_42_fmax_600_wt10": "https://figshare.com/ndownloader/files/41967372" +} \ No newline at end of file diff --git a/alignn/ff/ff.py b/alignn/ff/ff.py index b38b0cf..8fc3571 100644 --- a/alignn/ff/ff.py +++ b/alignn/ff/ff.py @@ -126,7 +126,8 @@ def get_figshare_model_ff( def default_path(): """Get default model path.""" - dpath = get_figshare_model_ff(model_name="v5.27.2024") + dpath = 
get_figshare_model_ff(model_name="v8.29.2024_dft_3d") + # dpath = get_figshare_model_ff(model_name="v5.27.2024") # dpath = get_figshare_model_ff(model_name="alignnff_wt10") # dpath = get_figshare_model_ff(model_name="alignnff_fmult") # print("model_path", dpath) @@ -141,12 +142,19 @@ def revised_path(): def alignnff_fmult(): - """Get defaukt model path.""" + """Get default model path.""" dpath = get_figshare_model_ff(model_name="alignnff_fmult") print("model_path", dpath) return dpath +def mptraj_path(): + """Get MPtraj model path.""" + dpath = get_figshare_model_ff(model_name="v8.29.2024_mpf") + print("model_path", dpath) + return dpath + + def mlearn_path(): """Get model trained on mlearn path.""" dpath = get_figshare_model_ff(model_name="fmult_mlearn_only") @@ -287,6 +295,7 @@ def calculate(self, atoms, properties=None, system_changes=None): """Calculate properties.""" j_atoms = ase_to_atoms(atoms) num_atoms = j_atoms.num_atoms + # g, lg = Graph.atom_dgl_multigraph( g, lg = Graph.atom_dgl_multigraph( j_atoms, neighbor_strategy=self.config["neighbor_strategy"], @@ -295,7 +304,11 @@ def calculate(self, atoms, properties=None, system_changes=None): atom_features=self.config["atom_features"], use_canonize=self.config["use_canonize"], ) - result = self.net((g.to(self.device), lg.to(self.device))) + if self.config["model"]["alignn_layers"] > 0: + # g,lg = g + result = self.net((g.to(self.device), lg.to(self.device))) + else: + result = self.net((g.to(self.device))) # print ('stress',result["stress"].detach().numpy()) if self.force_mult_natoms: mult = num_atoms diff --git a/alignn/lmdb_dataset.py b/alignn/lmdb_dataset.py index 328395c..7a41ab7 100644 --- a/alignn/lmdb_dataset.py +++ b/alignn/lmdb_dataset.py @@ -38,11 +38,12 @@ def prepare_line_graph_batch( class TorchLMDBDataset(Dataset): """Dataset of crystal DGLGraphs using LMDB.""" - def __init__(self, lmdb_path="", ids=[]): + def __init__(self, lmdb_path="", line_graph=True, ids=[]): """Intitialize with path and 
ids array.""" super(TorchLMDBDataset, self).__init__() self.lmdb_path = lmdb_path self.ids = ids + self.line_graph = line_graph self.env = lmdb.open(self.lmdb_path, readonly=True, lock=False) with self.env.begin() as txn: self.length = txn.stat()["entries"] @@ -56,8 +57,12 @@ def __getitem__(self, idx): """Get sample.""" with self.env.begin() as txn: serialized_data = txn.get(f"{idx}".encode()) - graph, line_graph, label = pk.loads(serialized_data) - return graph, line_graph, label + if self.line_graph: + graph, line_graph, label = pk.loads(serialized_data) + return graph, line_graph, label + else: + graph, label = pk.loads(serialized_data) + return graph, label def close(self): """Close connection.""" @@ -70,7 +75,9 @@ def __del__(self): @staticmethod def collate(samples: List[Tuple[dgl.DGLGraph, torch.Tensor]]): """Dataloader helper to batch graphs cross `samples`.""" + # print('samples',samples) graphs, labels = map(list, zip(*samples)) + # graphs, lgs, labels = map(list, zip(*samples)) batched_graph = dgl.batch(graphs) return batched_graph, torch.tensor(labels) @@ -113,6 +120,7 @@ def get_torch_dataset( """Get Torch Dataset with LMDB.""" vals = np.array([ii[target] for ii in dataset]) # df[target].values print("data range", np.max(vals), np.min(vals)) + print("line_graph", line_graph) f = open(os.path.join(output_dir, tmp_name + "_data_range"), "w") line = "Max=" + str(np.max(vals)) + "\n" f.write(line) @@ -123,7 +131,9 @@ def get_torch_dataset( if os.path.exists(tmp_name) and read_existing: for idx, (d) in tqdm(enumerate(dataset), total=len(dataset)): ids.append(d[id_tag]) - dat = TorchLMDBDataset(lmdb_path=tmp_name, ids=ids) + dat = TorchLMDBDataset( + lmdb_path=tmp_name, line_graph=line_graph, ids=ids + ) print("Reading dataset", tmp_name) return dat ids = [] @@ -131,7 +141,8 @@ def get_torch_dataset( with env.begin(write=True) as txn: for idx, (d) in tqdm(enumerate(dataset), total=len(dataset)): ids.append(d[id_tag]) - g, lg = Graph.atom_dgl_multigraph( + # 
g, lg = Graph.atom_dgl_multigraph( + g = Graph.atom_dgl_multigraph( Atoms.from_dict(d["atoms"]), cutoff=float(cutoff), max_neighbors=max_neighbors, @@ -140,6 +151,8 @@ def get_torch_dataset( use_canonize=use_canonize, cutoff_extra=cutoff_extra, ) + if line_graph: + g, lg = g label = torch.tensor(d[target]).type(torch.get_default_dtype()) # print('label',label,label.view(-1).long()) if classification: @@ -165,11 +178,16 @@ def get_torch_dataset( ).type(torch.get_default_dtype()) # labels.append(label) - serialized_data = pk.dumps((g, lg, label)) + if line_graph: + serialized_data = pk.dumps((g, lg, label)) + else: + serialized_data = pk.dumps((g, label)) txn.put(f"{idx}".encode(), serialized_data) env.close() - lmdb_dataset = TorchLMDBDataset(lmdb_path=tmp_name, ids=ids) + lmdb_dataset = TorchLMDBDataset( + lmdb_path=tmp_name, line_graph=line_graph, ids=ids + ) return lmdb_dataset diff --git a/alignn/models/alignn_atomwise.py b/alignn/models/alignn_atomwise.py index 228688b..1b15ec2 100644 --- a/alignn/models/alignn_atomwise.py +++ b/alignn/models/alignn_atomwise.py @@ -55,7 +55,9 @@ class ALIGNNAtomWiseConfig(BaseSettings): add_reverse_forces: bool = False # will make True as default soon lg_on_fly: bool = False # will make True as default soon batch_stress: bool = True + multiply_cutoff: bool = False extra_features: int = 0 + exponent: int = 3 class Config: """Configure model settings behavior.""" @@ -99,6 +101,14 @@ def cutoff_function_based_edges(r, inner_cutoff=4, exponent=3): + c2 * ratio ** (exponent + 1) + c3 * ratio ** (exponent + 2) ) + # r_cut = inner_cutoff + # r_on = inner_cutoff+1 + + # r_sq = r * r + # r_on_sq = r_on * r_on + # r_cut_sq = r_cut * r_cut + # envelope = (r_cut_sq - r_sq) + # ** 2 * (r_cut_sq + 2 * r_sq - 3 * r_on_sq)/ (r_cut_sq - r_on_sq) ** 3 return torch.where(r <= inner_cutoff, envelope, torch.zeros_like(r)) @@ -371,7 +381,6 @@ def forward( features = g.ndata["extra_features"] # print('features',features,features.shape) features = 
self.extra_feature_embedding(features) - g = g.local_var() result = {} @@ -381,6 +390,9 @@ def forward( r = g.edata["r"] if self.config.calculate_gradient: r.requires_grad_(True) + bondlength = torch.norm(r, dim=1) + # mask = bondlength >= self.config.inner_cutoff + # bondlength[mask]=float(1.1) if self.config.lg_on_fly and len(self.alignn_layers) > 0: # re-compute bond angle cosines here to ensure # the three-body interactions are fully included @@ -390,13 +402,26 @@ def forward( z = self.angle_embedding(lg.edata.pop("h")) # r = g.edata["r"].clone().detach().requires_grad_(True) - bondlength = torch.norm(r, dim=1) if self.config.use_cutoff_function: - bondlength = cutoff_function_based_edges( - bondlength, inner_cutoff=self.config.inner_cutoff - ) - y = self.edge_embedding(bondlength) - + # bondlength = cutoff_function_based_edges( + if self.config.multiply_cutoff: + c_off = cutoff_function_based_edges( + bondlength, + inner_cutoff=self.config.inner_cutoff, + exponent=self.config.exponent, + ).unsqueeze(dim=1) + + y = self.edge_embedding(bondlength) * c_off + else: + bondlength = cutoff_function_based_edges( + bondlength, + inner_cutoff=self.config.inner_cutoff, + exponent=self.config.exponent, + ) + y = self.edge_embedding(bondlength) + else: + y = self.edge_embedding(bondlength) + # y = self.edge_embedding(bondlength) # ALIGNN updates: update node, edge, triplet features for alignn_layer in self.alignn_layers: x, y, z = alignn_layer(g, lg, x, y, z) diff --git a/alignn/train.py b/alignn/train.py index dd3675f..a67d1db 100644 --- a/alignn/train.py +++ b/alignn/train.py @@ -232,7 +232,7 @@ def train_dgl( else: net = model - # print("net", net) + print("net parameters", sum(p.numel() for p in net.parameters())) # print("device", device) net.to(device) if use_ddp: @@ -354,7 +354,10 @@ def get_batch_errors(dat=[]): info = {} # info["id"] = jid optimizer.zero_grad() - result = net([dats[0].to(device), dats[1].to(device)]) + if (config.model.alignn_layers) > 0: + result 
= net([dats[0].to(device), dats[1].to(device)]) + else: + result = net(dats[0].to(device)) # info = {} info["target_out"] = [] info["pred_out"] = [] @@ -373,9 +376,12 @@ def get_batch_errors(dat=[]): # print('result["out"]',result["out"]) # print('dats[2]',dats[2]) loss1 = config.model.graphwise_weight * criterion( - result["out"], dats[2].to(device) + result["out"], + dats[-1].to(device), + # result["out"], dats[2].to(device) ) - info["target_out"] = dats[2].cpu().numpy().tolist() + info["target_out"] = dats[-1].cpu().numpy().tolist() + # info["target_out"] = dats[2].cpu().numpy().tolist() info["pred_out"] = ( result["out"].cpu().detach().numpy().tolist() ) @@ -488,7 +494,11 @@ def get_batch_errors(dat=[]): info = {} info["id"] = jid optimizer.zero_grad() - result = net([dats[0].to(device), dats[1].to(device)]) + # result = net([dats[0].to(device), dats[1].to(device)]) + if (config.model.alignn_layers) > 0: + result = net([dats[0].to(device), dats[1].to(device)]) + else: + result = net(dats[0].to(device)) # info = {} info["target_out"] = [] info["pred_out"] = [] @@ -504,9 +514,9 @@ def get_batch_errors(dat=[]): loss4 = 0 # Such as stresses if config.model.output_features is not None: loss1 = config.model.graphwise_weight * criterion( - result["out"], dats[2].to(device) + result["out"], dats[-1].to(device) ) - info["target_out"] = dats[2].cpu().numpy().tolist() + info["target_out"] = dats[-1].cpu().numpy().tolist() info["pred_out"] = ( result["out"].cpu().detach().numpy().tolist() ) @@ -647,7 +657,11 @@ def get_batch_errors(dat=[]): # print('dats[0]',dats[0]) # print('test_loader',test_loader) # print('test_loader.dataset.ids',test_loader.dataset.ids) - result = net([dats[0].to(device), dats[1].to(device)]) + # result = net([dats[0].to(device), dats[1].to(device)]) + if (config.model.alignn_layers) > 0: + result = net([dats[0].to(device), dats[1].to(device)]) + else: + result = net(dats[0].to(device)) loss1 = 0 # Such as energy loss2 = 0 # Such as bader charges 
loss3 = 0 # Such as forces @@ -659,9 +673,9 @@ def get_batch_errors(dat=[]): # print('result["out"]',result["out"]) # print('dats[2]',dats[2]) loss1 = config.model.graphwise_weight * criterion( - result["out"], dats[2].to(device) + result["out"], dats[-1].to(device) ) - info["target_out"] = dats[2].cpu().numpy().tolist() + info["target_out"] = dats[-1].cpu().numpy().tolist() info["pred_out"] = ( result["out"].cpu().detach().numpy().tolist() ) diff --git a/alignn/train_alignn.py b/alignn/train_alignn.py index 2e05895..16ef6ab 100644 --- a/alignn/train_alignn.py +++ b/alignn/train_alignn.py @@ -17,6 +17,7 @@ import time from jarvis.core.atoms import Atoms import random +from ase.stress import voigt_6_to_full_3x3_stress device = "cpu" if torch.cuda.is_available(): @@ -265,8 +266,15 @@ def train_for_folder( target_grad = "atomwise_grad" info["atomwise_grad"] = i[gradwise_key] # - mean_force if train_stress: - info["stresses"] = i[stresswise_key] # - mean_force + if len(i[stresswise_key]) == 6: + + stress = voigt_6_to_full_3x3_stress(i[stresswise_key]) + else: + stress = i[stresswise_key] + info["stresses"] = stress # - mean_force target_stress = "stresses" + + # print("stresses",info["stresses"] ) if "extra_features" in i: info["extra_features"] = i["extra_features"] dataset.append(info) @@ -301,7 +309,8 @@ def train_for_folder( print("Restarting the model training:", restart_model_path) if config.model.name == "alignn_atomwise": rest_config = loadjson( - restart_model_path.replace("best_model.pt", "config.json") + restart_model_path.replace("current_model.pt", "config.json") + # restart_model_path.replace("best_model.pt", "config.json") ) tmp = ALIGNNAtomWiseConfig(**rest_config["model"]) diff --git a/setup.py b/setup.py index bd71df9..8da80fa 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="alignn", - version="2024.5.27", + version="2024.8.27", author="Kamal Choudhary, Brian DeCost", author_email="kamal.choudhary@nist.gov", 
description="alignn",