
Neural Networks for CuPd segregation

Site enumeration

1 × 1 × 7 – all layer slab

We will begin by training on a simple representation of the possible slab configurations: a 1 × 1 slab, 7 layers deep. With 7 layers, there is a single atom in the middle layer which experiences bulk-like energetics, since the furthest atoms (at the slab surfaces) are more than 6.0 Å away. Any NN trained with that cutoff should therefore predict an energy for the central atom equivalent to that of a bulk structure.
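
As a quick sanity check (a sketch, assuming the same 3.634 Å lattice constant used below), we can verify that the central atom sits more than 6.0 Å from both surface layers:

import numpy as np
from ase.lattice.surface import fcc111

# 1x1x7 fcc(111) slab with the lattice constant used below.
atoms = fcc111('Au', [1, 1, 7], a=3.634, vacuum=6.0)

z = atoms.positions[:, 2]
center = z[len(atoms) // 2]  # one atom per layer, so this is the middle layer

# Distance from the central layer to each surface layer;
# both are ~6.3 Å, just beyond the 6.0 Å descriptor cutoff.
print(z.max() - center, center - z.min())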

This is an attempt to counteract the difficulty previous NNs have had extrapolating to larger numbers of layers.

We enumerate configurations with EMT, which is itself proving difficult to work with due to its sensitivity. Using periodic boundary conditions seems to reduce some of this error; however, the results are also sensitive to the lattice constant used.

from ase.lattice.surface import fcc111
from ase.db import connect
from itertools import product
from asap3 import EMT

db = connect('tmp/EMT/1x1x7.db')

# Dummy template slab; the symbols are overwritten below.
atoms0 = fcc111('Au', [1, 1, 7], a=3.634, vacuum=6.0)
atoms0.set_pbc([True, True, False])
natoms = [a.index for a in atoms0]
metals = ['Cu', 'Pd']

# Enumerate all Cu/Pd decorations, keeping only energy-unique ones.
nrgs = set()
for c in product(metals, repeat=len(natoms)):
    atoms = atoms0.copy()
    for j, s in enumerate(natoms):
        atoms[s].symbol = c[j]

    atoms.set_calculator(EMT())
    nrg = round(atoms.get_potential_energy(), 11)
    atoms.set_calculator(None)

    if nrg not in nrgs:
        nrgs.add(nrg)
        db.write(atoms)

2 × 1 × 7 – all layer slab

from ase.lattice.surface import fcc111
from ase.db import connect
from itertools import product
from asap3 import EMT

db = connect('tmp/EMT/2x1x7.db')

# Dummy template slab; the symbols are overwritten below.
atoms0 = fcc111('Cu', [2, 1, 7], a=3.634, vacuum=6.0)
atoms0.set_pbc([True, True, False])
natoms = [a.index for a in atoms0]
metals = ['Cu', 'Pd']

# Enumerate all Cu/Pd decorations, keeping only energy-unique ones.
nrgs = set()
for c in product(metals, repeat=len(natoms)):
    atoms = atoms0.copy()
    for j, s in enumerate(natoms):
        atoms[s].symbol = c[j]

    atoms.set_calculator(EMT())
    nrg = round(atoms.get_potential_energy(), 11)
    atoms.set_calculator(None)

    if nrg not in nrgs:
        nrgs.add(nrg)
        db.write(atoms)

r3 × r3 × 7 – all layer slab

from ase.lattice.surface import fcc111_root
from ase.db import connect
from itertools import product
from asap3 import EMT

db = connect('tmp/EMT/r3xr3x7.db')

# Dummy template slab; the symbols are overwritten below.
atoms0 = fcc111_root('Cu', 3, [1, 1, 7], a=3.634, vacuum=6.0)
atoms0.set_pbc([True, True, False])
natoms = [a.index for a in atoms0]
metals = ['Cu', 'Pd']

# Enumerate all Cu/Pd decorations, keeping only energy-unique ones.
nrgs = set()
for c in product(metals, repeat=len(natoms)):
    atoms = atoms0.copy()
    for j, s in enumerate(natoms):
        atoms[s].symbol = c[j]

    atoms.set_calculator(EMT())
    nrg = round(atoms.get_potential_energy(), 4)
    atoms.set_calculator(None)

    if nrg not in nrgs:
        nrgs.add(nrg)
        db.write(atoms)

Compile directories

For the first instance, I use this code to assemble all of the enumerations from the three configuration types above into a single database for validation purposes.

from amp.utilities import hash_image
import numpy as np
from ase.io import read
from glob import glob
from ase.db import connect
from amp import Amp
from ase.calculators.singlepoint import SinglePointCalculator

db = connect('tmp/enum.db')

calc = Amp('networks/db0/7-7/')
calc1 = Amp('networks/db0/8-8/')

dirs = glob('tmp/EMT/*.db')
lats = np.linspace(3.634, 3.939, 5)

nrgs = set()
H = set()
for d in dirs:
    images = read(d, ':')

    for atoms1 in images:
        for a in lats:
            atoms = atoms1.copy()
            x = a / 3.634

            # Scale the cell isotropically to the new lattice constant.
            delta = np.array([[x, 0, 0],
                              [0, x, 0],
                              [0, 0, x]])

            atoms.set_cell(np.dot(atoms.get_cell(), delta),
                           scale_atoms=True)

            atoms.set_calculator(calc)
            E = atoms.get_potential_energy()
            nrg = round(E, 5)

            hash = hash_image(atoms)
            if hash not in H and nrg not in nrgs:
                H.add(hash)
                nrgs.add(nrg)

                # Store the per-atom difference between the two NNs.
                atoms.set_calculator(calc1)
                E1 = atoms.get_potential_energy()
                dE = (E - E1) / len(atoms)
                atoms.set_calculator(SinglePointCalculator(atoms, energy=dE))

                db.write(atoms, hash=hash, a=float(a))

db0

DFT

Next, we calculate the DFT energy of each structure which EMT predicts to be energy-unique. There are 72 in total, of a possible 2^7 = 128 structures. We also perform these calculations at 5 lattice constants: those of Cu and Pd, plus three linearly interpolated values in between.

This way, I hope to capture not only the configurational energies, but also some of the contribution of strain effects. No relaxations are performed.
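
For reference, these are the five lattice constants produced by the np.linspace call used throughout:

import numpy as np

# Cu (3.634 Å) to Pd (3.939 Å) in four even steps.
print(np.linspace(3.634, 3.939, 5))
# [ 3.634    3.71025  3.7865   3.86275  3.939  ]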

from vasp import Vasp
from amp.utilities import hash_image
import numpy as np
from ase.io import read
Vasp.VASPRC['queue.walltime'] = '24:00:00'

lats = np.linspace(3.634, 3.939, 5)
images = read('tmp/EMT/1x1x7.db', ':')

nrg, calcs = [], []
for atoms1 in images:
    for a in lats:
        atoms = atoms1.copy()
        x = a / 3.634

        # Scale the cell isotropically to the new lattice constant.
        delta = np.array([[x, 0., 0.],
                          [0., x, 0.],
                          [0., 0., x]])

        atoms.set_cell(np.dot(atoms.get_cell(), delta),
                       scale_atoms=True)

        hash = hash_image(atoms)

        wd = 'DFT/type=CuPd-NN/surf=117/lattice={:.3f}/hash={}'.format(a, hash)
        print(wd)

        # Static PBE calculation (nsw=0, no relaxation).
        calc = Vasp(wd,
                    xc='PBE',
                    kpts=[16, 16, 1],
                    encut=400,
                    nsw=0,
                    atoms=atoms)
        nrg += [calc.potential_energy]
        calcs += [calc]
Vasp.stop_if(None in nrg)

[calc.write_db('database/CuPd.db', parser='=',
               overwrite=False, keys={'dbkey': 0})
 for calc in calcs]

To ensure that the calculations have been successfully added to the database, I check the number of calculations here.

from ase.db import connect

db = connect('database/CuPd.db')

nrgs = set(d.energy for d in db.select())

print('{} calculations ({} unique energies)'.format(db.count(), len(nrgs)))

NN training

Here we train to a selection of 90% of the data instead of the whole set. The remaining 10% of the data can then be used for validation, which is standard practice when testing for overfitting.

from ase.db import connect
import random
import numpy as np

db = connect('database/CuPd.db')

n = db.count()
n_train = int(round(n * 0.9))
ids = list(np.arange(n) + 1)

# This will pseudo-randomly select 90% of the calculations for
# training, which is useful for reproducing our results.
random.seed(256)
train_samples = random.sample(ids, n_train)
valid_samples = set(ids) - set(train_samples)

db.update(list(train_samples), train_set=True)
db.update(list(valid_samples), train_set=False)

Here we train two separate NN frameworks.

from amp import Amp
from ase.db import connect
from amp import SimulatedAnnealing
from amp.descriptor import Gaussian
from amp.regression import NeuralNetwork
import os
import shutil

images = []
db = connect('database/CuPd.db')
for d in db.select('train_set=True'):
    atoms = d.toatoms()
    del atoms.constraints
    images += [atoms]

# These are so quick to finish, I run them in series.
frameworks = [(7, 7), (8, 8)]

for s in frameworks:
    label = 'networks/db0/{}-{}/'.format(*s)
    if os.path.exists(label):
        shutil.rmtree(label)
    os.makedirs(label)

    calc = Amp(label=label,
               dblabel='networks/db0/',
               descriptor=Gaussian(cutoff=6.0),
               regression=NeuralNetwork(hiddenlayers=s))

    calc.train(images=images,
               data_format='db',
               cores=4,
               energy_goal=1e-3,
               force_goal=None,
               global_search=SimulatedAnnealing(temperature=100,
                                                steps=50),
               extend_variables=False)

Analysis and predictions

Now that we have two fully trained NNs, we can use them on the EMT enumerations to identify which structures are well fit and which are not. To do this, we first add the energy predictions of each NN to the temporary database of EMT enumerations.

from ase.db import connect
from amp import Amp
from ase.calculators.singlepoint import SinglePointCalculator

db = connect('tmp/enum.db')

calc = Amp('networks/db0/8-8/')
calc1 = Amp('networks/db0/7-7/')

# Materialize the rows first, since we write back into the same database.
rows = list(db.select())

with connect('tmp/enum.db') as db0:
    for d in rows:
        atoms = d.toatoms()

        atoms.set_calculator(calc)
        E = atoms.get_potential_energy()

        atoms.set_calculator(calc1)
        E1 = atoms.get_potential_energy()
        dE = (E - E1) / len(atoms)
        atoms.set_calculator(SinglePointCalculator(atoms, energy=dE))

        db0.write(atoms, hash=d.hash, a=d.a)

To determine which structures of the full set of EMT enumerations to train on next, we need a cutoff energy which samples the structures with the worst agreement between the NN predictions. At the moment this is done manually via guess and check.
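
In principle this step could be automated. Here is a minimal sketch (a hypothetical helper, not part of the original workflow) that returns the cutoff leaving roughly a target number of structures above it:

import numpy as np

def cutoff_for_n(errors, n_target):
    # Sort the absolute errors largest-first; the n_target-th largest
    # error is then the cutoff that admits ~n_target structures.
    srt = np.sort(np.abs(errors))[::-1]
    return srt[min(n_target, len(srt)) - 1]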

from ase.db import connect
import matplotlib.pyplot as plt
import numpy as np

db = connect('tmp/enum.db')

E = []
for d in db.select():
    E += [abs(d.energy)]
E = np.array(E)

cut = 0.06
n_above = len(E[E > cut])
print('{} structures with error greater than {:.0f} meV/atom'.format(n_above, cut * 1e3))

fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(E, bins=np.arange(0, 0.15, 0.01))
ax.set_xlabel('Difference of neural networks (eV/atom)')
ax.set_ylabel('Frequency')
plt.tight_layout()
plt.savefig('./images/db0-nn-diff.png')

Figure: histogram of the per-atom energy differences between the two NNs (images/db0-nn-diff.png).

We want to include about 1000 configurations in the first instance of the training. A cutoff of 0.06 eV/atom gives 1184 structures, which is close enough for our purposes.

db1

DFT

Based on the analysis above, it is apparent that there is not nearly enough data yet to make an accurate NN. Here we use the existing NN frameworks to identify the most poorly predicted structures. For the ≈ 1000 most poorly predicted structures, we perform DFT calculations at the same 5 lattice constants as above.

from ase.db import connect
import numpy as np
from vasp import Vasp
from amp.utilities import hash_image
Vasp.VASPRC['queue.walltime'] = '24:00:00'

db0 = connect('database/CuPd.db')
H = set([d.hash for d in db0.select()])

db = connect('tmp/enum.db')
d = np.array([[abs(_.energy), _.natoms, _.hash, _.a, _.toatoms()]
              for _ in db.select()]).T
data = np.array([_[d[0] > 0.06] for _ in d[1:]]).T

calcs = []
for n, hash, a, atoms in data:
    if hash not in H:

        # All enumerations of the 1x1x7 structure are already included,
        # so only the 2x1x7 (14-atom) and r3xr3x7 (21-atom) slabs remain.
        if int(n) == 14:
            Vasp.VASPRC['queue.ppn'] = 2
            wd = 'DFT/type=CuPd-NN/surf=217/lattice={:.3f}/hash={}'.format(a, hash)
            kpts = [8, 16, 1]
        elif int(n) == 21:
            Vasp.VASPRC['queue.ppn'] = 4
            wd = 'DFT/type=CuPd-NN/surf=r3r37/lattice={:.3f}/hash={}'.format(a, hash)
            kpts = [10, 10, 1]
        else:
            continue

        calc = Vasp(wd,
                    xc='PBE',
                    kpts=kpts,
                    encut=400,
                    nsw=0,
                    atoms=atoms)
        calc.set_memory()
        E = calc.get_potential_energy()
        if E:
            calcs += [calc]

[calc.write_db('database/CuPd.db', parser='=',
               overwrite=False, keys={'dbkey': 1})
 for calc in calcs]

As before, we verify that the calculations were added to the database.

from ase.db import connect

db = connect('database/CuPd.db')

nrgs = set(d.energy for d in db.select())

print('Database contains {} calculations ({} unique energies)'.format(db.count(), len(nrgs)))

NN training

Here we repeat the process performed above, again including 90% of the data points for training and leaving the rest for validation.

from ase.db import connect
import random
import numpy as np

db = connect('database/CuPd.db')

n = db.count()
n_train = int(round(n * 0.9))
ids = list(np.arange(n) + 1)

random.seed(256)
train_samples = random.sample(ids, n_train)
valid_samples = set(ids) - set(train_samples)

db.update(list(train_samples), train_set=True)
db.update(list(valid_samples), train_set=False)

Now, we create a new framework for the next instance of the database.

from amp import Amp
from ase.db import connect
from amp import SimulatedAnnealing
from amp.descriptor import Gaussian
from amp.regression import NeuralNetwork
import os
import shutil

images = []
db = connect('database/CuPd.db')
for d in db.select('train_set=True'):
    atoms = d.toatoms()
    del atoms.constraints
    images += [atoms]

# These are so quick to finish, I run them in series.
frameworks = [(2, 2), (3, 3)]

for s in frameworks:
    label = 'networks/db1/{}-{}/'.format(*s)
    if os.path.exists(label):
        shutil.rmtree(label)
    os.makedirs(label)

    calc = Amp(label=label,
               dblabel='networks/db1/',
               descriptor=Gaussian(cutoff=6.0),
               regression=NeuralNetwork(hiddenlayers=s))

    calc.train(images=images,
               data_format='db',
               cores=4,
               energy_goal=1e-3,
               force_goal=None,
               global_search=SimulatedAnnealing(temperature=100,
                                                steps=50),
               extend_variables=False)

Analysis and predictions

from ase.db import connect
from amp import Amp
from ase.calculators.singlepoint import SinglePointCalculator

db = connect('tmp/enum.db')

calc = Amp('networks/db1/2-2/')
calc1 = Amp('networks/db1/3-3/')

with connect('tmp/enum-db1.db') as db0:
    for d in db.select():
        atoms = d.toatoms()

        atoms.set_calculator(calc)
        E = atoms.get_potential_energy()

        atoms.set_calculator(calc1)
        E1 = atoms.get_potential_energy()
        dE = (E - E1) / len(atoms)
        atoms.set_calculator(SinglePointCalculator(atoms, energy=dE))

        db0.write(atoms, hash=d.hash, a=d.a)

Again, we check the distribution of disagreement between the two frameworks.

from ase.db import connect
import matplotlib.pyplot as plt
import numpy as np

db = connect('tmp/enum-db1.db')

E = []
for d in db.select():
    E += [abs(d.energy)]
E = np.array(E)

cut = 0.01
n_above = len(E[E > cut])
print('{} structures with error greater than {:.0f} meV/atom'.format(n_above, cut * 1e3))

fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(E, bins=np.arange(0, 0.02, 0.001))
ax.set_xlabel('Difference of neural networks (eV/atom)')
ax.set_ylabel('Frequency')
plt.tight_layout()
plt.savefig('./images/db1-nn-diff.png')

Figure: histogram of the per-atom energy differences between the two NNs (images/db1-nn-diff.png).

The fit is already quite good (I am targeting residuals below ≈ 0.01 eV/atom). Using 0.01 eV/atom as the next cutoff gives 567 structures which are “poorly predicted” by both frameworks, so we repeat the process for these structures.

db2

DFT

From this point on, the methods are the same as above. We are simply repeating the process until the desired level of convergence is obtained.

from ase.db import connect
import numpy as np
from vasp import Vasp
from amp.utilities import hash_image
Vasp.VASPRC['queue.walltime'] = '24:00:00'

db0 = connect('database/CuPd.db')
H = set([d.hash for d in db0.select()])

db = connect('tmp/enum-db1.db')
d = np.array([[abs(_.energy), _.natoms, _.hash, _.a, _.toatoms()]
              for _ in db.select()]).T
data = np.array([_[d[0] > 0.01] for _ in d[1:]]).T

calcs = []
for n, hash, a, atoms in data:
    if hash not in H:

        # All enumerations of the 1x1x7 structure are already included,
        # so only the 2x1x7 (14-atom) and r3xr3x7 (21-atom) slabs remain.
        if int(n) == 14:
            Vasp.VASPRC['queue.ppn'] = 2
            wd = 'DFT/type=CuPd-NN/surf=217/lattice={:.3f}/hash={}'.format(a, hash)
            kpts = [8, 16, 1]
        elif int(n) == 21:
            Vasp.VASPRC['queue.ppn'] = 4
            wd = 'DFT/type=CuPd-NN/surf=r3r37/lattice={:.3f}/hash={}'.format(a, hash)
            kpts = [10, 10, 1]
        else:
            continue

        calc = Vasp(wd,
                    xc='PBE',
                    kpts=kpts,
                    encut=400,
                    nsw=0,
                    atoms=atoms)
        calc.set_memory()
        E = calc.get_potential_energy()
        if E:
            calcs += [calc]

[calc.write_db('database/CuPd.db', parser='=',
               overwrite=False, keys={'dbkey': 2})
 for calc in calcs]

As before, we verify that the calculations were added to the database.

from ase.db import connect

db = connect('database/CuPd.db')

nrgs = set(d.energy for d in db.select())

print('Database contains {} calculations ({} unique energies)'.format(db.count(), len(nrgs)))

NN training

from ase.db import connect
import random
import numpy as np

db = connect('database/CuPd.db')

n = db.count()
n_train = int(round(n * 0.9))
ids = list(np.arange(n) + 1)

random.seed(256)
train_samples = random.sample(ids, n_train)
valid_samples = set(ids) - set(train_samples)

db.update(list(train_samples), train_set=True)
db.update(list(valid_samples), train_set=False)

Now, we create a new framework for the next instance of the database.

from amp import Amp
from ase.db import connect
from amp import SimulatedAnnealing
from amp.descriptor import Gaussian
from amp.regression import NeuralNetwork
import os
import shutil

images = []
db = connect('database/CuPd.db')
for d in db.select('train_set=True'):
    atoms = d.toatoms()
    del atoms.constraints
    images += [atoms]

# These are so quick to finish, I run them in series.
frameworks = [(2, 2), (3, 3)]

for s in frameworks:
    label = 'networks/db2/{}-{}/'.format(*s)
    if os.path.exists(label):
        shutil.rmtree(label)
    os.makedirs(label)

    calc = Amp(label=label,
               dblabel='networks/db2/',
               descriptor=Gaussian(cutoff=6.0),
               regression=NeuralNetwork(hiddenlayers=s))

    calc.train(images=images,
               data_format='db',
               cores=4,
               energy_goal=1e-3,
               force_goal=None,
               global_search=SimulatedAnnealing(temperature=100,
                                                steps=50),
               extend_variables=False)

Analysis and predictions

from ase.db import connect
from amp import Amp
from ase.calculators.singlepoint import SinglePointCalculator

db = connect('tmp/enum-db1.db')

calc = Amp('networks/db2/2-2/')
calc1 = Amp('networks/db2/3-3/')

with connect('tmp/enum-db2.db') as db0:
    for d in db.select():
        atoms = d.toatoms()

        atoms.set_calculator(calc)
        E = atoms.get_potential_energy()

        atoms.set_calculator(calc1)
        E1 = atoms.get_potential_energy()
        dE = (E - E1) / len(atoms)
        atoms.set_calculator(SinglePointCalculator(atoms, energy=dE))

        db0.write(atoms, hash=d.hash, a=d.a)

Again, we check the distribution of disagreement between the two frameworks.

from ase.db import connect
import matplotlib.pyplot as plt
import numpy as np

db = connect('tmp/enum-db2.db')

E = []
for d in db.select():
    E += [abs(d.energy)]
E = np.array(E)

cut = 0.005
n_above = len(E[E > cut])
print('{} structures with error greater than {:.0f} meV/atom'.format(n_above, cut * 1e3))

fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(E, bins=np.arange(0, 0.02, 0.001))
ax.set_xlabel('Difference of neural networks (eV/atom)')
ax.set_ylabel('Frequency')
plt.tight_layout()
plt.savefig('./images/db2-nn-diff.png')

Figure: histogram of the per-atom energy differences between the two NNs (images/db2-nn-diff.png).

After only two additional training instances, the two NNs are in excellent agreement.

Monte-Carlo simulations

Here is an example code for performing MC simulations of the annealing process for a 10 × 10 × 15 slab.
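
Trial moves swap a randomly chosen minority atom with one of its nearest neighbors of the other element. Acceptance follows the standard Metropolis criterion: a swap that lowers the energy is always accepted, while an uphill swap is accepted with probability exp(−ΔE / kBT).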

Base MC code

Copy this code to a file called “CEMC.py” in the same directory as this file. You can also do this by running M-x org-babel-tangle in Emacs.

import numpy as np
import random
from ase.units import kB
from ase.db import connect
from ase.calculators.neighborlist import NeighborList
from ase.calculators.singlepoint import SinglePointCalculator as SPC

def main(atoms, dbname, T=800, steps=20000):
    """Run a canonical MC swap simulation at temperature T, writing
    each accepted configuration to an ASE database."""
    db = connect(dbname)

    # Setting up variables for the MC swaps
    symbols = atoms.get_chemical_symbols()
    sym = list(set(symbols))
    chem_bins = {_: [] for _ in sym}

    for i, s in enumerate(symbols):
        chem_bins[s] += [i]

    # Ensure sym[0] has the lower concentration
    if len(chem_bins[sym[0]]) > len(chem_bins[sym[1]]):
        sym.reverse()

    # Calculate the initial energy and store it
    nrg = atoms.get_potential_energy()

    # Write the initial configuration
    if db.count() == 0:
        dummy = atoms.copy()
        dummy.set_calculator(SPC(atoms, energy=nrg))
        db.write(dummy)

    # Construct a neighbor list with a cutoff between the first
    # and second nearest-neighbor shells
    r = atoms.get_distance(0, 1) / np.sqrt(2) / 1.5
    nl = NeighborList([r] * len(atoms),
                      self_interaction=False,
                      bothways=True)

    # Perform MC steps
    attempt, success = 0, 0
    while success < steps:

        ind1 = None
        while ind1 is None:
            # First, choose a random index from sym[0]
            random.shuffle(chem_bins[sym[0]])
            ind0 = chem_bins[sym[0]][-1]

            # Calculate nearest neighbors
            nl.update(atoms)
            indices, _ = nl.get_neighbors(ind0)

            # Determine if sym[1] neighbors exist and choose one
            sym1_neighbors = [i for i in indices
                              if atoms[i].symbol == sym[1]]
            if sym1_neighbors:
                ind1 = random.sample(sym1_neighbors, 1)[0]

        # Create new atoms object to test
        new_atoms = atoms.copy()
        new_atoms.set_calculator(atoms.get_calculator())

        # Swap the two chemical symbols
        new_atoms[ind0].symbol, new_atoms[ind1].symbol = sym[1], sym[0]

        # Calculate the energy of the new system
        new_nrg = new_atoms.get_potential_energy()

        # Metropolis criterion: always accept downhill moves, and
        # accept uphill moves with probability exp(-dE / kT)
        if (new_nrg < nrg or
                np.exp(-(new_nrg - nrg) / (kB * T)) > np.random.rand()):
            atoms = new_atoms
            nrg = new_nrg

            # Update the index bookkeeping for the swapped atoms
            chem_bins[sym[0]][-1] = ind1
            chem_bins[sym[1]][chem_bins[sym[1]].index(ind1)] = ind0

            dummy = atoms.copy()
            dummy.set_calculator(SPC(atoms, energy=nrg))
            db.write(dummy)
            success += 1

        attempt += 1

    return success / float(attempt)

Running the simulation

Next, we can perform the MC simulation by calling the code we tangled to “CEMC.py” above.

Here is an example MC simulation at 1000 K and a bulk composition of 50:50 CuPd.

from amp import Amp
from ase.lattice.surface import fcc111
from scipy.interpolate import interp1d
import numpy as np
from CEMC import main as CEMC

# Simulation temperature
T = 1000

# Simulation bulk composition
x = 0.5

# Linear (Vegard-like) interpolation between the two lattice constants
lat = interp1d([0, 1], [3.634, 3.939])

# Define a dummy slab
atoms = fcc111('Pd', size=(10, 10, 15), vacuum=6.0, a=float(lat(x)))
atoms.set_pbc([1, 1, 0])

# Randomly populate Cu
samp = np.random.choice(len(atoms), int(len(atoms) * x), replace=False)
for i in samp:
    atoms[i].symbol = 'Cu'

# Attach the calculator
calc = Amp('../networks/db2/3-3/checkpoint-parameters.json')
atoms.set_calculator(calc)

CEMC(atoms, dbname='x0.5-1000K.db', T=T)

Training with smaller number of input nodes

Here is an example where we change the default symmetry functions (G functions) for Amp. This reduces the number of input nodes to 4 for each element (8 in total), from the usual default of 40 for a binary alloy. This should substantially increase the speed of our neural network, though likely at the cost of some accuracy.
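
As a quick check (a sketch mirroring the G construction used below), the reduced set contains just four radial functions per element:

elements = ['Cu', 'Pd']
etas = [1., 10.]

# Two etas x two neighbor elements = 4 G2 functions per central element.
G = {el0: [{'type': 'G2', 'element': el, 'eta': eta}
           for eta in etas
           for el in elements]
     for el0 in elements}

print({el: len(G[el]) for el in elements})  # {'Cu': 4, 'Pd': 4}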

For our final NN it is also ideal to train on the forces (shown in the second example). Each image contributes 3N force components in addition to a single energy, so this allows a more accurate fit without actually performing more DFT calculations.

Energy only

from amp import Amp
from ase.db import connect
from amp import SimulatedAnnealing
from amp.descriptor import Gaussian
from amp.regression import NeuralNetwork
import os
import shutil

elements = ['Cu', 'Pd']
G = {}
for element0 in elements:

    # Radial symmetry functions.
    etas = [1., 10.]
    _G = [{'type': 'G2', 'element': element, 'eta': eta}
          for eta in etas
          for element in elements]

    G[element0] = _G

images = []
db = connect('database/CuPd.db')
for d in db.select('train_set=True'):
    atoms = d.toatoms()
    del atoms.constraints
    images += [atoms]

# This defines a framework of 2 hidden layers with 3 nodes each,
# preceded by the 8 input nodes.
s = (len(G['Cu']) * 2, 3, 3)

label = 'networks/db3/{}-{}-{}/'.format(*s)
if os.path.exists(label):
    shutil.rmtree(label)
os.makedirs(label)

calc = Amp(label=label,
           dblabel='networks/db3/',
           descriptor=Gaussian(cutoff=6.0, Gs=G),
           regression=NeuralNetwork(hiddenlayers=s[1:]))

calc.train(images=images,
           data_format='db',
           cores=4,
           energy_goal=1e-3,
           force_goal=None,
           global_search=SimulatedAnnealing(temperature=100,
                                            steps=50),
           extend_variables=False)

Force training

from amp import Amp
from ase.db import connect
from amp import SimulatedAnnealing
from amp.descriptor import Gaussian
from amp.regression import NeuralNetwork
import os
import shutil

elements = ['Cu', 'Pd']
G = {}
for element0 in elements:

    # Radial symmetry functions.
    etas = [1., 10.]
    _G = [{'type': 'G2', 'element': element, 'eta': eta}
          for eta in etas
          for element in elements]

    G[element0] = _G

images = []
db = connect('database/CuPd.db')
for d in db.select('train_set=True'):
    atoms = d.toatoms()
    del atoms.constraints
    images += [atoms]

# This defines a framework of 2 hidden layers with 3 nodes each,
# preceded by the 8 input nodes.
s = (len(G['Cu']) * 2, 3, 3)

label = 'networks/db3/{}-{}-{}-f/'.format(*s)
if os.path.exists(label):
    shutil.rmtree(label)
os.makedirs(label)

calc = Amp(label=label,
           dblabel='networks/db3/',
           descriptor=Gaussian(cutoff=6.0, Gs=G),
           regression=NeuralNetwork(hiddenlayers=s[1:]))

calc.train(images=images,
           data_format='db',
           cores=1,
           energy_goal=1e-3,
           force_goal=1e-2,
           global_search=SimulatedAnnealing(temperature=100,
                                            steps=50),
           extend_variables=False)
