Merge pull request #15 from usnistgov/develop

Merge Develop

knc6 authored Jun 20, 2021
2 parents 9626fd2 + 1c4dd78 commit 01ef933
Showing 14 changed files with 603 additions and 28 deletions.
2 changes: 1 addition & 1 deletion alignn/__init__.py
@@ -1,2 +1,2 @@
"""Version number."""
__version__ = "2021.5.16"
__version__ = "2021.06.18"
26 changes: 24 additions & 2 deletions alignn/config.py
@@ -63,6 +63,10 @@
"gap pbe",
"e_form",
"e_hull",
"energy_per_atom",
"formation_energy_per_atom",
"band_gap",
"e_above_hull",
"mu_b",
"bulk modulus",
"shear modulus",
@@ -83,6 +87,14 @@
"B",
"C",
"target",
"max_efg",
"avg_elec_mass",
"avg_hole_mass",
"_oqmd_band_gap",
"_oqmd_delta_e",
"_oqmd_stability",
"edos_up",
"pdos_elast",
]


@@ -93,12 +105,21 @@ class TrainingConfig(BaseSettings):

# dataset configuration
dataset: Literal[
"dft_3d", "dft_2d", "megnet", "qm9", "user_data"
"dft_3d",
"dft_2d",
"megnet",
"megnet2",
"mp_3d_2020",
"qm9",
"user_data",
"oqmd_3d_no_cfid",
"edos_up",
"edos_pdos",
] = "dft_3d"
target: TARGET_ENUM = "formation_energy_peratom"
atom_features: Literal["basic", "atomic_number", "cfid", "cgcnn"] = "cgcnn"
neighbor_strategy: Literal["k-nearest", "voronoi"] = "k-nearest"
id_tag: Literal["jid", "id"] = "jid"
id_tag: Literal["jid", "id", "_oqmd_entry_id"] = "jid"

# logging configuration

@@ -129,6 +150,7 @@ class TrainingConfig(BaseSettings):
store_outputs: bool = True
progress: bool = True
log_tensorboard: bool = False
standard_scalar_and_pca: bool = False
use_canonize: bool = True
num_workers: int = 4
cutoff: float = 8.0
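With the expanded dataset and target literals, the new Materials Project and OQMD entries can be selected straight from a training config. A minimal sketch (illustrative values only; the field names are the ones added in this diff):

from alignn.config import TrainingConfig

config = TrainingConfig(
    dataset="mp_3d_2020",
    target="formation_energy_per_atom",
    id_tag="id",
    standard_scalar_and_pca=False,
)
print(config.dataset, config.target, config.id_tag)
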
65 changes: 63 additions & 2 deletions alignn/data.py
@@ -19,6 +19,11 @@
import math
from jarvis.db.jsonutils import dumpjson

# from sklearn.pipeline import Pipeline
import pickle as pk
from sklearn.decomposition import PCA # ,KernelPCA
from sklearn.preprocessing import StandardScaler

# use pandas progress_apply
tqdm.pandas()

@@ -54,6 +59,12 @@ def load_dataset(
return d


# np.mean(mean_absolute_deviation(x,axis=0))
def mean_absolute_deviation(data, axis=None):
"""Get Mean absolute deviation."""
return np.mean(np.absolute(data - np.mean(data, axis)), axis)


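As a quick sanity check of mean_absolute_deviation on a toy array (a sketch; np is already imported in this module):

x = np.array([1.0, 2.0, 3.0, 4.0])
# mean = 2.5, absolute deviations = [1.5, 0.5, 0.5, 1.5]
print(mean_absolute_deviation(x))  # 1.0
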
def load_graphs(
df: pd.DataFrame,
name: str = "dft_3d",
@@ -172,9 +183,9 @@ def get_torch_dataset(
):
"""Get Torch Dataset."""
df = pd.DataFrame(dataset)
# print("df", df)
vals = df[target].values
print("data range", np.max(vals), np.min(vals))

f = open("data_range", "w")
line = "Max=" + str(np.max(vals)) + "\n"
f.write(line)
@@ -229,6 +240,8 @@ def get_train_val_loaders(
max_neighbors: int = 12,
classification_threshold: Optional[float] = None,
target_multiplication_factor: Optional[float] = None,
standard_scalar_and_pca=False,
output_features=1,
):
"""Help function to set up Jarvis train and val dataloaders."""
train_sample = filename + "_train.data"
@@ -269,6 +282,9 @@
else:
d = dataset_array

# for ii, i in enumerate(pc_y):
# d[ii][target] = pc_y[ii].tolist()

dat = []
if classification_threshold is not None:
print(
@@ -279,8 +295,17 @@
" data.",
)
print("Converting target data into 1 and 0.")
all_targets = []
for i in d:
if i[target] != "na" and not math.isnan(i[target]):
if isinstance(i[target], list): # multioutput target
all_targets.append(torch.tensor(i[target]))
dat.append(i)

elif (
i[target] is not None
and i[target] != "na"
and not math.isnan(i[target])
):
if target_multiplication_factor is not None:
i[target] = i[target] * target_multiplication_factor
if classification_threshold is not None:
@@ -295,6 +320,7 @@
type(i[target]),
)
dat.append(i)
all_targets.append(i[target])

# id_test = ids[-test_size:]
# if standardize:
@@ -318,6 +344,41 @@
dataset_val = [dat[x] for x in id_val]
dataset_test = [dat[x] for x in id_test]

if standard_scalar_and_pca:
y_data = [i[target] for i in dataset_train]
# pipe = Pipeline([('scale', StandardScaler())])
sc = StandardScaler()
sc.fit(y_data)
# pc = PCA(n_components=output_features)
# pipe = Pipeline(
# [
# ("scale", StandardScaler()),
# ("reduce_dims", PCA(n_components=output_features)),
# ]
# )
pk.dump(sc, open("sc.pkl", "wb"))
pc = PCA(n_components=40)
pc.fit(y_data)
pk.dump(pc, open("pca.pkl", "wb"))

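A few details of this branch are worth flagging: the scaler and PCA are each fit directly on the raw training targets (the commented-out Pipeline would have chained them), fit requires 2-D input, so this path assumes list-valued multi-output targets such as the edos_up spectra, and PCA(n_components=40) hard-codes 40 components even though output_features is now a parameter. A sketch of reloading the pickled objects at inference time to map PCA-space predictions back to raw targets (pred is a hypothetical model output):

import pickle as pk
import numpy as np

pc = pk.load(open("pca.pkl", "rb"))
sc = pk.load(open("sc.pkl", "rb"))  # StandardScaler fit on the same raw targets
pred = np.zeros((1, 40))  # placeholder for 40-component model predictions
y_raw = pc.inverse_transform(pred)  # recover full-dimensional raw-scale targets
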
if classification_threshold is None:
try:
from sklearn.metrics import mean_absolute_error

print("MAX val:", max(all_targets))
print("MIN val:", min(all_targets))
print("MAD:", mean_absolute_deviation(all_targets))
# Baseline: predict the mean of the training targets for every test sample
x_bar = np.mean(np.array([i[target] for i in dataset_train]))
baseline_mae = mean_absolute_error(
np.array([i[target] for i in dataset_test]),
np.array([x_bar for i in dataset_test]),
)
print("Baseline MAE:", baseline_mae)
except Exception as exp:
print("Data error", exp)
pass

train_data = get_torch_dataset(
dataset=dataset_train,
id_tag=id_tag,
4 changes: 2 additions & 2 deletions alignn/models/alignn.py
@@ -23,8 +23,8 @@ class ALIGNNConfig(BaseSettings):
"""Hyperparameter schema for jarvisdgl.models.alignn."""

name: Literal["alignn"]
alignn_layers: int = 3
gcn_layers: int = 3
alignn_layers: int = 4
gcn_layers: int = 4
atom_input_features: int = 92
edge_input_features: int = 80
triplet_input_features: int = 40
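The default network depth for both the line-graph (ALIGNN) and GCN updates goes from 3 to 4 layers here. A sketch of picking up the new defaults (assuming ALIGNN's constructor takes an optional ALIGNNConfig, consistent with the bare ALIGNN() calls in the scripts below):

from alignn.models.alignn import ALIGNN, ALIGNNConfig

cfg = ALIGNNConfig(name="alignn")
print(cfg.alignn_layers, cfg.gcn_layers)  # 4 4
model = ALIGNN(cfg)
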
44 changes: 44 additions & 0 deletions alignn/scripts/dataset_props.json
@@ -0,0 +1,44 @@
{
"id-oqmd_3d_no_cfid":[
"_oqmd_band_gap",
"_oqmd_delta_e",
"_oqmd_stability"],
"id-mp_3d_2020":["energy_per_atom","formation_energy_per_atom","band_gap","e_above_hull"],
"id-megnet2":["formation_energy_per_atom"],
"jid-dft_2d":["formation_energy_peratom","optb88vdw_bandgap"],
"jid-qe_tb":["indir_gap"],
"jid-dft_3d":[
"formation_energy_peratom",
"optb88vdw_bandgap",
"optb88vdw_total_energy",
"bulk_modulus_kv",
"shear_modulus_gv",
"mbj_bandgap",
"slme",
"magmom_oszicar",
"epsx",
"spillage",
"kpoint_length_unit",
"encut",
"epsy",
"epsz",
"mepsx",
"mepsy",
"mepsz",
"max_ir_mode",
"min_ir_mode",
"n-Seebeck",
"p-Seebeck",
"n-powerfact",
"p-powerfact",
"ehull",
"exfoliation_energy",
"dfpt_piezo_max_dielectric",
"dfpt_piezo_max_eij",
"dfpt_piezo_max_dij"],
"id-polymer_genome":["gga_gap"],
"cod_id-omdb":["bandgap"],
"id-megnet":["e_form","gap pbe"],
"jid-qe_tb":["f_enp","indir_gap"]

}
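Each key in this file encodes an id tag and a dataset name joined by a hyphen, and the value lists the trainable properties for that dataset. A hypothetical driver that walks the file (the split-on-first-hyphen convention is inferred from the key format):

import json

with open("alignn/scripts/dataset_props.json") as f:
    dataset_props = json.load(f)

for key, props in dataset_props.items():
    id_tag, dataset = key.split("-", 1)
    for prop in props:
        print(f"dataset={dataset} id_tag={id_tag} target={prop}")
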
73 changes: 73 additions & 0 deletions alignn/scripts/defect.py
@@ -0,0 +1,73 @@
import torch
from jarvis.core.atoms import Atoms
from jarvis.core.graphs import Graph
from alignn.models.alignn import ALIGNN

# from jarvis.analysis.structure.spacegroup import Spacegroup3D
from jarvis.db.figshare import get_jid_data
from jarvis.analysis.defects.vacancy import Vacancy
from jarvis.analysis.thermodynamics.energetics import unary_energy

device = "cpu"
if torch.cuda.is_available():
device = torch.device("cuda")


def atom_to_energy(atoms=None, model=None):
"""Get energy for Atoms."""
g, lg = Graph.atom_dgl_multigraph(atoms)
out_data = (
model([g.to(device), lg.to(device)])
.detach()
.cpu()
.numpy()
.flatten()
.tolist()[0]
)
return out_data


def get_defect_form_en(
jid="JVASP-1002",
model_path="JV15/jv_optb88vdw_total_energy_alignn/checkpoint_300.pt",
dataset="dft_3d",
):
"""Predict defect formation energy ???."""
model = ALIGNN()
model.load_state_dict(torch.load(model_path, map_location=device)["model"])
# model=torch.load('checkpoint_250.pt')['model']
model.to(device)
model.eval()

atoms = Atoms.from_dict(get_jid_data(jid=jid, dataset=dataset)["atoms"])
bulk_en_pa = atom_to_energy(atoms=atoms, model=model) # *atoms.num_atoms

strts = Vacancy(atoms).generate_defects(
on_conventional_cell=False, enforce_c_size=8, extend=1
)
for j in strts:
strt = Atoms.from_dict(j.to_dict()["defect_structure"])
name = (
str(jid)
+ "_"
+ str(strt.composition.reduced_formula)
+ "_"
+ j.to_dict()["symbol"]
+ "_"
+ j.to_dict()["wyckoff_multiplicity"]
)
print(name)
def_energy = atom_to_energy(atoms=strt, model=model) * strt.num_atoms
chem_pot = unary_energy(j.to_dict()["symbol"])
Ef = def_energy - (strt.num_atoms + 1) * bulk_en_pa + chem_pot
print(
j.to_dict()["symbol"],
Ef,
bulk_en_pa,
def_energy,
atoms.num_atoms,
chem_pot,
)


get_defect_form_en()
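
For reference, the energy balance in the loop above is the standard vacancy formation energy, assuming the model predicts total energy per atom (hence the multiplications by atom counts):

E_f = E_defect(N) - (N + 1) * E_bulk_per_atom + mu_X

where the defect supercell contains N atoms, the pristine reference had N + 1, and mu_X is the elemental chemical potential of the removed species taken from unary_energy.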
80 changes: 80 additions & 0 deletions alignn/scripts/predict_db.py
@@ -0,0 +1,80 @@
import torch
from jarvis.core.atoms import Atoms
from jarvis.core.graphs import Graph
from alignn.models.alignn import ALIGNN
# from jarvis.analysis.structure.spacegroup import Spacegroup3D
from jarvis.db.figshare import data


model_path = "JV15/jv_optb88vdw_bandgap_alignn/checkpoint_300.pt"
device = "cpu"
if torch.cuda.is_available():
device = torch.device("cuda")
else:
device = torch.device("cpu")
model = ALIGNN()
model.load_state_dict(torch.load(model_path, map_location=device)["model"])
# model=torch.load('checkpoint_250.pt')['model']
model.to(device)
model.eval()


def predict_for_db(
name="polymer_genome",
prop="gga_gap",
filename="predictions.csv",
id_tag="id",
):
db = data(name)
filename = name + "_" + prop + "_v1_" + filename
f = open(filename, "w")
line = "id,original,out_data,num_atoms,formula,spacegroup_number\n"
f.write(line)
for i in db:
src = i["source_folder"]
if "vol" not in src:
atoms = Atoms.from_dict(i["atoms"])
id = i[id_tag]

g, lg = Graph.atom_dgl_multigraph(atoms)
out_data = (
model([g.to(device), lg.to(device)])
.detach()
.cpu()
.numpy()
.flatten()
.tolist()[0]
)
original = i[prop]
line = (
str(id)
+ ","
+ str(original)
+ ","
+ str(out_data)
+ ","
+ str(atoms.num_atoms)
+ ","
+ str(i["formula"])
+ ","
+ str(i["spacegroup_number"])
+ str("\n")
)
f.write(line)
# print (line)
f.close()


predict_for_db(name="qe_tb", prop="indir_gap", id_tag="jid")
"""
import pandas as pd
df=pd.read_csv('qe_tb_indir_gap_predictions.csv')
from sklearn.metrics import mean_absolute_error
original=df['original'].values
out_data=df['out_data'].values
mae=mean_absolute_error(original,out_data)
df['error']=abs(df['original']-df['out_data'])
tol=0.5
df2=(df[df['error']>tol])
print (len(df2))
"""