Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handling failures in elastic flow #861

Merged
merged 3 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/atomate2/common/flows/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ class BaseElasticMaker(Maker, ABC):
bulk relaxation.
elastic_relax_maker : .BaseVaspMaker or .ForceFieldRelaxMaker
Maker used to generate elastic relaxations.
max_failed_deformations: int or float
Maximum number of deformations allowed to fail to proceed with the fitting
of the elastic tensor. If an int, the absolute number of deformations. If
a float between 0 and 1, the maximum fraction of deformations. If None, any
number of failed deformations is allowed.
generate_elastic_deformations_kwargs : dict
Keyword arguments passed to :obj:`generate_elastic_deformations`.
fit_elastic_tensor_kwargs : dict
Expand All @@ -76,6 +81,7 @@ class BaseElasticMaker(Maker, ABC):
elastic_relax_maker: BaseVaspMaker | ForceFieldRelaxMaker = (
None # constant volume optimization
)
max_failed_deformations: int | float | None = None
generate_elastic_deformations_kwargs: dict = field(default_factory=dict)
fit_elastic_tensor_kwargs: dict = field(default_factory=dict)
task_document_kwargs: dict = field(default_factory=dict)
Expand Down Expand Up @@ -139,6 +145,7 @@ def make(
equilibrium_stress=equilibrium_stress,
order=self.order,
symprec=self.symprec if self.sym_reduce else None,
max_failed_deformations=self.max_failed_deformations,
**self.fit_elastic_tensor_kwargs,
**self.task_document_kwargs,
)
Expand Down
23 changes: 23 additions & 0 deletions src/atomate2/common/jobs/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ def fit_elastic_tensor(
fitting_method: str = SETTINGS.ELASTIC_FITTING_METHOD,
symprec: float = SETTINGS.SYMPREC,
allow_elastically_unstable_structs: bool = True,
max_failed_deformations: float | None = None,
) -> ElasticDocument:
"""
Analyze stress/strain data to fit the elastic tensor and related properties.
Expand Down Expand Up @@ -200,21 +201,42 @@ def fit_elastic_tensor(
allow_elastically_unstable_structs : bool
Whether to allow the ElasticDocument to still complete in the event that
the structure is elastically unstable.
max_failed_deformations: int or float
Maximum number of deformations allowed to fail to proceed with the fitting
of the elastic tensor. If an int, the absolute number of deformations. If
a float between 0 and 1, the maximum fraction of deformations. If None, any
number of failed deformations is allowed.
"""
stresses = []
deformations = []
uuids = []
job_dirs = []
failed_uuids = []
for data in deformation_data:
# stress could be none if the deformation calculation failed
if data["stress"] is None:
failed_uuids.append(data["uuid"])
continue

stresses.append(Stress(data["stress"]))
deformations.append(Deformation(data["deformation"]))
uuids.append(data["uuid"])
job_dirs.append(data["job_dir"])

if max_failed_deformations is not None:
if 0 < max_failed_deformations < 1:
fraction_failed = len(failed_uuids) / len(deformation_data)
if fraction_failed > max_failed_deformations:
raise RuntimeError(
f"{fraction_failed} fraction of deformation calculations have "
f"failed, maximum fraction allowed: {max_failed_deformations}"
)
elif len(failed_uuids) > max_failed_deformations:
raise RuntimeError(
f"{len(failed_uuids)} deformation calculations have failed, maximum "
f"allowed: {max_failed_deformations}"
)

logger.info("Analyzing stress/strain data")

return ElasticDocument.from_stresses(
Expand All @@ -228,4 +250,5 @@ def fit_elastic_tensor(
equilibrium_stress=equilibrium_stress,
symprec=symprec,
allow_elastically_unstable_structs=allow_elastically_unstable_structs,
failed_uuids=failed_uuids,
)
20 changes: 20 additions & 0 deletions src/atomate2/common/schemas/elastic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Schemas for elastic tensor fitting and related properties."""

from copy import deepcopy
from enum import Enum
from typing import Optional

import numpy as np
Expand Down Expand Up @@ -107,6 +108,9 @@ class FittingData(BaseModel):
job_dirs: Optional[list[Optional[str]]] = Field(
None, description="The directories where the deformation jobs were run."
)
failed_uuids: Optional[list[str]] = Field(
None, description="The uuids of perturbations that were not completed"
)


class ElasticTensorDocument(BaseModel):
Expand All @@ -118,6 +122,12 @@ class ElasticTensorDocument(BaseModel):
)


class ElasticWarnings(Enum):
    """Warnings that can be attached to an elastic document.

    The ``value`` of each member is the plain string stored in the document's
    list of warnings.
    """

    # Deliberately left unannotated: an enum member's type is the enum itself,
    # not ``str``, so an explicit annotation is misleading and is rejected by
    # type checkers that follow the typing specification.
    FAILED_PERTURBATIONS = "failed_perturbations"


class ElasticDocument(StructureMetadata):
"""Document containing elastic tensor information and related properties."""

Expand All @@ -142,6 +152,7 @@ class ElasticDocument(StructureMetadata):
order: Optional[int] = Field(
None, description="Order of the expansion of the elastic tensor."
)
warnings: Optional[list[str]] = Field(None, description="Warnings.")

@classmethod
def from_stresses(
Expand All @@ -156,6 +167,7 @@ def from_stresses(
equilibrium_stress: Optional[Matrix3D] = None,
symprec: float = SETTINGS.SYMPREC,
allow_elastically_unstable_structs: bool = True,
failed_uuids: list[str] = None,
) -> Self:
"""Create an elastic document from strains and stresses.

Expand Down Expand Up @@ -187,8 +199,11 @@ def from_stresses(
allow_elastically_unstable_structs : bool
Whether to allow the ElasticDocument to still complete in the event that
the structure is elastically unstable.
failed_uuids: list of str
The uuids of perturbations that were not completed
"""
strains = [d.green_lagrange_strain for d in deformations]
elastic_warnings = []

if symprec is not None:
strains, stresses, uuids, job_dirs = expand_strains(
Expand Down Expand Up @@ -236,6 +251,9 @@ def from_stresses(

eq_stress = eq_stress.tolist() if eq_stress is not None else eq_stress

if failed_uuids:
elastic_warnings.append(ElasticWarnings.FAILED_PERTURBATIONS.value)

return cls.from_structure(
structure=structure,
meta_structure=structure,
Expand All @@ -253,7 +271,9 @@ def from_stresses(
deformations=[d.tolist() for d in deformations],
uuids=uuids,
job_dirs=job_dirs,
failed_uuids=failed_uuids,
),
warnings=elastic_warnings or None,
)


Expand Down
6 changes: 6 additions & 0 deletions src/atomate2/forcefields/flows/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class ElasticMaker(BaseElasticMaker):
bulk_relax_maker : .ForceFieldRelaxMaker or None
A maker to perform a tight relaxation on the bulk. Set to ``None`` to skip the
bulk relaxation.
max_failed_deformations: int or float
JaGeo marked this conversation as resolved.
Show resolved Hide resolved
Maximum number of deformations allowed to fail to proceed with the fitting
of the elastic tensor. If an int, the absolute number of deformations. If
a float between 0 and 1, the maximum fraction of deformations. If None, any
number of failed deformations is allowed.
elastic_relax_maker : .ForceFieldRelaxMaker
Maker used to generate elastic relaxations.
generate_elastic_deformations_kwargs : dict
Expand All @@ -69,6 +74,7 @@ class ElasticMaker(BaseElasticMaker):
relax_cell=False, relax_kwargs={"fmax": 0.00001}
)
) # constant volume relaxation
max_failed_deformations: int | float | None = None
generate_elastic_deformations_kwargs: dict = field(default_factory=dict)
fit_elastic_tensor_kwargs: dict = field(default_factory=dict)
task_document_kwargs: dict = field(default_factory=dict)
Expand Down
6 changes: 6 additions & 0 deletions src/atomate2/vasp/flows/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ class ElasticMaker(BaseElasticMaker):
bulk relaxation.
elastic_relax_maker : .BaseVaspMaker
Maker used to generate elastic relaxations.
max_failed_deformations: int or float
JaGeo marked this conversation as resolved.
Show resolved Hide resolved
Maximum number of deformations allowed to fail to proceed with the fitting
of the elastic tensor. If an int, the absolute number of deformations. If
a float between 0 and 1, the maximum fraction of deformations. If None, any
number of failed deformations is allowed.
generate_elastic_deformations_kwargs : dict
Keyword arguments passed to :obj:`generate_elastic_deformations`.
fit_elastic_tensor_kwargs : dict
Expand All @@ -65,6 +70,7 @@ class ElasticMaker(BaseElasticMaker):
default_factory=lambda: DoubleRelaxMaker.from_relax_maker(TightRelaxMaker())
)
elastic_relax_maker: BaseVaspMaker = field(default_factory=ElasticRelaxMaker)
max_failed_deformations: int | float | None = None
generate_elastic_deformations_kwargs: dict = field(default_factory=dict)
fit_elastic_tensor_kwargs: dict = field(default_factory=dict)
task_document_kwargs: dict = field(default_factory=dict)
Expand Down
141 changes: 139 additions & 2 deletions tests/common/jobs/test_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

from atomate2 import SETTINGS
from atomate2.common.jobs.elastic import generate_elastic_deformations
from atomate2.common.schemas.elastic import expand_strains
from atomate2.common.jobs.elastic import (
fit_elastic_tensor,
generate_elastic_deformations,
)
from atomate2.common.schemas.elastic import ElasticWarnings, expand_strains


@pytest.mark.parametrize("conventional", [False, True])
Expand Down Expand Up @@ -54,3 +57,137 @@ def _get_strains(structure, sym_reduce):
deformations = response[job.uuid][1].output

return [d.green_lagrange_strain for d in deformations]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be good to add a test for the whole flow or at least make sure that the parameter won't be removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a test for the whole workflow is a bit problematic because it is difficult to consistently trigger the failure of one of the perturbations. I can manually generate a test by explicitly killing vasp for one of the perturbations, but if the tests will need to be regenerated in the future it may be annoying.
It would be easier if I could modify the INCAR of only one of the perturbations, but it seems to not be possible because update_kwargs is not applied to dynamically generated jobs (see materialsproject/jobflow#588).
I thought these tests will be enough, since all the logic stays in the fit job, but if you think it is needed I can try to generate the workflow test by killing the job. Or maybe there is a better way?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. This sounds like too much work.

I think the fitting part is well tested. I was rather thinking about testing that a parameter change really arrives in the fit function

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it was part of the output schema, it would be easy. 😅

Copy link
Member

@JaGeo JaGeo May 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the only missing point. As this is a rather small but still important pull request, I would in this case merge it.
Does it make sense to add the max_* to the output schema and just test if it arrives there if you set it in the wf?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I think I had partially misunderstood your previous comment. I could add max_failed_deformations to the output document, but I am not sure if it is really worth to have that information just to make the test. I will try if I can quickly make a test by explicitly killing one of the deformation jobs, if it gets too complicated I will add max_failed_deformations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I managed to add a test with the flow failure triggered by the max_failed_deformations being set to 0.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome. Thank you!


def test_fit_elastic_tensor(clean_dir, si_structure, caplog):
    """Exercise ``fit_elastic_tensor`` with complete and partially failed data.

    Four scenarios are run in sequence on the same six deformation records:

    1. All stresses present: no failed uuids, no warnings, six fitted uuids.
    2. One stress set to ``None`` with ``max_failed_deformations=2``: the fit
       still succeeds, the failed uuid is reported and a warning is stored.
    3. Same data with ``max_failed_deformations=0``: the job fails and the
       absolute-count error message is logged.
    4. Same data with ``max_failed_deformations=0.01``: the job fails and the
       fraction error message is logged.
    """
    analyzer = SpacegroupAnalyzer(si_structure)
    conventional = analyzer.get_conventional_standard_structure()

    # One row per deformation calculation: (stress, deformation, uuid, job_dir).
    field_names = ("stress", "deformation", "uuid", "job_dir")
    rows = [
        (
            [
                [15.73376749, 0.0, 0.0],
                [0.0, 6.40261126, 0.0],
                [0.0, 0.0, 6.40261126],
            ],
            [
                [0.9899494936611666, 0.0, 0.0],
                [0.0, 1.0, 0.0],
                [0.0, 0.0, 1.0],
            ],
            "b7715382-9130-409c-ae2d-32a01321a0d0",
            "a",
        ),
        (
            [
                [7.74111679, 0.0, 0.0],
                [0.0, 3.05807413, -0.0],
                [0.0, -0.0, 3.05807413],
            ],
            [
                [0.99498743710662, 0.0, 0.0],
                [0.0, 1.0, 0.0],
                [0.0, 0.0, 1.0],
            ],
            "6cd2688c-764b-4a08-80f8-5c3ed75b91b9",
            "b",
        ),
        (
            [
                [-7.9262828, 0.0, -0.0],
                [0.0, -3.20998817, 0.0],
                [0.0, 0.0, -3.20998817],
            ],
            [
                [1.004987562112089, 0.0, 0.0],
                [0.0, 1.0, 0.0],
                [0.0, 0.0, 1.0],
            ],
            "fc3405d7-4171-4fe6-ab1b-086378ae6d0f",
            "c",
        ),
        (
            [
                [-15.60955466, 0.0, -0.0],
                [0.0, -6.14725418, 0.0],
                [-0.0, 0.0, -6.14725418],
            ],
            [
                [1.0099504938362078, 0.0, 0.0],
                [0.0, 1.0, 0.0],
                [0.0, 0.0, 1.0],
            ],
            "013d1100-f5b7-493b-b4ac-894c85733c7e",
            "d",
        ),
        (
            [
                [-0.21994363, 0.0, 0.0],
                [0.0, -0.1846297, 14.80836455],
                [0.0, 14.80836455, 0.40782339],
            ],
            [
                [1.0, 0.0, 0.0],
                [0.0, 1.0, -0.02],
                [0.0, 0.0, 0.999799979995999],
            ],
            "ab2857a6-188b-49a5-a90f-adfc30f884a7",
            "e",
        ),
        (
            [
                [-0.17602242, 0.0, 0.0],
                [0.0, -0.16580315, 7.40412018],
                [0.0, 7.40412018, -0.01771334],
            ],
            [
                [1.0, 0.0, 0.0],
                [0.0, 1.0, -0.01],
                [0.0, 0.0, 0.9999499987499375],
            ],
            "6cee0242-2ff6-4c02-afe8-9c0e8c0e37b7",
            "f",
        ),
    ]
    deformation_data = [dict(zip(field_names, row)) for row in rows]

    def _fit(ensure_success, **fit_kwargs):
        # Build the fit job on the current state of the data and run it locally.
        fit_job = fit_elastic_tensor(conventional, deformation_data, **fit_kwargs)
        return fit_job, run_locally(fit_job, ensure_success=ensure_success)

    # All deformations completed: nothing failed and nothing to warn about.
    job, response = _fit(True)
    elastic_out = response[job.uuid][1].output
    assert elastic_out.fitting_data.failed_uuids == []
    assert elastic_out.warnings is None
    assert len(set(elastic_out.fitting_data.uuids)) == 6

    # Simulate one failed deformation by dropping its stress.
    failed_entry = deformation_data[0]
    failed_entry["stress"] = None

    # One failure is tolerated when up to two are allowed.
    job, response = _fit(True, max_failed_deformations=2)
    elastic_out = response[job.uuid][1].output
    assert elastic_out.fitting_data.failed_uuids == [failed_entry["uuid"]]
    assert elastic_out.warnings == [ElasticWarnings.FAILED_PERTURBATIONS.value]
    assert len(set(elastic_out.fitting_data.uuids)) == 5

    # No failure allowed (absolute count): the job must not produce a response.
    job, response = _fit(False, max_failed_deformations=0)
    assert job.uuid not in response
    assert "1 deformation calculations have failed, maximum allowed: 0" in caplog.text

    # Fractional limit lower than the failed fraction (1/6): the job fails too.
    caplog.clear()
    job, response = _fit(False, max_failed_deformations=0.01)
    assert job.uuid not in response
    expected_message = (
        "666666 fraction of deformation calculations have failed, "
        "maximum fraction allowed: 0.01"
    )
    assert expected_message in caplog.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
ALGO = Normal
EDIFF = 1e-07
EDIFFG = -0.001
ENAUG = 1360
ENCUT = 300
GGA = Ps
IBRION = 2
ISIF = 2
ISMEAR = -5
ISPIN = 2
LAECHG = False
LASPH = True
LCHARG = False
LELF = False
LMIXTAU = True
LORBIT = 11
LREAL = False
LVTOT = True
LWAVE = False
MAGMOM = 8*-0.0
NELM = 200
NSW = 99
PREC = Accurate
SIGMA = 0.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pymatgen with grid density = 100 / number of atoms
0
Gamma
2 2 2
Loading
Loading