openforcefield · lilyminium · Sep 30, 2024 · Sep 30, 2024 · Sep 30, 2024 · Sep 30, 2024
diff --git a/openff/evaluator/_tests/test_properties/test_enthalpy.py b/openff/evaluator/_tests/test_properties/test_enthalpy.py
@@ -0,0 +1,359 @@
+<<<<<<< HEAD
+import os
+
+=======
+import json
+
+>>>>>>> 223075d (fix tests)
+import pathlib
+import shutil
+
+import numpy as np
+import pytest
+from openff.toolkit.topology import Topology
+from openff.toolkit.typing.engines.smirnoff import ForceField
+from openff.units import unit
+from openff.utilities.utilities import get_data_dir_path
+from openmm.openmm import System as OpenMMSystem
+
+from openff.evaluator.datasets import PropertyPhase
+from openff.evaluator.forcefield import SmirnoffForceFieldSource
+from openff.evaluator.properties import EnthalpyOfMixing
+from openff.evaluator.protocols import analysis
+from openff.evaluator.substances import Component, MoleFraction, Substance
+from openff.evaluator.thermodynamics import ThermodynamicState
+
+<<<<<<< HEAD
+=======
+from openff.evaluator.utils.serialization import TypedJSONEncoder
+from openff.evaluator.workflow import Workflow
+
+>>>>>>> 223075d (fix tests)
+from openff.evaluator.utils import graph
+from openff.evaluator.utils.observables import Observable
+from openff.evaluator.workflow import Workflow
+
+
+def _write_dummy_trajectory_file(directory):
+    """Write a dummy trajectory file to disk.
+    This gets around a check in simulation resumption
+    that requires a trajectory file to exist and not be empty.
+    """
+
+    directory = pathlib.Path(directory)
+    trajectory = directory / "trajectory.dcd"
+    with trajectory.open("w") as file:
+        file.write("dummy")
+
+
+def _get_dummy_enthalpy_of_mixing(substance):
+    """Generate a dummy EnthalpyOfMixing object for testing."""
+    thermodynamic_state = ThermodynamicState(
+        temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
+    )
+    return EnthalpyOfMixing(
+        thermodynamic_state=thermodynamic_state,
+        phase=PropertyPhase.Liquid,
+        substance=substance,
+        value=10.0 * unit.kilojoules_per_mole,
+        uncertainty=1.0 * unit.kilojoules_per_mole,
+    )
+
+
+def _write_force_field(force_field: str = "openff-2.0.0.offxml"):
+    """
+    Write a force field file to disk.
+    """
+    ff = ForceField(force_field)
+    with open("force-field.json", "w") as file:
+        file.write(SmirnoffForceFieldSource.from_object(ff).json())
+
+
+def _generate_dummy_observable(name):
+    """Generate fake observable data for calculating with mole fractions"""
+    observable = analysis.AverageObservable(name)
+    observable.value = Observable(
+        value=unit.Measurement(
+            value=10 * unit.kilojoules_per_mole, error=1 * unit.kilojoules_per_mole
+        )
+    )
+    return observable
+
+
+class TestEnthalpyOfMixing:
+
+    @pytest.mark.parametrize(
+        "input_mole_fractions, output_mole_fractions",
+        [
+            [(0.5, 0.5), (0.5, 0.5)],
+<<<<<<< HEAD
+            [(0.1037, 0.8963), (0.104, 0.896)],
+            [(0.1111, 0.2222, 0.1667), (0.222, 0.444, 0.334)],
+        ],
+=======
+            [(0.1037, 0.8963), (0.10, 0.90)],
+        ]
+>>>>>>> 223075d (fix tests)
+    )
+    def test_mole_fractions(self, input_mole_fractions, output_mole_fractions, tmpdir):
+        """
+        This test *only* checks the part where mole fractions are weighted
+        """
+        # build our enthalpy of mixing target
+        default_schema = EnthalpyOfMixing.default_simulation_schema(n_molecules=100)
+        workflow_schema = default_schema.workflow_schema
+        workflow_schema.replace_protocol_types(
+            {"BaseBuildSystem": "BuildSmirnoffSystem"}
+        )
+        possible_smiles = ["O", "OCCN(CCO)CCO", "CO", "CCO"]
+
+        substance = Substance()
+        n_components = len(input_mole_fractions)
+        for i in range(n_components):
+            substance.add_component(
+                Component(smiles=possible_smiles[i]),
+                MoleFraction(input_mole_fractions[i]),
+            )
+
+        physical_property = _get_dummy_enthalpy_of_mixing(substance)
+        with tmpdir.as_cwd():
+            here = pathlib.Path(".")
+
+            # generate force field and metadata
+            _write_force_field()
+            metadata = Workflow.generate_default_metadata(
+                physical_property, "force-field.json"
+            )
+            uuid = "4000"
+
+            # generate workflow
+            workflow = Workflow.from_schema(
+                workflow_schema, metadata=metadata, unique_id=uuid
+            )
+            workflow_graph = workflow.to_graph()
+            protocol_graph = workflow_graph._protocol_graph
+            parent_outputs = []
+            for name, protocol in workflow_graph.protocols.items():
+                if "build" in name:
+                    build_path = name.replace("|", "_")
+                    output = protocol_graph._execute_protocol(
+                        build_path,
+                        protocol,
+                        True,
+                        available_resources=None,
+                        safe_exceptions=True,
+                    )
+                    # keep the output JSON files to pass in as input to mole fraction protocols
+                    parent_outputs.append(output)
+
+            for i in range(n_components):
+                # generate fake data for the conditional groups
+                base = f"extract_observable_component_{i}"
+                path = (
+                    here
+                    / f"{uuid}_conditional_group_component_{i}"
+                    / f"{uuid}_{base}"
+                    / f"{uuid}|{base}.json"
+                )
+                path.parent.mkdir(parents=True, exist_ok=True)
+                name = f"extract_observable_component_{i}"
+                observable = _generate_dummy_observable(name)
+                with path.open("w") as file:
+                    file.write(
+                        json.dumps(
+                            {
+                                ".value": observable.value,
+                                ".time_series_statistics": observable.time_series_statistics,
+                            },
+                            cls=TypedJSONEncoder,
+                        )
+                    )
+                parent_outputs.append((f"{uuid}|{base}", str(path)))
+
+                # now check mole fraction protocol execution
+                cg = workflow_graph.protocols[f"{uuid}|conditional_group_component_{i}"]
+                wmf = cg.protocols[f"{uuid}|weight_by_mole_fraction_{i}"]
+                wmf_path = (
+                    here
+                    / f"{uuid}_conditional_group_component_{i}"
+                    / f"{uuid}_weight_by_mole_fraction_{i}"
+                )
+                protocol_graph._execute_protocol(
+                    str(wmf_path),
+                    wmf,
+                    True,
+                    *parent_outputs,
+                    available_resources=None,
+                    safe_exceptions=True,
+                )
+
+                # check mole fraction substance directly
+                assert np.isclose(
+                    wmf.full_substance.amounts[possible_smiles[i] + "{solv}"][0].value,
+                    output_mole_fractions[i],
+                )
+
+                # check weighted value
+                assert np.isclose(
+                    wmf.weighted_value.value.m_as(unit.kilojoules_per_mole),
+                    output_mole_fractions[i] * 10,
+                )
+
+<<<<<<< HEAD
+=======
+
+
+>>>>>>> 223075d (fix tests)
+    def test_expected_output_from_production_simulation(self, tmpdir):
+        """
+        This is an integration test of sorts,
+        constructed to test expected mole fractions. See Issue #575.
+
+        This test steps through the EnthalpyOfMixing process.
+        Some data is provided to avoid re-simulating long trajectories:
+
+        - energy minimisations and equilibrations
+        - the production simulation observables
+        - the decorrelated trajectory and observables
+
+        Note: the test is expected to take a few minutes to run.
+
+        TODO: add further tests on earlier steps (e.g. parameterisation)
+
+        """
+        # locate our saved test data
+        data_directory = pathlib.Path(
+            get_data_dir_path(
+                "test/example_properties/dhmix_triethanolamine", "openff.evaluator"
+            )
+        )
+
+        default_schema = EnthalpyOfMixing.default_simulation_schema(n_molecules=1000)
+        workflow_schema = default_schema.workflow_schema
+        workflow_schema.replace_protocol_types(
+            {"BaseBuildSystem": "BuildSmirnoffSystem"}
+        )
+        for schema in workflow_schema.protocol_schemas:
+            # set conditional protocols to match the test data
+            if "conditional" in schema.id:
+                for protocol_name, protocol in schema.protocol_schemas.items():
+                    if "simulation" in protocol_name:
+                        protocol.inputs[".steps_per_iteration"] = 10000000
+                        protocol.inputs[".output_frequency"] = 2000
+                        protocol.inputs[".checkpoint_frequency"] = 10
+
+        # build the enthalpy of mixing target
+        substance = Substance()
+        substance.add_component(Component(smiles="O"), MoleFraction(0.5098))
+        substance.add_component(Component(smiles="OCCN(CCO)CCO"), MoleFraction(0.4902))
+
+        physical_property = _get_dummy_enthalpy_of_mixing(substance)
+
+        metadata = Workflow.generate_default_metadata(
+            physical_property, str(data_directory / "force-field.json")
+        )
+        uuid = "6421"
+        workflow = Workflow.from_schema(
+            workflow_schema, metadata=metadata, unique_id=uuid
+        )
+
+        abs_path = data_directory.resolve()
+        with tmpdir.as_cwd():
+            tmp_path = pathlib.Path(".")
+            # copy data files over from data_directory
+            for path in abs_path.iterdir():
+                if path.name.startswith(uuid):
+                    dest_dir = tmp_path / path.name
+                    shutil.copytree(path, dest_dir)
+
+            # write dummy trajectory files
+            for suffix in ["component_0", "component_1", "mixture"]:
+                _write_dummy_trajectory_file(
+                    tmp_path
+                    / f"{uuid}_conditional_group_{suffix}"
+                    / f"{uuid}_production_simulation_{suffix}"
+                )
+
+            result = workflow.execute()
+
+            # manually check mole fractions
+            substance0 = workflow.protocols[
+                f"{uuid}|build_coordinates_component_0"
+            ].output_substance
+            substance1 = workflow.protocols[
+                f"{uuid}|build_coordinates_component_1"
+            ].output_substance
+            substance_mix = workflow.protocols[
+                f"{uuid}|build_coordinates_mixture"
+            ].output_substance
+
+            assert len(substance0.amounts) == 1
+            assert len(substance1.amounts) == 1
+            assert len(substance_mix.amounts) == 2
+
+            assert np.isclose(substance0.amounts[r"O{solv}"][0].value, 1)
+            assert np.isclose(substance1.amounts[r"OCCN(CCO)CCO{solv}"][0].value, 1)
+            assert np.isclose(substance_mix.amounts[r"O{solv}"][0].value, 0.51)
+            assert np.isclose(substance_mix.amounts[r"OCCN(CCO)CCO{solv}"][0].value, 0.49)
+
+            # check assignment and parameterization
+            system0 = workflow.protocols[
+                f"{uuid}|assign_parameters_component_0"
+            ].parameterized_system
+            system1 = workflow.protocols[
+                f"{uuid}|assign_parameters_component_1"
+            ].parameterized_system
+            system_mix = workflow.protocols[
+                f"{uuid}|assign_parameters_mixture"
+            ].parameterized_system
+
+            for system in [system0, system1, system_mix]:
+                assert isinstance(system.topology, Topology)
+                assert system.topology.n_molecules == 1000
+                assert isinstance(system.system, OpenMMSystem)
+
+            # check cg:= conditional_group. This is a group of protocols
+            cg0 = workflow.protocols[f"{uuid}|conditional_group_component_0"]
+            cg1 = workflow.protocols[f"{uuid}|conditional_group_component_1"]
+            cg_mix = workflow.protocols[f"{uuid}|conditional_group_mixture"]
+
+            # check enthalpy is correct
+            # note this is not re-simulated, but it *is* re-calculated
+            enth0 = cg0.protocols[f"{uuid}|extract_observable_component_0"].value
+            enth1 = cg1.protocols[f"{uuid}|extract_observable_component_1"].value
+            enth_mix = cg_mix.protocols[f"{uuid}|extract_observable_mixture"].value
+
+            assert np.isclose(enth0.value.m_as(unit.kilojoules_per_mole), -44.879, atol=1e-3)
+            assert np.isclose(enth1.value.m_as(unit.kilojoules_per_mole), 251.418, atol=1e-3)
+            assert np.isclose(
+                enth_mix.value.m_as(unit.kilojoules_per_mole), 98.855, atol=1e-3
+            )
+
+            # now check weighting by mole fraction
+            wmf0 = cg0.protocols[f"{uuid}|weight_by_mole_fraction_0"]
+            wmf1 = cg1.protocols[f"{uuid}|weight_by_mole_fraction_1"]
+
+            assert np.isclose(
+                wmf0.full_substance.amounts[r"O{solv}"][0].value, 0.51
+            )
+            assert np.isclose(
+                wmf1.full_substance.amounts[r"OCCN(CCO)CCO{solv}"][0].value, 0.49
+            )
+
+            assert np.isclose(
+                wmf0.weighted_value.value.m_as(unit.kilojoules_per_mole), -22.888, atol=1e-3
+            )
+            assert np.isclose(
+                wmf1.weighted_value.value.m_as(unit.kilojoules_per_mole), 123.195, atol=1e-3
+            )
+
+            # check adding the two together
+            add = workflow.protocols[f"{uuid}|add_component_observables"].result
+            assert np.isclose(
+                add.value.m_as(unit.kilojoules_per_mole), 100.307, atol=1e-3
+            )
+
+            # check final value
+            assert np.isclose(
+                result.value.value.m_as(unit.kilojoules_per_mole), -1.452, atol=1e-3
+            )
diff --git a/...ent_0/6421_extract_observable_component_0/6421|extract_observable_component_0_output.json b/...ent_0/6421_extract_observable_component_0/6421|extract_observable_component_0_output.json
@@ -0,0 +1 @@
+{".value": {"value": {"value": {"value": -44.87890629041152, "unit": "kilojoule / mole", "@type": "openff.evaluator.unit.Quantity"}, "error": {"value": 0.0059413411007539245, "unit": "kilojoule / mole", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.unit.Measurement"}, "gradients": [], "@type": "openff.evaluator.utils.observables.Observable"}, ".time_series_statistics": {"n_total_points": 5000, "n_uncorrelated_points": 2500, "statistical_inefficiency": 1.4624027898460412, "equilibration_index": 0, "@type": "openff.evaluator.utils.timeseries.TimeSeriesStatistics"}}
diff --git a/...ent_0/6421_extract_observable_component_0/6422|extract_observable_component_0_output.json b/...ent_0/6421_extract_observable_component_0/6422|extract_observable_component_0_output.json
@@ -0,0 +1 @@
+{".value": {"value": {"value": {"value": -44.87892946171502, "unit": "kilojoule / mole", "@type": "openff.evaluator.unit.Quantity"}, "error": {"value": 0.00519925052281473, "unit": "kilojoule / mole", "@type": "openff.evaluator.unit.Quantity"}, "@type": "openff.evaluator.unit.Measurement"}, "gradients": [], "@type": "openff.evaluator.utils.observables.Observable"}, ".time_series_statistics": {"n_total_points": 5000, "n_uncorrelated_points": 2500, "statistical_inefficiency": 1.4624029947304895, "equilibration_index": 0, "@type": "openff.evaluator.utils.timeseries.TimeSeriesStatistics"}}
diff --git a/...6421_production_simulation_component_0/6421|production_simulation_component_0_output.json b/...6421_production_simulation_component_0/6421|production_simulation_component_0_output.json
diff --git a/...6421_conditional_group_component_0/6421_production_simulation_component_0/checkpoint.json b/...6421_conditional_group_component_0/6421_production_simulation_component_0/checkpoint.json
@@ -0,0 +1 @@
+{"output_frequency": 2000, "checkpoint_frequency": 10, "steps_per_iteration": 10000000, "current_step_number": 10000000, "@type": "openff.evaluator.protocols.openmm.OpenMMSimulation._Checkpoint"}