Added module for creating MEP_trj files from ORCA output

davidkastner · Jul 26, 2024 · e900d4c · e900d4c
1 parent 1be22d7
commit e900d4c
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 1 deletion.
diff --git a/pyqmmm/cli.py b/pyqmmm/cli.py
@@ -207,6 +207,7 @@ def md(
 @click.option("--bond_valence", "-bv", is_flag=True, help="Replace QM optimized atoms in a pdb.")
 @click.option("--orca_scan", "-os", is_flag=True, help="Plots an ORCA scan.")
 @click.option("--orca_neb_restart", "-rneb", is_flag=True, help="Prepare to restart an ORCA NEB.")
+@click.option("--create_neb_mep", "-mep", is_flag=True, help="Creates MEP_trj from out.")
 @click.help_option('--help', '-h', is_flag=True, help='Exiting pyQMMM.')
 def qm(
     plot_energy,
@@ -217,6 +218,7 @@ def qm(
     bond_valence,
     orca_scan,
     orca_neb_restart,
+    create_neb_mep,
     ):
     """
     Functions for quantum mechanics (QM) simulations.
@@ -288,6 +290,11 @@ def qm(
         files_in_directory = [f for f in os.listdir() if f != 'delete']
         pyqmmm.qm.orca_neb_restart.move_files(files_in_directory)
 
+    if create_neb_mep:
+        import pyqmmm.qm.create_mep_trj
+        pyqmmm.qm.create_mep_trj.create_neb_mep_trj_from_out()
+
+
 if __name__ == "__main__":
     # Run the command-line interface when this script is executed
     cli()
diff --git a/pyqmmm/md/compare_distances.py b/pyqmmm/md/compare_distances.py
@@ -46,7 +46,6 @@ def get_colors(files):
 
 
 def get_plot(files):
-
     colors = get_colors(files)
     legend = []
     for index,file in enumerate(files):

diff --git a/pyqmmm/qm/create_mep_trj.py b/pyqmmm/qm/create_mep_trj.py
@@ -0,0 +1,41 @@
+import re
+
+def create_neb_mep_trj_from_out():
+    # Define the file paths
+    orca_output_file = 'orca.out'
+    output_xyz_file = 'qmscript_MEP_trj.xyz'
+
+    # Define regex patterns for extracting data
+    # Adjusted to handle one-letter and two-letter element symbols
+    coordinate_pattern = re.compile(r'(REACTANT|PRODUCT|IMAGE \d+ \((ANGSTROEM|BOHR)\))\n-+\n((?:[A-Z][a-z]?\s+-?\d+\.\d+\s+-?\d+\.\d+\s+-?\d+\.\d+\s*\n)+)')
+    energy_pattern = re.compile(r'\s+\d+\s+\S+\s+(-?\d+\.\d+)')
+
+    # Read the ORCA output file
+    with open(orca_output_file, 'r') as file:
+        orca_output = file.read()
+
+    # Extract coordinates and energies
+    coordinates = coordinate_pattern.findall(orca_output)
+    path_summary_snippet = orca_output.split('PATH SUMMARY')[1].split('---------------------------------------------------------------')[1].strip()
+    energies = energy_pattern.findall(path_summary_snippet)
+
+    # Adjust coordinates list to include REACTANT and PRODUCT properly
+    coordinates = [('REACTANT', coordinates[0][1], coordinates[0][2])] + coordinates + [('PRODUCT', coordinates[-1][1], coordinates[-1][2])]
+
+    # Ensure the number of coordinates and energies match
+    assert len(coordinates) == len(energies), "Mismatch between number of coordinates and energies."
+
+    # Write the extracted data to the new XYZ file
+    with open(output_xyz_file, 'w') as file:
+        for i, (coord_type, unit, coord_data) in enumerate(coordinates):
+            lines = coord_data.strip().split('\n')
+            num_atoms = len(lines)
+            energy = energies[i]
+            title_line = f"Coordinates from ORCA-job qmscript_MEP E {energy}"
+
+            file.write(f"{num_atoms}\n")
+            file.write(f"{title_line}\n")
+            for line in lines:
+                file.write(line + '\n')
+
+    output_xyz_file