PR: Add YARP-xTB to pytest / GitHub workflow!
You can now run the whole xTB level of YARP through pytest and the GitHub workflow (test coverage is still partial). A minimal sketch of how such a check can be wired up is shown below.
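As a rough illustration only: the module name yarp_xtb, the function run_xtb_level, and the test geometry path below are hypothetical placeholders, not the actual files added in this PR.

# Hypothetical pytest sketch; yarp_xtb, run_xtb_level and the input path are
# illustrative placeholders, not the test files introduced by this PR.
import shutil
import pytest

xtb_missing = shutil.which("xtb") is None

@pytest.mark.skipif(xtb_missing, reason="xtb binary not on PATH")
def test_xtb_level_pipeline(tmp_path):
    from yarp_xtb import run_xtb_level  # placeholder import
    result = run_xtb_level(input_xyz="tests/data/input.xyz", scratch=str(tmp_path))
    assert result is not None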
Showing 68 changed files with 8,520 additions and 2 deletions.
@@ -0,0 +1,16 @@
[orca]
# ORCA needs the full path to its binary, so please provide the full path.
#cmd=/sw/pkgs/lsa/orca/5.0.3/bin/orca

[psi4]
# As installing Psi4 without conda is, to put it mildly, tricky, it was
# decided to allow installing Psi4 into a separate conda environment.
# pysisyphus then creates a Psi4 input and sends it to the (bash) script given below,
# which accepts/expects one argument. It is the responsibility of the script to activate
# the appropriate conda environment and submit the Psi4 input. An example runpsi4.sh
# script is given below.
#cmd=/user/johannes/bin/runpsi4.sh

[xtb]
# Cmd to execute. Please ensure that xtb is on your $PATH.
cmd=CONDA_ENV/bin/xtb
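The runpsi4.sh example referenced in the comment is not included in this diff. Purely as an illustration of the protocol it describes (the wrapper receives one argument, activates a dedicated environment, and runs the Psi4 input), an equivalent wrapper could look like the following Python sketch; the environment name psi4_env is an assumption.

#!/usr/bin/env python
# Hypothetical wrapper: takes the single argument pysisyphus passes (the Psi4
# input file) and runs it inside a dedicated conda environment ("psi4_env" is
# an assumed name).
import subprocess
import sys

def main():
    inp = sys.argv[1]  # the Psi4 input file handed over by pysisyphus
    subprocess.run(["conda", "run", "-n", "psi4_env", "psi4", inp], check=True)

if __name__ == "__main__":
    main()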
@@ -0,0 +1,40 @@
from typing import Dict, List

import torch
import torch.nn as nn
from torch import Tensor


elem_to_num = {'H': 1, 'C': 6, 'N': 7, 'O': 8}


class EnsembledModel(nn.Module):
    """Wraps several models and reports the ensemble mean and standard deviation of the requested outputs."""

    def __init__(self, models: List, x=['coord', 'numbers', 'charge'], out=['energy'], detach=False):
        super().__init__()
        self.models = nn.ModuleList(models)
        self.x = x          # input keys forwarded to every sub-model
        self.out = out      # output keys collected from every sub-model
        self.detach = detach

    def forward(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]:
        # Run every sub-model on the selected input keys and collect its outputs.
        res: List[Dict[str, Tensor]] = []
        for model in self.models:
            _in = dict()
            for k in data:
                if k in self.x:
                    _in[k] = data[k]
            _out = model(_in)
            _r = dict()
            for k in _out:
                if k in self.out:
                    _r[k] = _out[k]
                    if self.detach:
                        _r[k] = _r[k].detach()
            res.append(_r)

        # Stack the per-model predictions and store their mean and standard deviation.
        for k in res[0]:
            v = []
            for x in res:
                v.append(x[k])
            vv = torch.stack(v, dim=0)
            data[k] = vv.mean(dim=0)
            data[k + '_std'] = vv.std(dim=0)
        return data
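For reference, a minimal usage sketch appended to the module above (it reuses the imports already present); ToyModel and the tensor shapes are illustrative stand-ins, not part of this PR or a real trained ensemble.

# Hypothetical usage: only illustrates the dictionary in/out convention.
class ToyModel(nn.Module):
    def forward(self, data: Dict[str, Tensor]) -> Dict[str, Tensor]:
        # fake per-structure "energy": sum of all coordinates
        return {'energy': data['coord'].sum(dim=(-2, -1))}

ensemble = EnsembledModel([ToyModel(), ToyModel()], x=['coord', 'numbers', 'charge'], out=['energy'])
batch = {'coord': torch.randn(4, 5, 3),
         'numbers': torch.ones(4, 5, dtype=torch.long),
         'charge': torch.zeros(4)}
out = ensemble(batch)
print(out['energy'].shape, out['energy_std'].shape)  # ensemble mean and std per structure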
@@ -0,0 +1,160 @@
#!/bin/env python
# Author: Hsuan-Hao Hsu ([email protected])
import os, sys
import numpy as np
import yaml
import logging
import time
import json
import pickle
import pyjokes
import fnmatch
from xgboost import XGBClassifier

from yarp.input_parsers import xyz_parse
from wrappers.orca import ORCA
from wrappers.crest import CREST
from utils import *
from constants import Constants
from job_submission import *
from wrappers.gaussian import Gaussian
from job_mapping import *

# This program aims to refine the TSs (.xyz files) at the DFT level.
# We don't have any information about the reactant and product.
# Just do TS-opt and IRC calculations.
def main(args):
    TS_dict=dict()
    # read the TS(s) into a dictionary
    if os.path.isfile(args["input"]):
        E, G=xyz_parse(args["input"])
        name=args["input"].split("/")[-1].split(".")[0]
        TS_dict[name]=dict()
        TS_dict[name]["E"]=E
        TS_dict[name]["TSG"]=G
    else:
        xyz_files=[args["input"]+"/"+i for i in os.listdir(args["input"]) if fnmatch.fnmatch(i, "*.xyz")]
        for i in xyz_files:
            E, G=xyz_parse(i)
            name=i.split("/")[-1].split(".")[0]
            TS_dict[name]=dict()
            TS_dict[name]["E"]=E
            TS_dict[name]["TSG"]=G
    # finished loading the initial TSs into a dict
    scratch=args["scratch"]
    if os.path.isdir(args["scratch"]) is False: os.mkdir(args["scratch"])
    if len(args["dft_lot"].split()) > 1: dft_lot="/".join(args["dft_lot"].split())
    else: dft_lot=args["dft_lot"]
    # run TS optimization
    job_list=dict()
    running_jobs=[]
    for i in TS_dict.keys():
        wf=f"{scratch}/{i}"
        if os.path.isdir(wf) is False: os.mkdir(wf)
        xyz_file=f"{wf}/{i}.xyz"
        xyz_write(xyz_file, TS_dict[i]["E"], TS_dict[i]["TSG"])
        if args["package"]=="ORCA":
            dft_job=ORCA(input_geo=xyz_file, work_folder=wf, nproc=int(args["dft_nprocs"]), mem=int(args["mem"])*1000, jobname=f"{i}-TSOPT",
                         jobtype="OptTS Freq", lot=args["dft_lot"], charge=args["charge"], multiplicity=args["multiplicity"], solvent=args["solvent"],
                         solvation_model=args["solvation_model"], dielectric=args["dielectric"], writedown_xyz=True)
            dft_job.generate_geometry_settings(hess=True, hess_step=int(args["hess_recalc"]))
            dft_job.generate_input()
            job_list[i]=dft_job
            if dft_job.calculation_terminated_normally() is False: running_jobs.append(i)
        elif args["package"]=="Gaussian":
            dft_job=Gaussian(input_geo=xyz_file, work_folder=wf, nproc=int(args["dft_nprocs"]), mem=int(args["mem"])*1000, jobname=f"{i}-TSOPT",
                             jobtype="tsopt", lot=dft_lot, charge=args["charge"], multiplicity=args["multiplicity"], solvent=args["solvent"],
                             solvation_model=args["solvation_model"], dielectric=args["dielectric"], dispersion=args["dispersion"])
            dft_job.generate_input()
            job_list[i]=dft_job
            if dft_job.calculation_terminated_normally() is False: running_jobs.append(i)
    if len(running_jobs)>1:
        # split the pending TS-opt jobs into batches of dft_njobs and submit one SLURM job per batch
        n_submit=len(running_jobs)//int(args["dft_njobs"])
        if len(running_jobs)%int(args["dft_njobs"])>0: n_submit+=1
        startid=0
        slurm_jobs=[]
        for i in range(n_submit):
            slurmjob=SLURM_Job(jobname=f"TSOPT.{i}", ppn=int(args["ppn"]), partition=args["partition"], time=args["dft_wt"], mem_per_cpu=int(args["mem"])*1100)
            endid=min(startid+int(args["dft_njobs"]), len(running_jobs))
            if args["package"]=="ORCA": slurmjob.create_orca_jobs([job_list[ind] for ind in running_jobs[startid:endid]])
            elif args["package"]=="Gaussian": slurmjob.create_gaussian_jobs([job_list[ind] for ind in running_jobs[startid:endid]])
            slurmjob.submit()
            startid=endid
            slurm_jobs.append(slurmjob)
        print(f"Running {len(slurm_jobs)} TS optimization jobs...")
        monitor_jobs(slurm_jobs)
        # collect the converged TS geometries, thermal corrections, and imaginary modes
        key=[i for i in job_list.keys()]
        for i in key:
            dft_opt=job_list[i]
            if dft_opt.calculation_terminated_normally() and dft_opt.optimization_converged() and dft_opt.is_TS():
                _, geo=dft_opt.get_final_structure()
                if dft_lot not in TS_dict[i].keys(): TS_dict[i][dft_lot]=dict()
                TS_dict[i][dft_lot]["geo"]=geo
                TS_dict[i][dft_lot]["thermal"]=dft_opt.get_thermal()
                #TS_dict[i][dft_lot]["SPE"]=dft_opt.get_energy()
                TS_dict[i][dft_lot]["imag_mode"]=dft_opt.get_imag_freq_mode()
    else:
        print("No TS optimization jobs need to be performed...")

    # Finished running the TS-opt jobs
    # Prepare IRC jobs
    job_list=dict()
    running_jobs=[]
    for i in TS_dict.keys():
        wf=f"{scratch}/{i}"
        xyz_file=f"{wf}/{i}.xyz"
        if dft_lot not in TS_dict[i].keys(): continue
        xyz_write(xyz_file, TS_dict[i]["E"], TS_dict[i][dft_lot]["geo"])
        if args["package"]=="ORCA":
            dft_job=ORCA(input_geo=xyz_file, work_folder=wf, nproc=int(args["dft_nprocs"]), mem=int(args["mem"])*1000, jobname=f"{i}-IRC",
                         jobtype="IRC", lot=args["dft_lot"], charge=args["charge"], multiplicity=args["multiplicity"], solvent=args["solvent"],
                         solvation_model=args["solvation_model"], dielectric=args["dielectric"], writedown_xyz=True)
            dft_job.generate_geometry_settings(hess=True, hess_step=int(args["hess_recalc"]))
            dft_job.generate_input()
            job_list[i]=dft_job
            if dft_job.calculation_terminated_normally() is False: running_jobs.append(i)
        elif args["package"]=="Gaussian":
            dft_job=Gaussian(input_geo=xyz_file, work_folder=wf, nproc=int(args["dft_nprocs"]), mem=int(args["mem"])*1000, jobname=f"{i}-IRC",
                             jobtype="irc", lot=dft_lot, charge=args["charge"], multiplicity=args["multiplicity"], solvent=args["solvent"],
                             solvation_model=args["solvation_model"], dielectric=args["dielectric"], dispersion=args["dispersion"])
            dft_job.generate_input()
            job_list[i]=dft_job
            if dft_job.calculation_terminated_normally() is False: running_jobs.append(i)
    if len(running_jobs)>1:
        # split the pending IRC jobs into batches of dft_njobs and submit one SLURM job per batch
        n_submit=len(running_jobs)//int(args["dft_njobs"])
        if len(running_jobs)%int(args["dft_njobs"])>0: n_submit+=1
        startid=0
        slurm_jobs=[]
        for i in range(n_submit):
            slurmjob=SLURM_Job(jobname=f"IRC.{i}", ppn=int(args["ppn"]), partition=args["partition"], time=args["dft_wt"], mem_per_cpu=int(args["mem"])*1100)
            endid=min(startid+int(args["dft_njobs"]), len(running_jobs))
            if args["package"]=="ORCA": slurmjob.create_orca_jobs([job_list[ind] for ind in running_jobs[startid:endid]])
            elif args["package"]=="Gaussian": slurmjob.create_gaussian_jobs([job_list[ind] for ind in running_jobs[startid:endid]])
            slurmjob.submit()
            startid=endid
            slurm_jobs.append(slurmjob)
        print(f"Running {len(slurm_jobs)} IRC jobs...")
        monitor_jobs(slurm_jobs)
        key=[i for i in job_list.keys()]
        for i in key:
            dft_opt=job_list[i]
            if dft_opt.calculation_terminated_normally():
                job_success=False
                try:
                    E, G1, G2, TSG, barrier1, barrier2=dft_opt.analyze_IRC()
                    job_success=True
                except Exception: pass
                if job_success:
                    # store the two IRC end points, the TS geometry, and the barriers
                    TS_dict[i][dft_lot]["IRC"]=dict()
                    TS_dict[i][dft_lot]["IRC"]["node"]=[G1, G2]
                    TS_dict[i][dft_lot]["IRC"]["TS"]=TSG
                    TS_dict[i][dft_lot]["barriers"]=[barrier2, barrier1]
    else:
        print("No IRC jobs need to be performed...")
    # save everything collected so far into the reaction data pickle
    with open(args["reaction_data"], 'wb') as f:
        pickle.dump(TS_dict, f)
    return

if __name__=="__main__":
    parameters_yaml = sys.argv[1]
    with open(parameters_yaml, "r") as f:
        parameters = yaml.load(f, Loader=yaml.FullLoader)
    main(parameters)
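For orientation, a hypothetical parameter set matching the keys read by main() above; all values are placeholders and depend on the local cluster and software setup.

# Hypothetical example parameters (would normally live in the YAML file passed
# on the command line); keys mirror the args[...] accesses in the script above.
example_parameters = {
    "input": "TS_guesses/",          # a single .xyz file or a folder of .xyz files
    "scratch": "scratch/",
    "reaction_data": "TS_refine.p",  # pickle written at the end
    "package": "ORCA",               # or "Gaussian"
    "dft_lot": "wB97X-D3 def2-TZVP",
    "dft_nprocs": 8,
    "dft_njobs": 2,
    "dft_wt": "24:00:00",
    "ppn": 8,
    "partition": "standard",
    "mem": 4,                        # scaled by 1000 / 1100 in the script
    "charge": 0,
    "multiplicity": 1,
    "solvent": False,
    "solvation_model": "CPCM",
    "dielectric": 0.0,
    "dispersion": False,
    "hess_recalc": 5,
}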