Skip to content

Commit

Permalink
Script to convert DeePMD training data to N2P2 format.
Browse files Browse the repository at this point in the history
  • Loading branch information
hjjvandam committed Aug 20, 2024
1 parent 2c38d72 commit 70025b2
Showing 1 changed file with 150 additions and 0 deletions.
150 changes: 150 additions & 0 deletions deepdrivemd/sim/nwchem/n2p2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import os
# Written by Nothando Khumalo, August 16, 2024

""" This program takes the trainig data generated from nwchem and formats it as input data for
n2p2 calculations. """

def create_file(filename):
    """
    Create ``filename`` as an empty file, discarding any previous contents.
    """
    # Opening in write mode creates the file or truncates an existing one;
    # nothing needs to be written for it to end up empty.
    with open(filename, 'w'):
        pass

def write_to_file(filename, molecule_name, coord_file, type_map_file, force_file, energy_file, mol_identifier):
    """
    Appends the data of one training folder to ``filename`` as n2p2 structures.

    All input files are read before the output file is opened, so a failure
    while reading does not leave a half-written structure behind.

    One ``begin`` ... ``end`` structure is written per entry in the energy
    file. The atoms returned by the coordinate and force readers are divided
    evenly over those entries. If the atom count is not divisible by the number
    of energies, everything is written as a single structure labelled with the
    first energy.

    Every atom line starts with the ``atom`` keyword and has the layout
    ``atom x y z element c n fx fy fz``, where ``c`` and ``n`` are written as
    the placeholder value 0.0.

    Args:
        filename: Output file; structures are appended to it.
        molecule_name: Name written on the ``comment`` line.
        coord_file: File handed to ``read_coords``.
        type_map_file: File handed to ``read_elements``.
        force_file: File handed to ``read_forces``.
        energy_file: File handed to ``read_energy``.
        mol_identifier: Identifier written in parentheses on the ``comment`` line.

    Raises:
        ValueError: If no coordinates or no energies were read, or if the
            number of force triples differs from the number of coordinate
            triples.
    """
    # Read the data from input files
    coords = read_coords(coord_file)
    forces = read_forces(force_file)
    energies = read_energy(energy_file)

    if not coords:
        raise ValueError(f"no coordinates found in {coord_file}")
    if not energies:
        raise ValueError(f"no energy values found in {energy_file}")
    if len(forces) != len(coords):
        raise ValueError(
            f"{force_file} holds {len(forces)} force triples but "
            f"{coord_file} holds {len(coords)} coordinate triples"
        )

    # NOTE(review): assumes the energy file has one value per frame and that
    # every frame has the same number of atoms - confirm against the files
    # produced upstream.
    num_frames = len(energies)
    if len(coords) % num_frames != 0:
        # Cannot split evenly: keep everything in one structure and label it
        # with the first energy.
        num_frames = 1
    atoms_per_frame = len(coords) // num_frames
    elements = read_elements(type_map_file, atoms_per_frame)

    c1, n1 = 0.0, 0.0  # These values are not used

    with open(filename, 'a') as file:
        for frame in range(num_frames):
            offset = frame * atoms_per_frame

            # Write the header
            file.write("begin\n")
            file.write(f"comment {molecule_name} ({mol_identifier})\n")

            # Write one "atom" line per atom; the keyword has to be repeated
            # on every line, not only on the first one.
            for i in range(atoms_per_frame):
                x1, y1, z1 = coords[offset + i]
                e1 = elements[i]
                fx1, fy1, fz1 = forces[offset + i]
                file.write(f"atom {x1} {y1} {z1} {e1} {c1} {n1} {fx1} {fy1} {fz1}\n")

            # Write the energy that belongs to this frame
            file.write(f"energy {energies[frame]}\n")

            # Write footer
            file.write("charge 0.0\n")
            file.write("end\n")
    print("wrote to file")

def read_coords(coord_file):
    """
    Reads the coordinates from the coord.raw file and returns them as a list of tuples.

    Every line is split on whitespace, converted to floats and consumed three
    values at a time, so one line may describe any number of atoms. Blank lines
    contribute nothing. The triples of all lines are returned in file order as
    one flat list.

    Args:
        coord_file: Path of the text file to read.

    Returns:
        A list of (x, y, z) tuples of floats.

    Raises:
        ValueError: If a token cannot be converted to float, or if a line does
            not contain a multiple of three values.
    """
    coords = []
    with open(coord_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            values = [float(token) for token in line.split()]
            if len(values) % 3 != 0:
                # An incomplete triple would otherwise surface as a bare
                # IndexError without any hint about where the data is broken.
                raise ValueError(
                    f"{coord_file}, line {line_number}: expected a multiple of 3 "
                    f"values, found {len(values)}"
                )
            # Group consecutive values into (x, y, z) tuples.
            coords.extend(zip(values[0::3], values[1::3], values[2::3]))
    print("coords taken")
    return coords

def read_elements(type_map_file, num_atoms):
    """
    Reads the element symbols from the type_map.raw file and repeats them to match the number of atoms.

    The whole file is split on whitespace, so the symbols may be spread over
    one or several lines. Atom ``i`` gets symbol ``i % len(symbols)``: the
    symbol list is cycled when it is shorter than ``num_atoms`` and truncated
    when it is longer.

    NOTE(review): cycling assigns the correct element only if the file lists
    the symbols in per-atom order - confirm against how type_map.raw is
    produced.

    Args:
        type_map_file: Path of the file holding whitespace-separated symbols.
        num_atoms: Number of element entries to return.

    Returns:
        A list of ``num_atoms`` element symbols (strings).

    Raises:
        ValueError: If atoms are requested but the file contains no symbols.
    """
    with open(type_map_file, 'r') as file:
        element_symbols = file.read().split()

    if num_atoms > 0 and not element_symbols:
        # Without this check the modulo below fails with ZeroDivisionError.
        raise ValueError(f"no element symbols found in {type_map_file}")

    # Repeat or slice the element symbols to match the number of atoms
    elements = [element_symbols[i % len(element_symbols)] for i in range(num_atoms)]
    print("element read")
    return elements

def read_forces(force_file):
    """
    Reads the force values from the force.raw file and returns them as a list of tuples.

    Every line is split on whitespace, converted to floats and consumed three
    values at a time, so one line may describe any number of atoms. Blank lines
    contribute nothing. The triples of all lines are returned in file order as
    one flat list.

    Args:
        force_file: Path of the text file to read.

    Returns:
        A list of (fx, fy, fz) tuples of floats.

    Raises:
        ValueError: If a token cannot be converted to float, or if a line does
            not contain a multiple of three values.
    """
    forces = []
    with open(force_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            values = [float(token) for token in line.split()]
            if len(values) % 3 != 0:
                # An incomplete triple would otherwise surface as a bare
                # IndexError without any hint about where the data is broken.
                raise ValueError(
                    f"{force_file}, line {line_number}: expected a multiple of 3 "
                    f"values, found {len(values)}"
                )
            # Group consecutive values into (fx, fy, fz) tuples.
            forces.extend(zip(values[0::3], values[1::3], values[2::3]))
    print("forces acquuired")
    return forces

def read_energy(energy_file):
    """
    Reads the energy values from the energy.raw file and returns them as a list of floats.

    Each non-blank line is stripped and converted to a single float. Blank
    lines (for example a trailing empty line) are skipped instead of aborting
    the conversion.

    Args:
        energy_file: Path of the text file to read.

    Returns:
        A list of floats, one per non-blank line, in file order.

    Raises:
        ValueError: If a non-blank line cannot be converted to float.
    """
    energies = []
    with open(energy_file, 'r') as file:
        for line in file:
            text = line.strip()
            if not text:
                # float('') raises, so blank lines must be filtered out here.
                continue
            energies.append(float(text))
    print("energyyyyy")
    return energies

def find_molecule_folders(directory='.'):
    """
    Finds folders with names starting with 'training_mol_' in the specified directory.

    Only direct children of ``directory`` are inspected, and only entries that
    are directories are kept. The directory listing is sorted so the result
    does not depend on the order in which the operating system returns the
    entries.

    Args:
        directory: Directory to search; defaults to the current directory.

    Returns:
        A list of tuples (folder_path, molecule_identifier), where
        ``folder_path`` is ``directory`` joined with the folder name and
        ``molecule_identifier`` is the folder name without the
        'training_mol_' prefix.
    """
    prefix = 'training_mol_'
    folders = []
    for entry in sorted(os.listdir(directory)):
        folder_path = os.path.join(directory, entry)
        if entry.startswith(prefix) and os.path.isdir(folder_path):
            # Extract the part after 'training_mol_'
            folders.append((folder_path, entry[len(prefix):]))
    print(folders)
    print("training_mol folders found")
    return folders

def generate_n2p2_test_files_for_all_folders(directory='.'):
    """
    Finds all relevant folders and generates n2p2 test files for each.

    For every folder returned by ``find_molecule_folders(directory)`` the
    files ``coord.raw``, ``type_map.raw``, ``force.raw`` and ``energy.raw``
    inside that folder are converted into ``<identifier>_input.data``, which
    is written into the same folder. The identifier is used both as the
    molecule name and as the molecule identifier.

    Args:
        directory: Directory that holds the training folders; defaults to the
            current directory.

    Returns:
        None.
    """
    print("going through files")
    for folder_path, mol_identifier in find_molecule_folders(directory):
        generate_n2p2_test_file(
            os.path.join(folder_path, f"{mol_identifier}_input.data"),
            mol_identifier,
            os.path.join(folder_path, "coord.raw"),
            os.path.join(folder_path, "type_map.raw"),
            os.path.join(folder_path, "force.raw"),
            os.path.join(folder_path, "energy.raw"),
            mol_identifier,
        )

def generate_n2p2_test_file(output_filename, molecule_name, coord_file, type_map_file, force_file, energy_file, mol_identifier):
    """
    Produce one n2p2 data file from a set of raw input files.

    The output file is first reset to empty and the converted data is then
    appended to it.
    """
    # Start from an empty file so that the append below never adds to stale data.
    create_file(output_filename)
    write_to_file(
        filename=output_filename,
        molecule_name=molecule_name,
        coord_file=coord_file,
        type_map_file=type_map_file,
        force_file=force_file,
        energy_file=energy_file,
        mol_identifier=mol_identifier,
    )

# Run the script for all folders
# This is a module-level statement, so the conversion runs whenever this file
# is executed or imported. `sample` is bound to the function's return value;
# generate_n2p2_test_files_for_all_folders has no return statement, so that
# value is None.
# NOTE(review): consider an `if __name__ == "__main__":` guard if importing
# this module should not trigger the conversion.
sample = generate_n2p2_test_files_for_all_folders()

0 comments on commit 70025b2

Please sign in to comment.