Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Addition of MDAnalysis RDKit converter #87

Open
wants to merge 8 commits into
base: Development_v_1.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 7 additions & 14 deletions openmmdl/openmmdl_analysis/openmmdlanalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
renumber_atoms_in_residues,
process_pdb,
extract_and_save_ligand_as_sdf,
convert_ligand_to_smiles,
)
from openmmdl.openmmdl_analysis.rmsd_calculation import (
rmsd_for_atomgroups,
Expand Down Expand Up @@ -298,8 +297,8 @@ def main():
print("\033[1mFiles are preprocessed\033[0m")

if ligand_sdf == None:
extract_and_save_ligand_as_sdf(topology, "./lig.sdf", ligand)
ligand_sdf = "./lig.sdf"
extract_and_save_ligand_as_sdf(topology, "./ligand_prepared.sdf", ligand)
ligand_sdf = "./ligand_prepared.sdf"

if not pdb_md:
pdb_md = mda.Universe(topology, trajectory)
Expand Down Expand Up @@ -363,7 +362,6 @@ def main():
ligand_rings.append(current_ring)
print("\033[1mLigand ring data gathered\033[0m")

convert_ligand_to_smiles(ligand_sdf, output_smi="lig.smi")
if peptide != None:
ligand_rings = None

Expand Down Expand Up @@ -592,15 +590,10 @@ def main():
binding_site[binding_mode] = values
occurrence_count = top_10_nodes_with_occurrences[binding_mode]
occurrence_percent = 100 * occurrence_count / total_frames
with open("lig.smi", "r") as file:
reference_smiles = (
file.read().strip()
) # Read the SMILES from the file and remove any leading/trailing whitespace
reference_mol = Chem.MolFromSmiles(reference_smiles)
prepared_ligand = AllChem.AssignBondOrdersFromTemplate(
reference_mol, lig_rd
)
# Generate 2D coordinates for the molecule
# Convert ligand to RDKit with Converter
lig_atoms = complex_lig.convert_to("RDKIT")
# Remove Hydrogens and get 2D representation
prepared_ligand = Chem.RemoveAllHs(lig_atoms)
AllChem.Compute2DCoords(prepared_ligand)
split_data = split_interaction_data(values)
# Get the highlighted atom indices based on interaction type
Expand Down Expand Up @@ -716,7 +709,7 @@ def main():
merged_image_paths, "all_binding_modes_arranged.png"
)
generate_ligand_image(
ligand, "complex.pdb", "lig_no_h.pdb", "lig.smi", f"ligand_numbering.svg"
ligand, "complex.pdb", "lig_no_h.pdb", f"ligand_numbering.svg"
)
if fig_type == "png":
cairosvg.svg2png(
Expand Down
41 changes: 7 additions & 34 deletions openmmdl/openmmdl_analysis/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
import rdkit
import mdtraj as md
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from rdkit.Chem import Draw, AllChem, SDWriter
from rdkit.Chem.Draw import rdMolDraw2D
from openbabel import pybel


def renumber_protein_residues(input_pdb, reference_pdb, output_pdb):
Expand Down Expand Up @@ -97,27 +95,6 @@ def increase_ring_indices(ring, lig_index):
return [atom_idx + lig_index for atom_idx in ring]


def convert_ligand_to_smiles(input_sdf, output_smi):
"""Converts ligand structures from an SDF file to SMILES :) format

Args:
input_sdf (str): Path to the SDF file with the ligand that wll be converted.
output_smi (str): Path to the output SMILES files.
"""
# Create a molecule supplier from an SDF file
mol_supplier = Chem.SDMolSupplier(input_sdf)

# Open the output SMILES file for writing
with open(output_smi, "w") as output_file:
# Iterate through molecules and convert each to SMILES
for mol in mol_supplier:
if mol is not None: # Make sure the molecule was successfully read
smiles = Chem.MolToSmiles(mol)
output_file.write(smiles + "\n")
else:
print("nono")


def process_pdb_file(input_pdb_filename):
"""Process a PDB file to make it compatible with the openmmdl_analysis package.

Expand Down Expand Up @@ -159,17 +136,13 @@ def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_r
print(f"No ligand with residue name '{target_resname}' found in the PDB file.")
return

# Create a new Universe with only the ligand
ligand_universe = mda.Merge(ligand_atoms)

# Save the ligand Universe as a PDB file
ligand_universe.atoms.write("lig.pdb")
#Using RDKit Converter to get SDF File
lig_atoms = ligand_atoms.convert_to("RDKIT")

# use openbabel to convert pdb to sdf
mol = next(pybel.readfile("pdb", "lig.pdb"))
mol.write("sdf", output_filename, overwrite=True)
# remove the temporary pdb file
os.remove("lig.pdb")
#Write out the SDF file
writer = SDWriter(output_filename)
writer.write(lig_atoms)
writer.close()


def renumber_atoms_in_residues(input_pdb_file, output_pdb_file, lig_name):
Expand Down
20 changes: 5 additions & 15 deletions openmmdl/openmmdl_analysis/rdkit_figure_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def generate_ligand_image(
ligand_name,
complex_pdb_file,
ligand_no_h_pdb_file,
smiles_file,
output_svg_filename,
):
"""Generates a SVG image of the ligand.
Expand All @@ -22,27 +21,18 @@ def generate_ligand_image(
ligand_name (str): Name of the ligand in the protein-ligand complex topology.
complex_pdb_file (str): Path to the protein-ligand complex PDB file.
ligand_no_h_pdb_file (str): Path to the ligand PDB file without hydrogens.
smiles_file (str): Path to the SMILES file with the reference ligand.
output_svg_filename (str): Name of the output SVG file.
"""
try:
# Load complex and ligand structures
complex = mda.Universe(complex_pdb_file)
complex_lig = complex.select_atoms(f"resname {ligand_name}")
# Load ligand without hydrogens for checking correctness atom number
ligand_no_h = mda.Universe(ligand_no_h_pdb_file)
lig_noh = ligand_no_h.select_atoms("all")
complex_lig = complex.select_atoms(f"resname {ligand_name}")

# Load ligand from PDB file
mol = Chem.MolFromPDBFile(ligand_no_h_pdb_file)
lig_rd = mol

# Load reference SMILES
with open(smiles_file, "r") as file:
reference_smiles = file.read().strip()
reference_mol = Chem.MolFromSmiles(reference_smiles)

# Prepare ligand
prepared_ligand = AllChem.AssignBondOrdersFromTemplate(reference_mol, lig_rd)
# Application of RDKit Converter to obtain rdkit mol of ligand
lig_atoms = complex_lig.convert_to("RDKIT")
prepared_ligand = Chem.RemoveAllHs(lig_atoms)
AllChem.Compute2DCoords(prepared_ligand)

# Map atom indices between ligand_no_h and complex
Expand Down