wolberlab · talagayev · Jun 8, 2024 · Jun 8, 2024 · Jun 8, 2024 · Jun 8, 2024
diff --git a/openmmdl/openmmdl_analysis/openmmdlanalysis.py b/openmmdl/openmmdl_analysis/openmmdlanalysis.py
@@ -35,7 +35,6 @@
     renumber_atoms_in_residues,
     process_pdb,
     extract_and_save_ligand_as_sdf,
-    convert_ligand_to_smiles,
 )
 from openmmdl.openmmdl_analysis.rmsd_calculation import (
     rmsd_for_atomgroups,
@@ -298,8 +297,8 @@ def main():
     print("\033[1mFiles are preprocessed\033[0m")
 
     if ligand_sdf == None:
-        extract_and_save_ligand_as_sdf(topology, "./lig.sdf", ligand)
-        ligand_sdf = "./lig.sdf"
+        extract_and_save_ligand_as_sdf(topology, "./ligand_prepared.sdf", ligand)
+        ligand_sdf = "./ligand_prepared.sdf"
 
     if not pdb_md:
         pdb_md = mda.Universe(topology, trajectory)
@@ -363,7 +362,6 @@ def main():
             ligand_rings.append(current_ring)
         print("\033[1mLigand ring data gathered\033[0m")
 
-        convert_ligand_to_smiles(ligand_sdf, output_smi="lig.smi")
     if peptide != None:
         ligand_rings = None
 
@@ -592,15 +590,10 @@ def main():
                 binding_site[binding_mode] = values
                 occurrence_count = top_10_nodes_with_occurrences[binding_mode]
                 occurrence_percent = 100 * occurrence_count / total_frames
-                with open("lig.smi", "r") as file:
-                    reference_smiles = (
-                        file.read().strip()
-                    )  # Read the SMILES from the file and remove any leading/trailing whitespace
-                reference_mol = Chem.MolFromSmiles(reference_smiles)
-                prepared_ligand = AllChem.AssignBondOrdersFromTemplate(
-                    reference_mol, lig_rd
-                )
-                # Generate 2D coordinates for the molecule
+                # Convert ligand to RDKit with Converter
+                lig_atoms = complex_lig.convert_to("RDKIT")
+                # Remove Hydrogens and get 2D representation
+                prepared_ligand = Chem.RemoveAllHs(lig_atoms)
                 AllChem.Compute2DCoords(prepared_ligand)
                 split_data = split_interaction_data(values)
                 # Get the highlighted atom indices based on interaction type
@@ -716,7 +709,7 @@ def main():
                 merged_image_paths, "all_binding_modes_arranged.png"
             )
             generate_ligand_image(
-                ligand, "complex.pdb", "lig_no_h.pdb", "lig.smi", f"ligand_numbering.svg"
+                ligand, "complex.pdb", "lig_no_h.pdb", f"ligand_numbering.svg"
             )
             if fig_type == "png":
                 cairosvg.svg2png(

diff --git a/openmmdl/openmmdl_analysis/preprocessing.py b/openmmdl/openmmdl_analysis/preprocessing.py
@@ -5,10 +5,8 @@
 import rdkit
 import mdtraj as md
 from rdkit import Chem
-from rdkit.Chem import Draw
-from rdkit.Chem import AllChem
+from rdkit.Chem import Draw, AllChem, SDWriter
 from rdkit.Chem.Draw import rdMolDraw2D
-from openbabel import pybel
 
 
 def renumber_protein_residues(input_pdb, reference_pdb, output_pdb):
@@ -97,27 +95,6 @@ def increase_ring_indices(ring, lig_index):
     return [atom_idx + lig_index for atom_idx in ring]
 
 
-def convert_ligand_to_smiles(input_sdf, output_smi):
-    """Converts ligand structures from an SDF file to SMILES :) format
-
-    Args:
-        input_sdf (str): Path to the SDF file with the ligand that wll be converted.
-        output_smi (str): Path to the output SMILES files.
-    """
-    # Create a molecule supplier from an SDF file
-    mol_supplier = Chem.SDMolSupplier(input_sdf)
-
-    # Open the output SMILES file for writing
-    with open(output_smi, "w") as output_file:
-        # Iterate through molecules and convert each to SMILES
-        for mol in mol_supplier:
-            if mol is not None:  # Make sure the molecule was successfully read
-                smiles = Chem.MolToSmiles(mol)
-                output_file.write(smiles + "\n")
-            else:
-                print("nono")
-
-
 def process_pdb_file(input_pdb_filename):
     """Process a PDB file to make it compatible with the openmmdl_analysis package.
 
@@ -159,17 +136,13 @@ def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_r
         print(f"No ligand with residue name '{target_resname}' found in the PDB file.")
         return
 
-    # Create a new Universe with only the ligand
-    ligand_universe = mda.Merge(ligand_atoms)
-
-    # Save the ligand Universe as a PDB file
-    ligand_universe.atoms.write("lig.pdb")
+    #Using RDKit Converter to get SDF File
+    lig_atoms = ligand_atoms.convert_to("RDKIT")
 
-    # use openbabel to convert pdb to sdf
-    mol = next(pybel.readfile("pdb", "lig.pdb"))
-    mol.write("sdf", output_filename, overwrite=True)
-    # remove the temporary pdb file
-    os.remove("lig.pdb")
+    #Write out the SDF file
+    writer = SDWriter(output_filename)
+    writer.write(lig_atoms)
+    writer.close()
 
 
 def renumber_atoms_in_residues(input_pdb_file, output_pdb_file, lig_name):

diff --git a/openmmdl/openmmdl_analysis/rdkit_figure_generation.py b/openmmdl/openmmdl_analysis/rdkit_figure_generation.py
@@ -13,7 +13,6 @@ def generate_ligand_image(
     ligand_name,
     complex_pdb_file,
     ligand_no_h_pdb_file,
-    smiles_file,
     output_svg_filename,
 ):
     """Generates a SVG image of the ligand.
@@ -22,27 +21,18 @@ def generate_ligand_image(
         ligand_name (str): Name of the ligand in the protein-ligand complex topology.
         complex_pdb_file (str): Path to the protein-ligand complex PDB file.
         ligand_no_h_pdb_file (str): Path to the ligand PDB file without hydrogens.
-        smiles_file (str): Path to the SMILES file with the reference ligand.
         output_svg_filename (str): Name of the output SVG file.
     """
     try:
         # Load complex and ligand structures
         complex = mda.Universe(complex_pdb_file)
+        complex_lig = complex.select_atoms(f"resname {ligand_name}")
+        # Load ligand without hydrogens for checking correctness atom number
         ligand_no_h = mda.Universe(ligand_no_h_pdb_file)
         lig_noh = ligand_no_h.select_atoms("all")
-        complex_lig = complex.select_atoms(f"resname {ligand_name}")
-
-        # Load ligand from PDB file
-        mol = Chem.MolFromPDBFile(ligand_no_h_pdb_file)
-        lig_rd = mol
-
-        # Load reference SMILES
-        with open(smiles_file, "r") as file:
-            reference_smiles = file.read().strip()
-        reference_mol = Chem.MolFromSmiles(reference_smiles)
-
-        # Prepare ligand
-        prepared_ligand = AllChem.AssignBondOrdersFromTemplate(reference_mol, lig_rd)
+        # Application of RDKit Converter to obtain rdkit mol of ligand
+        lig_atoms = complex_lig.convert_to("RDKIT")
+        prepared_ligand = Chem.RemoveAllHs(lig_atoms)
         AllChem.Compute2DCoords(prepared_ligand)
 
         # Map atom indices between ligand_no_h and complex