diff --git a/openmmdl/openmmdl_analysis/openmmdlanalysis.py b/openmmdl/openmmdl_analysis/openmmdlanalysis.py index b92ff341..bec0e842 100644 --- a/openmmdl/openmmdl_analysis/openmmdlanalysis.py +++ b/openmmdl/openmmdl_analysis/openmmdlanalysis.py @@ -35,7 +35,6 @@ renumber_atoms_in_residues, process_pdb, extract_and_save_ligand_as_sdf, - convert_ligand_to_smiles, ) from openmmdl.openmmdl_analysis.rmsd_calculation import ( rmsd_for_atomgroups, @@ -298,8 +297,8 @@ def main(): print("\033[1mFiles are preprocessed\033[0m") if ligand_sdf == None: - extract_and_save_ligand_as_sdf(topology, "./lig.sdf", ligand) - ligand_sdf = "./lig.sdf" + extract_and_save_ligand_as_sdf(topology, "./ligand_prepared.sdf", ligand) + ligand_sdf = "./ligand_prepared.sdf" if not pdb_md: pdb_md = mda.Universe(topology, trajectory) @@ -363,7 +362,6 @@ def main(): ligand_rings.append(current_ring) print("\033[1mLigand ring data gathered\033[0m") - convert_ligand_to_smiles(ligand_sdf, output_smi="lig.smi") if peptide != None: ligand_rings = None @@ -592,15 +590,10 @@ def main(): binding_site[binding_mode] = values occurrence_count = top_10_nodes_with_occurrences[binding_mode] occurrence_percent = 100 * occurrence_count / total_frames - with open("lig.smi", "r") as file: - reference_smiles = ( - file.read().strip() - ) # Read the SMILES from the file and remove any leading/trailing whitespace - reference_mol = Chem.MolFromSmiles(reference_smiles) - prepared_ligand = AllChem.AssignBondOrdersFromTemplate( - reference_mol, lig_rd - ) - # Generate 2D coordinates for the molecule + # Convert ligand to RDKit with Converter + lig_atoms = complex_lig.convert_to("RDKIT") + # Remove Hydrogens and get 2D representation + prepared_ligand = Chem.RemoveAllHs(lig_atoms) AllChem.Compute2DCoords(prepared_ligand) split_data = split_interaction_data(values) # Get the highlighted atom indices based on interaction type @@ -716,7 +709,7 @@ def main(): merged_image_paths, "all_binding_modes_arranged.png" ) generate_ligand_image( - ligand, "complex.pdb", "lig_no_h.pdb", "lig.smi", f"ligand_numbering.svg" + ligand, "complex.pdb", "lig_no_h.pdb", f"ligand_numbering.svg" ) if fig_type == "png": cairosvg.svg2png( diff --git a/openmmdl/openmmdl_analysis/preprocessing.py b/openmmdl/openmmdl_analysis/preprocessing.py index 4dadc2b9..71fe6e0b 100644 --- a/openmmdl/openmmdl_analysis/preprocessing.py +++ b/openmmdl/openmmdl_analysis/preprocessing.py @@ -5,10 +5,8 @@ import rdkit import mdtraj as md from rdkit import Chem -from rdkit.Chem import Draw -from rdkit.Chem import AllChem +from rdkit.Chem import Draw, AllChem, SDWriter from rdkit.Chem.Draw import rdMolDraw2D -from openbabel import pybel def renumber_protein_residues(input_pdb, reference_pdb, output_pdb): @@ -97,27 +95,6 @@ def increase_ring_indices(ring, lig_index): return [atom_idx + lig_index for atom_idx in ring] -def convert_ligand_to_smiles(input_sdf, output_smi): - """Converts ligand structures from an SDF file to SMILES :) format - - Args: - input_sdf (str): Path to the SDF file with the ligand that wll be converted. - output_smi (str): Path to the output SMILES files. - """ - # Create a molecule supplier from an SDF file - mol_supplier = Chem.SDMolSupplier(input_sdf) - - # Open the output SMILES file for writing - with open(output_smi, "w") as output_file: - # Iterate through molecules and convert each to SMILES - for mol in mol_supplier: - if mol is not None: # Make sure the molecule was successfully read - smiles = Chem.MolToSmiles(mol) - output_file.write(smiles + "\n") - else: - print("nono") - - def process_pdb_file(input_pdb_filename): """Process a PDB file to make it compatible with the openmmdl_analysis package. @@ -159,17 +136,13 @@ def extract_and_save_ligand_as_sdf(input_pdb_filename, output_filename, target_r print(f"No ligand with residue name '{target_resname}' found in the PDB file.") return - # Create a new Universe with only the ligand - ligand_universe = mda.Merge(ligand_atoms) - - # Save the ligand Universe as a PDB file - ligand_universe.atoms.write("lig.pdb") + #Using RDKit Converter to get SDF File + lig_atoms = ligand_atoms.convert_to("RDKIT") - # use openbabel to convert pdb to sdf - mol = next(pybel.readfile("pdb", "lig.pdb")) - mol.write("sdf", output_filename, overwrite=True) - # remove the temporary pdb file - os.remove("lig.pdb") + #Write out the SDF file + writer = SDWriter(output_filename) + writer.write(lig_atoms) + writer.close() def renumber_atoms_in_residues(input_pdb_file, output_pdb_file, lig_name): diff --git a/openmmdl/openmmdl_analysis/rdkit_figure_generation.py b/openmmdl/openmmdl_analysis/rdkit_figure_generation.py index bd6bcbf0..98e57bcb 100644 --- a/openmmdl/openmmdl_analysis/rdkit_figure_generation.py +++ b/openmmdl/openmmdl_analysis/rdkit_figure_generation.py @@ -13,7 +13,6 @@ def generate_ligand_image( ligand_name, complex_pdb_file, ligand_no_h_pdb_file, - smiles_file, output_svg_filename, ): """Generates a SVG image of the ligand. @@ -22,27 +21,18 @@ def generate_ligand_image( ligand_name (str): Name of the ligand in the protein-ligand complex topology. complex_pdb_file (str): Path to the protein-ligand complex PDB file. ligand_no_h_pdb_file (str): Path to the ligand PDB file without hydrogens. - smiles_file (str): Path to the SMILES file with the reference ligand. output_svg_filename (str): Name of the output SVG file. """ try: # Load complex and ligand structures complex = mda.Universe(complex_pdb_file) + complex_lig = complex.select_atoms(f"resname {ligand_name}") + # Load ligand without hydrogens for checking correctness atom number ligand_no_h = mda.Universe(ligand_no_h_pdb_file) lig_noh = ligand_no_h.select_atoms("all") - complex_lig = complex.select_atoms(f"resname {ligand_name}") - - # Load ligand from PDB file - mol = Chem.MolFromPDBFile(ligand_no_h_pdb_file) - lig_rd = mol - - # Load reference SMILES - with open(smiles_file, "r") as file: - reference_smiles = file.read().strip() - reference_mol = Chem.MolFromSmiles(reference_smiles) - - # Prepare ligand - prepared_ligand = AllChem.AssignBondOrdersFromTemplate(reference_mol, lig_rd) + # Application of RDKit Converter to obtain rdkit mol of ligand + lig_atoms = complex_lig.convert_to("RDKIT") + prepared_ligand = Chem.RemoveAllHs(lig_atoms) AllChem.Compute2DCoords(prepared_ligand) # Map atom indices between ligand_no_h and complex