From 03d57926bfad95b686281831d91d4a525bedefa6 Mon Sep 17 00:00:00 2001 From: David Kastner Date: Sat, 27 Jan 2024 23:47:05 -0500 Subject: [PATCH] New script for cc coupling analysis --- pyqmmm/cli.py | 8 +- pyqmmm/md/cc_coupling.py | 130 +++++++++++++++++++++++++++++++++ pyqmmm/md/hbond_analyzer.py | 2 +- pyqmmm/qm/orca_scan_plotter.py | 59 ++++++++------- 4 files changed, 169 insertions(+), 30 deletions(-) create mode 100644 pyqmmm/md/cc_coupling.py diff --git a/pyqmmm/cli.py b/pyqmmm/cli.py index ceba7ab..cbb7ef7 100644 --- a/pyqmmm/cli.py +++ b/pyqmmm/cli.py @@ -171,7 +171,6 @@ def md( import pyqmmm.md.quickcsa pyqmmm.md.quickcsa.quick_csa() - @cli.command() @click.option("--plot_energy", "-pe", is_flag=True, help="Plot the energy of a xyz traj.") @click.option("--flip_xyz", "-f", is_flag=True, help="Reverse and xyz trajectory.") @@ -245,13 +244,14 @@ def qm( atom_pairs = [(145, 146), (65, 145), (66, 145), (12, 145), (32, 145), (145, 149)] pyqmmm.qm.bond_valence.calculate_bond_valence(atom_pairs, 4) pyqmmm.qm.bond_valence.plot_bond_valence() + if orca_scan: import pyqmmm.qm.orca_scan_plotter atom_1 = input(" > What is your first atom being scanned? ") atom_2 = input(" > What is your second atom being scanned? ") - - df = pyqmmm.qm.orca_scan_plotter.read_orca_output("orca.out") - pyqmmm.qm.orca_scan_plotter.plot_energy(df, atom_1, atom_2) + distances, relative_energies = pyqmmm.qm.orca_scan_plotter.read_orca_output("orca.out") + print(f" > Start distance: {distances[0]}, End distance: {distances[-1]}\n") + pyqmmm.qm.orca_scan_plotter.plot_energy(distances, relative_energies, atom_1, atom_2) if __name__ == "__main__": # Run the command-line interface when this script is executed diff --git a/pyqmmm/md/cc_coupling.py b/pyqmmm/md/cc_coupling.py new file mode 100644 index 0000000..487d7b6 --- /dev/null +++ b/pyqmmm/md/cc_coupling.py @@ -0,0 +1,130 @@ +import numpy as np +import pandas as pd +import seaborn as sns +from typing import List +import matplotlib.pyplot as plt + +def format_plot() -> None: + """ + General plotting parameters for the Kulik Lab. + + """ + font = {"family": "sans-serif", "weight": "bold", "size": 10} + plt.rc("font", **font) + plt.rcParams["xtick.major.pad"] = 5 + plt.rcParams["ytick.major.pad"] = 5 + plt.rcParams["axes.linewidth"] = 2 + plt.rcParams["xtick.major.size"] = 7 + plt.rcParams["xtick.major.width"] = 2 + plt.rcParams["ytick.major.size"] = 7 + plt.rcParams["ytick.major.width"] = 2 + plt.rcParams["xtick.direction"] = "in" + plt.rcParams["ytick.direction"] = "in" + plt.rcParams["xtick.top"] = True + plt.rcParams["ytick.right"] = True + plt.rcParams["svg.fonttype"] = "none" + +def heatmap(data: str, residues: List[str], delete: List[int] = [], + out_file: str = "heatmap", v=[-0.4, 0.4]) -> None: + + """ + Generates formatted heat maps. + + Uses the Kulik Lab figure formatting standards. + + Parameters + ---------- + data: str + The name of the data file, which can be a data or a dat file. + delete: List[int] + A list of the amino acids you would like removed indexed at zero. + out_file: str + The name you would like the image saved as. + + Examples + -------- + heatmap(data="cacovar.dat", protein="mc6sa", delete=[0,15,16,27,28,29], out_file="matrix_geom.svg") + + """ + + # General styling variables + light_gray = "#8e8e8e" + dark_gray = "#7e7e7e" + + # Sort the delete list + delete.sort() + + # Delete residues + residues = [item for index, item in enumerate(residues) if index not in delete] + + # Identify matrix format and read in + contents = open(data, "r").readlines() + contents_joined = " ".join(contents) # Create a single string for parsing + if "," in contents_joined: # If a csv file + matrix = np.genfromtxt(data, delimiter=",") + elif " " in contents_joined: # If a dat file + matrix = [] + for line in contents: + matrix.append(line.split()) + matrix = [[float(j) for j in i] for i in matrix] # Strings to float + matrix = np.array(matrix) + + np.fill_diagonal(matrix, 0) # Set the diagonal to zero as they are trivial + + df = pd.DataFrame(matrix) + # Remove specific rows and columns from non-residues + + # Drop rows and columns + if len(delete) > 0: + df = df.drop(delete, axis=0) + df = df.drop(delete, axis=1) + + df.columns = residues + df.index = residues + + # Apply base Kulik plot parameters + format_plot() + + # Set cmap + cmap = "RdBu" if any(x < 0 for x in v) else "Blues" + + # Generate plot + ax = sns.heatmap( + df, + cmap=cmap, + vmin=v[0], + vmax=v[1], + xticklabels=True, + yticklabels=True, + linewidth=0.03, + linecolor=light_gray, + ) + ax.get_yaxis().set_tick_params(direction="out") + plt.ylabel("residues", fontsize=10, weight="bold") + plt.xlabel("residues", fontsize=10, weight="bold") + plt.tick_params(axis="y", which="major", labelsize=8, rotation=0, length=0) + plt.tick_params(axis="x", which="major", labelsize=8, rotation=90, length=0) + ax.xaxis.tick_top() # x axis on top + ax.xaxis.set_label_position("top") + # Add lines every five residues + ax.hlines([5, 10, 15, 20, 25], colors=dark_gray, *ax.get_xlim(), linewidth=1.5) + ax.vlines([5, 10, 15, 20, 25], colors=dark_gray, *ax.get_ylim(), linewidth=1.5) + # Add broders + ax.hlines([0, len(residues)], colors="k", *ax.get_xlim(), linewidth=3.5) + ax.vlines([0, len(residues)], colors="k", *ax.get_ylim(), linewidth=3.5) + + extensions = ["png", "svg"] + for ext in extensions: + plt.savefig(f"{out_file}.{ext}", bbox_inches="tight", format=ext, dpi=300) + plt.close() + +if __name__ == "__main__": + pdb_path = f"../1_cluster/1/rep.c0.pdb" + residues = qa.process.get_protein_sequence(pdb_path) + delete = [] + heatmap( + data="cacovar.dat", + residues=residues, + delete=delete, + out_file="matrix_geom.png", + ) \ No newline at end of file diff --git a/pyqmmm/md/hbond_analyzer.py b/pyqmmm/md/hbond_analyzer.py index 91b9b0e..9ebf179 100644 --- a/pyqmmm/md/hbond_analyzer.py +++ b/pyqmmm/md/hbond_analyzer.py @@ -247,7 +247,7 @@ def plot(data, file_path): df = df[df.ge(0.1).all(axis=1)] # 10% occurence cutoff # Sort dataframe by occurrence in descending order and select the top 7 rows - df = df.sort_values(by=[df.columns[0]], ascending=False).head(7) + df = df.sort_values(by=[df.columns[0]], ascending=False).head(15) ax = df.plot.bar( color="darkgray", figsize=(4, 4) diff --git a/pyqmmm/qm/orca_scan_plotter.py b/pyqmmm/qm/orca_scan_plotter.py index 6331daf..23de258 100644 --- a/pyqmmm/qm/orca_scan_plotter.py +++ b/pyqmmm/qm/orca_scan_plotter.py @@ -1,5 +1,12 @@ +""" +Plots the results of an ORCA scan. + +This script was difficult to write because of a strange matplotlib issue. +No matter how I entered the data, maplotlib would autosort the x-axis. +To fix this, I had to manually set the axes and add padding. +""" + import matplotlib.pyplot as plt -import pandas as pd HARTREE_TO_KCAL_MOL = 627.509 @@ -27,8 +34,10 @@ def read_orca_output(file_name): lines = file.readlines() start_reading = False - data = [] + distances = [] + relative_energies = [] first_energy_kcal_mol = None + for line in lines: if "The Calculated Surface using the 'Actual Energy'" in line: start_reading = True @@ -43,45 +52,45 @@ def read_orca_output(file_name): if first_energy_kcal_mol is None: first_energy_kcal_mol = energy_kcal_mol relative_energy = energy_kcal_mol - first_energy_kcal_mol - data.append((distance, relative_energy)) + distances.append(distance) + relative_energies.append(relative_energy) except ValueError: break else: break - df = pd.DataFrame(data, columns=['Distance', 'Relative Energy']) - return df + return distances, relative_energies -def plot_energy(df, atom_1, atom_2): +def plot_energy(distances, relative_energies, atom_1, atom_2): format_plot() - + # Create a figure with adjustable size - fig = plt.figure(figsize=(4, 4)) # Start with a larger figure size - ax = fig.add_subplot(111) + fig, ax = plt.subplots(figsize=(4, 4)) - # Plot the data - ax.plot(df['Distance'], df['Relative Energy'], marker='o', color='b') - ax.set_xlabel(f"{atom_1}···{atom_2} distance (Å)", weight="bold") - ax.set_ylabel("relative energy (kcal/mol)", weight="bold") + # Plot the data using lists + ax.scatter(distances, relative_energies, marker='o', color='b') - # Set x-axis and y-axis limits explicitly based on the data range - x_min, x_max = df['Distance'].min(), df['Distance'].max() - y_min, y_max = df['Relative Energy'].min(), df['Relative Energy'].max() - x_range = x_max - x_min - y_range = y_max - y_min - ax.set_xlim(x_min - 0.05 * x_range, x_max + 0.05 * x_range) - ax.set_ylim(y_min - 0.05 * y_range, y_max + 0.05 * y_range) + # Set the x-axis limits based on the first and last values in the distances list + x_range = max(distances) - min(distances) + padding = x_range * 0.06 - # Adjust the aspect ratio of the plot area to be square based on data ranges - ratio = x_range / y_range - ax.set_aspect(ratio, adjustable='box') + # Check if its ascending or descending to add padding + if distances[0] < distances[-1]: + ax.set_xlim([distances[0] - padding, distances[-1] + padding]) + else: + ax.set_xlim([distances[0] + padding, distances[-1] - padding]) + + ax.set_xlabel(f"{atom_1}···{atom_2} distance (Å)", weight="bold") + ax.set_ylabel("Relative energy (kcal/mol)", weight="bold") plt.savefig("energy_scan.png", bbox_inches="tight", dpi=600) plt.savefig("energy_scan.svg", bbox_inches="tight", format="svg") +# Main execution if __name__ == "__main__": atom_1 = input(" > What is your first atom being scanned? ") atom_2 = input(" > What is your second atom being scanned? ") - df = read_orca_output("orca.out") - plot_energy(df, atom_1, atom_2) + distances, relative_energies = read_orca_output("orca.out") + print(f" > Start distance: {distances[0]}, End distance: {distances[-1]}\n") + plot_energy(distances, relative_energies, atom_1, atom_2)