Skip to content

Commit

Permalink
Merge pull request #54 from davidkastner/cc-coupling
Browse files Browse the repository at this point in the history
New script for cc coupling analysis
  • Loading branch information
davidkastner authored Jan 28, 2024
2 parents acc72f6 + 03d5792 commit 3200564
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 30 deletions.
8 changes: 4 additions & 4 deletions pyqmmm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ def md(
import pyqmmm.md.quickcsa
pyqmmm.md.quickcsa.quick_csa()


@cli.command()
@click.option("--plot_energy", "-pe", is_flag=True, help="Plot the energy of a xyz traj.")
@click.option("--flip_xyz", "-f", is_flag=True, help="Reverse and xyz trajectory.")
Expand Down Expand Up @@ -245,13 +244,14 @@ def qm(
atom_pairs = [(145, 146), (65, 145), (66, 145), (12, 145), (32, 145), (145, 149)]
pyqmmm.qm.bond_valence.calculate_bond_valence(atom_pairs, 4)
pyqmmm.qm.bond_valence.plot_bond_valence()

if orca_scan:
import pyqmmm.qm.orca_scan_plotter
atom_1 = input(" > What is your first atom being scanned? ")
atom_2 = input(" > What is your second atom being scanned? ")

df = pyqmmm.qm.orca_scan_plotter.read_orca_output("orca.out")
pyqmmm.qm.orca_scan_plotter.plot_energy(df, atom_1, atom_2)
distances, relative_energies = pyqmmm.qm.orca_scan_plotter.read_orca_output("orca.out")
print(f" > Start distance: {distances[0]}, End distance: {distances[-1]}\n")
pyqmmm.qm.orca_scan_plotter.plot_energy(distances, relative_energies, atom_1, atom_2)

if __name__ == "__main__":
# Run the command-line interface when this script is executed
Expand Down
130 changes: 130 additions & 0 deletions pyqmmm/md/cc_coupling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import numpy as np
import pandas as pd
import seaborn as sns
from typing import List
import matplotlib.pyplot as plt

def format_plot() -> None:
"""
General plotting parameters for the Kulik Lab.
"""
font = {"family": "sans-serif", "weight": "bold", "size": 10}
plt.rc("font", **font)
plt.rcParams["xtick.major.pad"] = 5
plt.rcParams["ytick.major.pad"] = 5
plt.rcParams["axes.linewidth"] = 2
plt.rcParams["xtick.major.size"] = 7
plt.rcParams["xtick.major.width"] = 2
plt.rcParams["ytick.major.size"] = 7
plt.rcParams["ytick.major.width"] = 2
plt.rcParams["xtick.direction"] = "in"
plt.rcParams["ytick.direction"] = "in"
plt.rcParams["xtick.top"] = True
plt.rcParams["ytick.right"] = True
plt.rcParams["svg.fonttype"] = "none"

def heatmap(data: str, residues: List[str], delete: List[int] = [],
out_file: str = "heatmap", v=[-0.4, 0.4]) -> None:

"""
Generates formatted heat maps.
Uses the Kulik Lab figure formatting standards.
Parameters
----------
data: str
The name of the data file, which can be a data or a dat file.
delete: List[int]
A list of the amino acids you would like removed indexed at zero.
out_file: str
The name you would like the image saved as.
Examples
--------
heatmap(data="cacovar.dat", protein="mc6sa", delete=[0,15,16,27,28,29], out_file="matrix_geom.svg")
"""

# General styling variables
light_gray = "#8e8e8e"
dark_gray = "#7e7e7e"

# Sort the delete list
delete.sort()

# Delete residues
residues = [item for index, item in enumerate(residues) if index not in delete]

# Identify matrix format and read in
contents = open(data, "r").readlines()
contents_joined = " ".join(contents) # Create a single string for parsing
if "," in contents_joined: # If a csv file
matrix = np.genfromtxt(data, delimiter=",")
elif " " in contents_joined: # If a dat file
matrix = []
for line in contents:
matrix.append(line.split())
matrix = [[float(j) for j in i] for i in matrix] # Strings to float
matrix = np.array(matrix)

np.fill_diagonal(matrix, 0) # Set the diagonal to zero as they are trivial

df = pd.DataFrame(matrix)
# Remove specific rows and columns from non-residues

# Drop rows and columns
if len(delete) > 0:
df = df.drop(delete, axis=0)
df = df.drop(delete, axis=1)

df.columns = residues
df.index = residues

# Apply base Kulik plot parameters
format_plot()

# Set cmap
cmap = "RdBu" if any(x < 0 for x in v) else "Blues"

# Generate plot
ax = sns.heatmap(
df,
cmap=cmap,
vmin=v[0],
vmax=v[1],
xticklabels=True,
yticklabels=True,
linewidth=0.03,
linecolor=light_gray,
)
ax.get_yaxis().set_tick_params(direction="out")
plt.ylabel("residues", fontsize=10, weight="bold")
plt.xlabel("residues", fontsize=10, weight="bold")
plt.tick_params(axis="y", which="major", labelsize=8, rotation=0, length=0)
plt.tick_params(axis="x", which="major", labelsize=8, rotation=90, length=0)
ax.xaxis.tick_top() # x axis on top
ax.xaxis.set_label_position("top")
# Add lines every five residues
ax.hlines([5, 10, 15, 20, 25], colors=dark_gray, *ax.get_xlim(), linewidth=1.5)
ax.vlines([5, 10, 15, 20, 25], colors=dark_gray, *ax.get_ylim(), linewidth=1.5)
# Add broders
ax.hlines([0, len(residues)], colors="k", *ax.get_xlim(), linewidth=3.5)
ax.vlines([0, len(residues)], colors="k", *ax.get_ylim(), linewidth=3.5)

extensions = ["png", "svg"]
for ext in extensions:
plt.savefig(f"{out_file}.{ext}", bbox_inches="tight", format=ext, dpi=300)
plt.close()

if __name__ == "__main__":
pdb_path = f"../1_cluster/1/rep.c0.pdb"
residues = qa.process.get_protein_sequence(pdb_path)
delete = []
heatmap(
data="cacovar.dat",
residues=residues,
delete=delete,
out_file="matrix_geom.png",
)
2 changes: 1 addition & 1 deletion pyqmmm/md/hbond_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def plot(data, file_path):
df = df[df.ge(0.1).all(axis=1)] # 10% occurence cutoff

# Sort dataframe by occurrence in descending order and select the top 7 rows
df = df.sort_values(by=[df.columns[0]], ascending=False).head(7)
df = df.sort_values(by=[df.columns[0]], ascending=False).head(15)

ax = df.plot.bar(
color="darkgray", figsize=(4, 4)
Expand Down
59 changes: 34 additions & 25 deletions pyqmmm/qm/orca_scan_plotter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
"""
Plots the results of an ORCA scan.
This script was difficult to write because of a strange matplotlib issue.
No matter how I entered the data, maplotlib would autosort the x-axis.
To fix this, I had to manually set the axes and add padding.
"""

import matplotlib.pyplot as plt
import pandas as pd

HARTREE_TO_KCAL_MOL = 627.509

Expand Down Expand Up @@ -27,8 +34,10 @@ def read_orca_output(file_name):
lines = file.readlines()

start_reading = False
data = []
distances = []
relative_energies = []
first_energy_kcal_mol = None

for line in lines:
if "The Calculated Surface using the 'Actual Energy'" in line:
start_reading = True
Expand All @@ -43,45 +52,45 @@ def read_orca_output(file_name):
if first_energy_kcal_mol is None:
first_energy_kcal_mol = energy_kcal_mol
relative_energy = energy_kcal_mol - first_energy_kcal_mol
data.append((distance, relative_energy))
distances.append(distance)
relative_energies.append(relative_energy)
except ValueError:
break
else:
break
df = pd.DataFrame(data, columns=['Distance', 'Relative Energy'])

return df
return distances, relative_energies

def plot_energy(df, atom_1, atom_2):
def plot_energy(distances, relative_energies, atom_1, atom_2):
format_plot()

# Create a figure with adjustable size
fig = plt.figure(figsize=(4, 4)) # Start with a larger figure size
ax = fig.add_subplot(111)
fig, ax = plt.subplots(figsize=(4, 4))

# Plot the data
ax.plot(df['Distance'], df['Relative Energy'], marker='o', color='b')
ax.set_xlabel(f"{atom_1}···{atom_2} distance (Å)", weight="bold")
ax.set_ylabel("relative energy (kcal/mol)", weight="bold")
# Plot the data using lists
ax.scatter(distances, relative_energies, marker='o', color='b')

# Set x-axis and y-axis limits explicitly based on the data range
x_min, x_max = df['Distance'].min(), df['Distance'].max()
y_min, y_max = df['Relative Energy'].min(), df['Relative Energy'].max()
x_range = x_max - x_min
y_range = y_max - y_min
ax.set_xlim(x_min - 0.05 * x_range, x_max + 0.05 * x_range)
ax.set_ylim(y_min - 0.05 * y_range, y_max + 0.05 * y_range)
# Set the x-axis limits based on the first and last values in the distances list
x_range = max(distances) - min(distances)
padding = x_range * 0.06

# Adjust the aspect ratio of the plot area to be square based on data ranges
ratio = x_range / y_range
ax.set_aspect(ratio, adjustable='box')
# Check if its ascending or descending to add padding
if distances[0] < distances[-1]:
ax.set_xlim([distances[0] - padding, distances[-1] + padding])
else:
ax.set_xlim([distances[0] + padding, distances[-1] - padding])

ax.set_xlabel(f"{atom_1}···{atom_2} distance (Å)", weight="bold")
ax.set_ylabel("Relative energy (kcal/mol)", weight="bold")

plt.savefig("energy_scan.png", bbox_inches="tight", dpi=600)
plt.savefig("energy_scan.svg", bbox_inches="tight", format="svg")

# Main execution
if __name__ == "__main__":
atom_1 = input(" > What is your first atom being scanned? ")
atom_2 = input(" > What is your second atom being scanned? ")

df = read_orca_output("orca.out")
plot_energy(df, atom_1, atom_2)
distances, relative_energies = read_orca_output("orca.out")
print(f" > Start distance: {distances[0]}, End distance: {distances[-1]}\n")
plot_energy(distances, relative_energies, atom_1, atom_2)

0 comments on commit 3200564

Please sign in to comment.