Skip to content

Commit

Permalink
Bulk update. Completely eliminated JS from templates, fixed rendering…
Browse files Browse the repository at this point in the history
… of multiplots, various small fixes
  • Loading branch information
aozalevsky committed Feb 20, 2024
1 parent a311301 commit 2398f42
Show file tree
Hide file tree
Showing 11 changed files with 955 additions and 1,039 deletions.
46 changes: 6 additions & 40 deletions ihm_validation/get_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from mmcif_io import GetInputInformation
import bokeh
import numpy as np
from bokeh.io import output_file, curdoc, export_png, export_svg, show
from bokeh.io import output_file, curdoc, export_svg, show
from bokeh.models import (ColumnDataSource, Legend, LegendItem, FactorRange,
Div, BasicTickFormatter)
from bokeh.palettes import viridis, Reds256, linear_palette
Expand All @@ -27,7 +27,6 @@
silence(EMPTY_LAYOUT, True)



class Plots(GetInputInformation):
def __init__(self, mmcif, imageDirName, driver):
super().__init__(mmcif)
Expand Down Expand Up @@ -101,6 +100,7 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
export_svg(p, filename=self.filename+'/' +
self.ID+'_' + str(i) + "_quality_at_glance_MQ.svg", webdriver=self.driver)


grid = gridplot(plots, ncols=1,
merge_tools=True,
toolbar_location='right')
Expand Down Expand Up @@ -131,7 +131,8 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
'('+str(violations[i])+' %)' for i, j in enumerate(counts)]

# set the size of the axis
n = 3 if len(model) < 3 else len(model)
# n = 3 if len(model) < 3 else len(model)
n = len(counts)
source = ColumnDataSource(
data=dict(Scores=Scores, counts=counts, legends=legends, color=viridis(n)))

Expand Down Expand Up @@ -204,10 +205,9 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
# first panel is model quality
export_svg(fullplot, filename=self.filename+'/' +
self.ID+"quality_at_glance_MQ.svg", webdriver=self.driver)
export_png(fullplot, filename=self.filename+'/' +
self.ID+"quality_at_glance_MQ.png", webdriver=self.driver)
save(fullplot, filename=self.filename+'/' +
self.ID+"quality_at_glance_MQ.html")

# DATA QUALITY
# check for sas data, if exists, plot
# this section will be updated with more data assessments, as and when it is complete
Expand Down Expand Up @@ -243,10 +243,9 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
pd.output_backend = "svg"
export_svg(pd, filename=self.filename+'/' +
self.ID+"quality_at_glance_DQ.svg", webdriver=self.driver)
export_png(pd, filename=self.filename+'/' +
self.ID+"quality_at_glance_DQ.png", webdriver=self.driver)
save(pd, filename=self.filename+'/' +
self.ID+"quality_at_glance_DQ.html")

# FIT TO DATA QUALITY
# check for sas data, if exists, plot
# this section will be updated with more data assessments, as and when it is complete
Expand Down Expand Up @@ -284,38 +283,5 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
pf.output_backend = "svg"
export_svg(pf, filename=self.filename+'/' +
self.ID+"quality_at_glance_FQ.svg", webdriver=self.driver)
export_png(pf, filename=self.filename+'/' +
self.ID+"quality_at_glance_FQ.png", webdriver=self.driver)
save(pf, filename=self.filename+'/' +
self.ID+"quality_at_glance_FQ.html")
# check for XL_MS data, if exists, plot
# if len(cx_fit.keys()) > 0:
# Scores = ['Model '+str(i) for i, j in cx_fit.items()]
# counts = [round(float(j), 2) for i, j in cx_fit.items()]
# # legends=[str(i) for i in counts]
# legends = ['Model ' + str(i+1) + ': ' +
# str(j)+'%' for i, j in enumerate(counts)]
# source = ColumnDataSource(data=dict(
# Scores=Scores, counts=counts, legends=legends, color=viridis(len(legends))))
# pf1 = figure(y_range=Scores, x_range=(0, max(counts)+1),
# plot_height=450, plot_width=800, title="Fit to XL-MS Input")
# rf1 = pf1.hbar(y='Scores', right='counts', color='color',
# source=source, alpha=0.8, line_color='black')
# pf1.ygrid.grid_line_color = None
# pf1.xaxis.major_label_text_font_size = "12pt"
# pf1.yaxis.major_label_text_font_size = "12pt"
# pf1.title.text_font_size = '12pt'
# pf1.title.align = "center"
# pf1.title.vertical_align = 'top'
#
# legend = Legend(items=[LegendItem(label=legends[i], renderers=[
# rf1], index=i) for i in range(len(legends))], location='center',
# orientation='vertical', label_text_font_size="12px")
# pf1.add_layout(legend, 'right')
# pf1.output_backend = "svg"
# export_svgs(pf1, filename=self.filename+'/' +
# self.ID+"quality_at_glance_FQ1.svg")
# export_png(pf1, filename=self.filename+'/' +
# self.ID+"quality_at_glance_FQ1.png")
# save(pf1, filename=self.filename+'/' +
# self.ID+"quality_at_glance_FQ1.html")
77 changes: 54 additions & 23 deletions ihm_validation/ihm_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from pathlib import Path
import utility
from report import WriteReport
from distutils.util import strtobool

# from validation.WKhtmlToPdf import wkhtmltopdf
# import utility
Expand Down Expand Up @@ -81,6 +82,11 @@
parser.add_argument('-res', type=list, default=['Rigid bodies: 1 residue per bead.',
'Flexible regions: N/A'], help="Add information on model quality (molprobity or excluded volume)")

parser.add_argument('--enable-sas', default=True, type=lambda x: bool(strtobool(x)),
help="Run SAS validation")
parser.add_argument('--enable-cx', default=False, type=lambda x: bool(strtobool(x)),
help="Run crosslinking-MS validation")

args = parser.parse_args()
if args.p.upper() == 'YES':
physics = [
Expand Down Expand Up @@ -155,9 +161,15 @@

dirNames = {
'root': str(output_path),
'html': str(Path(output_path, f'{output_prefix}_html')),
'root_html': str(Path(output_path, output_prefix)),
}

dirNames.update(
{
'html': str(Path(dirNames['root_html'], 'htmls')),
}
)

dirNames.update(
{
'images': str(Path(dirNames['html'], 'images')),
Expand Down Expand Up @@ -185,7 +197,7 @@ def createdirs(dirNames: dict):
if Path(name).is_dir():
logging.info(f"Directory {name} already exists")
else:
os.mkdir(name)
Path(name).mkdir(parents=True)
logging.info(f"Directory {name} created ")


Expand Down Expand Up @@ -244,9 +256,14 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
if __name__ == "__main__":
from pyvirtualdisplay import Display
from selenium import webdriver
display = Display(visible=0, size=(1024, 768))
display.start()
driver = webdriver.Firefox()
# display = Display(visible=0, size=(1024, 768))
# display.start()
# driver = webdriver.Firefox()

firefox_options = webdriver.FirefoxOptions()
firefox_options.add_argument('--headless')
driver = webdriver.Firefox(options=firefox_options)


logging.info("Clean up and create output directories")
utility.clean_all()
Expand Down Expand Up @@ -280,26 +297,42 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
template_dict, molprobity_dict, exv_data = report.run_model_quality(
template_dict, csvDirName=dirNames['csv'], htmlDirName=dirNames['html'])

logging.info("SAS validation")
template_dict, sas_data, sas_fit = report.run_sas_validation(template_dict)
if args.enable_sas:
logging.info("SAS validation")
template_dict, sas_data, sas_fit = report.run_sas_validation(template_dict)

logging.info("SAS validation plots")
report.run_sas_validation_plots(
template_dict, imageDirName=dirNames['images'])

logging.info("SAS validation plots")
report.run_sas_validation_plots(
template_dict, imageDirName=dirNames['images'])
else:
sas_data = {}
sas_fit = {}

# uncomment below to run CX analysis
logging.info("CX validation")
template_dict, cx_data, cx_ertypes = report.run_cx_validation(template_dict)
cx_fit = None
if args.enable_cx:
logging.info("CX validation")
template_dict, cx_data, cx_ertypes = report.run_cx_validation(template_dict)
cx_fit = None

logging.info("CX validation plots")
report.run_cx_validation_plots(template_dict,
imageDirName=dirNames['images'])

logging.info("CX validation plots")
report.run_cx_validation_plots(template_dict,
imageDirName=dirNames['images'])
else:
cx_fit = None

logging.info("Quality at a glance")
report.run_quality_glance(
molprobity_dict, exv_data, sas_data, sas_fit, cx_fit, imageDirName=dirNames['images'])

logging.info("Write PDF")
output_pdf = write_pdf(args.f, template_dict, template_pdf,
dirNames['pdf'], dirNames['pdf'])
shutil.copy(output_pdf, str(output_path))

template_dict['validation_pdf'] = Path(output_pdf).name

logging.info("Supplementary table")
template_dict = report.run_supplementary_table(template_dict,
location=args.ls,
Expand All @@ -315,6 +348,8 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
args.f, template_dict, template_file_supp, dirNames['pdf'], dirNames['pdf'])
shutil.copy(output_pdf, str(output_path))

template_dict['supplementary_pdf'] = Path(output_pdf).name

# logging.info("Write JSON")
# write_json(args.f, template_dict, dirNames['json'], dirNames['json'])

Expand All @@ -329,22 +364,18 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
)
# Compress html output to one file
shutil.make_archive(
base_name=dirNames['html'],
root_dir=output_path,
base_dir=output_prefix,
base_name=f'{dirNames["root_html"]}_html',
format='gztar')

logging.info("Write PDF")
output_pdf = write_pdf(args.f, template_dict, template_pdf,
dirNames['pdf'], dirNames['pdf'])

shutil.copy(output_pdf, str(output_path))

# Keep uncompressed html output for convenience
# otherwise delete
if args.keep_html:
pass
else:
shutil.rmtree(dirNames['html'])
shutil.rmtree(dirNames['root_html'])

logging.info("Final cleanup")
utility.clean_all()
33 changes: 24 additions & 9 deletions ihm_validation/mmcif_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,23 +112,38 @@ def get_id_from_entry(self) -> str:
entry = entry_init.strip()
return entry

def get_title(self) -> str:
"""get title from citations """
title = None
def get_primary_citation_info(self) -> tuple:
'''get title and authors for the primary citation'''
title, authors = None, None
for citation in self.system.citations:
if citation.is_primary:
try:
title = citation.title
except AttributeError:
title = 'Title not available/Citation not provided'
return title

try:
authors = '; '.join(citation.authors)
except AttributeError:
authors = 'Authors are not available/Citation not provided'

return (title, authors)


def get_authors(self) -> str:
"""get names of authors from citations """
cit = self.system.citations
if cit:
return '; '.join(cit[0].authors)
return 'Citation not present in file'
"""get authors of the structure; fallback to authors of primary citation """
output = None
if len(self.system.authors) > 0:
output = '; '.join(self.system.authors)
elif len(self.system.citations) > 0:
for citation in self.system.citations:
if citation.is_primary and len(citation.authors) > 0:
output = '; '.join(citation.authors)

if output is None:
output = 'Authors are not available'

return output

def get_struc_title(self) -> str:
"""get name of molecule"""
Expand Down
7 changes: 2 additions & 5 deletions ihm_validation/molprobity.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def get_internal_version(self, tool: str = 'molprobity.clashscore') -> str:

mp_core_path = Path(
Path(mp_tool_path).parent,
'../../modules/molprobity/lib/core.php'
'../../molprobity/lib/core.php'
)

if mp_core_path.is_file():
Expand Down Expand Up @@ -201,10 +201,7 @@ def run_molprobity(self, d: dict):

with open(f_name, 'w+') as f:
run(['molprobity.molprobity', self._tempcif,
# "disable_uc_volume_vs_n_atoms_check=True",
# This is a legacy option and causes extremely
# large memory consumption with recent
# molprobity versions on PDB-Dev entries
"disable_uc_volume_vs_n_atoms_check=True",

This comment has been minimized.

Copy link
@aozalevsky

aozalevsky Feb 26, 2024

Author Contributor

this is a regression, basically a reversion of #24bc1b

"coot=False"],
stdout=f,
cwd=self.cache)
Expand Down
14 changes: 9 additions & 5 deletions ihm_validation/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@ def run_entry_composition(self, Template_Dict: dict) -> dict:
Template_Dict['ID_R'] = (
self.input.get_id()[0:6]+'_'+self.input.get_id()[6:]).split()
Template_Dict['Molecule'] = self.input.get_struc_title()
Template_Dict['Title'] = self.input.get_title()
Template_Dict['Authors'] = self.input.get_authors()
title, authors = self.input.get_primary_citation_info()
Template_Dict['Citation_Title'] = title
Template_Dict['Citation_Authors'] = authors
Template_Dict['Entry_list'] = utility.dict_to_JSlist(
self.input.get_composition())
Template_Dict['number_of_molecules'] = self.input.get_number_of_models()
Expand Down Expand Up @@ -510,10 +512,12 @@ def run_supplementary_table(self,
# Template_Dict['validation_input'] = validation_input

validation_input = []
if Template_Dict['cx_stats_per_model']:
min_cx = min(Template_Dict['cx_stats_per_model'])
max_cx = max(Template_Dict['cx_stats_per_model'])
validation_input.append(f'Satisfaction of crosslinks: {min_cx:.2f}-{max_cx:.2f}%')
if 'cx_stats_per_model' in Template_Dict:
if Template_Dict['cx_stats_per_model']:
min_cx = min(Template_Dict['cx_stats_per_model'])
max_cx = max(Template_Dict['cx_stats_per_model'])
validation_input.append(f'Satisfaction of crosslinks: {min_cx:.2f}-{max_cx:.2f}%')

if len(validation_input) == 0:
validation_input.append('Fit of model to information used to compute it has not been determined')
Template_Dict['validation_input'] = validation_input
Expand Down
Loading

0 comments on commit 2398f42

Please sign in to comment.