Skip to content

Commit

Permalink
Bulk update. Completely eliminated JS from templates, fixed rendering…
Browse files Browse the repository at this point in the history
… of multiplots, various small fixes
  • Loading branch information
aozalevsky committed Feb 20, 2024
1 parent a311301 commit 2398f42
Show file tree
Hide file tree
Showing 11 changed files with 955 additions and 1,039 deletions.
46 changes: 6 additions & 40 deletions ihm_validation/get_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from mmcif_io import GetInputInformation
import bokeh
import numpy as np
from bokeh.io import output_file, curdoc, export_png, export_svg, show
from bokeh.io import output_file, curdoc, export_svg, show
from bokeh.models import (ColumnDataSource, Legend, LegendItem, FactorRange,
Div, BasicTickFormatter)
from bokeh.palettes import viridis, Reds256, linear_palette
Expand All @@ -27,7 +27,6 @@
silence(EMPTY_LAYOUT, True)



class Plots(GetInputInformation):
def __init__(self, mmcif, imageDirName, driver):
super().__init__(mmcif)
Expand Down Expand Up @@ -101,6 +100,7 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
export_svg(p, filename=self.filename+'/' +
self.ID+'_' + str(i) + "_quality_at_glance_MQ.svg", webdriver=self.driver)


grid = gridplot(plots, ncols=1,
merge_tools=True,
toolbar_location='right')
Expand Down Expand Up @@ -131,7 +131,8 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
'('+str(violations[i])+' %)' for i, j in enumerate(counts)]

# set the size of the axis
n = 3 if len(model) < 3 else len(model)
# n = 3 if len(model) < 3 else len(model)
n = len(counts)
source = ColumnDataSource(
data=dict(Scores=Scores, counts=counts, legends=legends, color=viridis(n)))

Expand Down Expand Up @@ -204,10 +205,9 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
# first panel is model quality
export_svg(fullplot, filename=self.filename+'/' +
self.ID+"quality_at_glance_MQ.svg", webdriver=self.driver)
export_png(fullplot, filename=self.filename+'/' +
self.ID+"quality_at_glance_MQ.png", webdriver=self.driver)
save(fullplot, filename=self.filename+'/' +
self.ID+"quality_at_glance_MQ.html")

# DATA QUALITY
# check for sas data, if exists, plot
# this section will be updated with more data assessments, as and when it is complete
Expand Down Expand Up @@ -243,10 +243,9 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
pd.output_backend = "svg"
export_svg(pd, filename=self.filename+'/' +
self.ID+"quality_at_glance_DQ.svg", webdriver=self.driver)
export_png(pd, filename=self.filename+'/' +
self.ID+"quality_at_glance_DQ.png", webdriver=self.driver)
save(pd, filename=self.filename+'/' +
self.ID+"quality_at_glance_DQ.html")

# FIT TO DATA QUALITY
# check for sas data, if exists, plot
# this section will be updated with more data assessments, as and when it is complete
Expand Down Expand Up @@ -284,38 +283,5 @@ def plot_quality_at_glance(self, molprobity_data: dict, exv_data: dict,
pf.output_backend = "svg"
export_svg(pf, filename=self.filename+'/' +
self.ID+"quality_at_glance_FQ.svg", webdriver=self.driver)
export_png(pf, filename=self.filename+'/' +
self.ID+"quality_at_glance_FQ.png", webdriver=self.driver)
save(pf, filename=self.filename+'/' +
self.ID+"quality_at_glance_FQ.html")
# check for XL_MS data, if exists, plot
# if len(cx_fit.keys()) > 0:
# Scores = ['Model '+str(i) for i, j in cx_fit.items()]
# counts = [round(float(j), 2) for i, j in cx_fit.items()]
# # legends=[str(i) for i in counts]
# legends = ['Model ' + str(i+1) + ': ' +
# str(j)+'%' for i, j in enumerate(counts)]
# source = ColumnDataSource(data=dict(
# Scores=Scores, counts=counts, legends=legends, color=viridis(len(legends))))
# pf1 = figure(y_range=Scores, x_range=(0, max(counts)+1),
# plot_height=450, plot_width=800, title="Fit to XL-MS Input")
# rf1 = pf1.hbar(y='Scores', right='counts', color='color',
# source=source, alpha=0.8, line_color='black')
# pf1.ygrid.grid_line_color = None
# pf1.xaxis.major_label_text_font_size = "12pt"
# pf1.yaxis.major_label_text_font_size = "12pt"
# pf1.title.text_font_size = '12pt'
# pf1.title.align = "center"
# pf1.title.vertical_align = 'top'
#
# legend = Legend(items=[LegendItem(label=legends[i], renderers=[
# rf1], index=i) for i in range(len(legends))], location='center',
# orientation='vertical', label_text_font_size="12px")
# pf1.add_layout(legend, 'right')
# pf1.output_backend = "svg"
# export_svgs(pf1, filename=self.filename+'/' +
# self.ID+"quality_at_glance_FQ1.svg")
# export_png(pf1, filename=self.filename+'/' +
# self.ID+"quality_at_glance_FQ1.png")
# save(pf1, filename=self.filename+'/' +
# self.ID+"quality_at_glance_FQ1.html")
77 changes: 54 additions & 23 deletions ihm_validation/ihm_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from pathlib import Path
import utility
from report import WriteReport
from distutils.util import strtobool

# from validation.WKhtmlToPdf import wkhtmltopdf
# import utility
Expand Down Expand Up @@ -81,6 +82,11 @@
parser.add_argument('-res', type=list, default=['Rigid bodies: 1 residue per bead.',
'Flexible regions: N/A'], help="Add information on model quality (molprobity or excluded volume)")

parser.add_argument('--enable-sas', default=True, type=lambda x: bool(strtobool(x)),
help="Run SAS validation")
parser.add_argument('--enable-cx', default=False, type=lambda x: bool(strtobool(x)),
help="Run crosslinking-MS validation")

args = parser.parse_args()
if args.p.upper() == 'YES':
physics = [
Expand Down Expand Up @@ -155,9 +161,15 @@

dirNames = {
'root': str(output_path),
'html': str(Path(output_path, f'{output_prefix}_html')),
'root_html': str(Path(output_path, output_prefix)),
}

dirNames.update(
{
'html': str(Path(dirNames['root_html'], 'htmls')),
}
)

dirNames.update(
{
'images': str(Path(dirNames['html'], 'images')),
Expand Down Expand Up @@ -185,7 +197,7 @@ def createdirs(dirNames: dict):
if Path(name).is_dir():
logging.info(f"Directory {name} already exists")
else:
os.mkdir(name)
Path(name).mkdir(parents=True)
logging.info(f"Directory {name} created ")


Expand Down Expand Up @@ -244,9 +256,14 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
if __name__ == "__main__":
from pyvirtualdisplay import Display
from selenium import webdriver
display = Display(visible=0, size=(1024, 768))
display.start()
driver = webdriver.Firefox()
# display = Display(visible=0, size=(1024, 768))
# display.start()
# driver = webdriver.Firefox()

firefox_options = webdriver.FirefoxOptions()
firefox_options.add_argument('--headless')
driver = webdriver.Firefox(options=firefox_options)


logging.info("Clean up and create output directories")
utility.clean_all()
Expand Down Expand Up @@ -280,26 +297,42 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
template_dict, molprobity_dict, exv_data = report.run_model_quality(
template_dict, csvDirName=dirNames['csv'], htmlDirName=dirNames['html'])

logging.info("SAS validation")
template_dict, sas_data, sas_fit = report.run_sas_validation(template_dict)
if args.enable_sas:
logging.info("SAS validation")
template_dict, sas_data, sas_fit = report.run_sas_validation(template_dict)

logging.info("SAS validation plots")
report.run_sas_validation_plots(
template_dict, imageDirName=dirNames['images'])

logging.info("SAS validation plots")
report.run_sas_validation_plots(
template_dict, imageDirName=dirNames['images'])
else:
sas_data = {}
sas_fit = {}

# uncomment below to run CX analysis
logging.info("CX validation")
template_dict, cx_data, cx_ertypes = report.run_cx_validation(template_dict)
cx_fit = None
if args.enable_cx:
logging.info("CX validation")
template_dict, cx_data, cx_ertypes = report.run_cx_validation(template_dict)
cx_fit = None

logging.info("CX validation plots")
report.run_cx_validation_plots(template_dict,
imageDirName=dirNames['images'])

logging.info("CX validation plots")
report.run_cx_validation_plots(template_dict,
imageDirName=dirNames['images'])
else:
cx_fit = None

logging.info("Quality at a glance")
report.run_quality_glance(
molprobity_dict, exv_data, sas_data, sas_fit, cx_fit, imageDirName=dirNames['images'])

logging.info("Write PDF")
output_pdf = write_pdf(args.f, template_dict, template_pdf,
dirNames['pdf'], dirNames['pdf'])
shutil.copy(output_pdf, str(output_path))

template_dict['validation_pdf'] = Path(output_pdf).name

logging.info("Supplementary table")
template_dict = report.run_supplementary_table(template_dict,
location=args.ls,
Expand All @@ -315,6 +348,8 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
args.f, template_dict, template_file_supp, dirNames['pdf'], dirNames['pdf'])
shutil.copy(output_pdf, str(output_path))

template_dict['supplementary_pdf'] = Path(output_pdf).name

# logging.info("Write JSON")
# write_json(args.f, template_dict, dirNames['json'], dirNames['json'])

Expand All @@ -329,22 +364,18 @@ def write_json(mmcif_file: str, template_dict: dict, dirName: str, dirName_Outpu
)
# Compress html output to one file
shutil.make_archive(
base_name=dirNames['html'],
root_dir=output_path,
base_dir=output_prefix,
base_name=f'{dirNames["root_html"]}_html',
format='gztar')

logging.info("Write PDF")
output_pdf = write_pdf(args.f, template_dict, template_pdf,
dirNames['pdf'], dirNames['pdf'])

shutil.copy(output_pdf, str(output_path))

# Keep uncompressed html output for convenience
# otherwise delete
if args.keep_html:
pass
else:
shutil.rmtree(dirNames['html'])
shutil.rmtree(dirNames['root_html'])

logging.info("Final cleanup")
utility.clean_all()
33 changes: 24 additions & 9 deletions ihm_validation/mmcif_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,23 +112,38 @@ def get_id_from_entry(self) -> str:
entry = entry_init.strip()
return entry

def get_title(self) -> str:
"""get title from citations """
title = None
def get_primary_citation_info(self) -> tuple:
'''get title and authors for the primary citation'''
title, authors = None, None
for citation in self.system.citations:
if citation.is_primary:
try:
title = citation.title
except AttributeError:
title = 'Title not available/Citation not provided'
return title

try:
authors = '; '.join(citation.authors)
except AttributeError:
authors = 'Authors are not available/Citation not provided'

return (title, authors)


def get_authors(self) -> str:
"""get names of authors from citations """
cit = self.system.citations
if cit:
return '; '.join(cit[0].authors)
return 'Citation not present in file'
"""get authors of the structure; fallback to authors of primary citation """
output = None
if len(self.system.authors) > 0:
output = '; '.join(self.system.authors)
elif len(self.system.citations) > 0:
for citation in self.system.citations:
if citation.is_primary and len(citation.authors) > 0:
output = '; '.join(citation.authors)

if output is None:
output = 'Authors are not available'

return output

def get_struc_title(self) -> str:
"""get name of molecule"""
Expand Down
7 changes: 2 additions & 5 deletions ihm_validation/molprobity.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def get_internal_version(self, tool: str = 'molprobity.clashscore') -> str:

mp_core_path = Path(
Path(mp_tool_path).parent,
'../../modules/molprobity/lib/core.php'
'../../molprobity/lib/core.php'
)

if mp_core_path.is_file():
Expand Down Expand Up @@ -201,10 +201,7 @@ def run_molprobity(self, d: dict):

with open(f_name, 'w+') as f:
run(['molprobity.molprobity', self._tempcif,
# "disable_uc_volume_vs_n_atoms_check=True",
# This is a legacy option and causes extremely
# large memory consumption with recent
# molprobity versions on PDB-Dev entries
"disable_uc_volume_vs_n_atoms_check=True",

This comment has been minimized.

Copy link
@aozalevsky

aozalevsky Feb 26, 2024

Author Contributor

this is a regression, basically a reversion of #24bc1b

"coot=False"],
stdout=f,
cwd=self.cache)
Expand Down
14 changes: 9 additions & 5 deletions ihm_validation/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@ def run_entry_composition(self, Template_Dict: dict) -> dict:
Template_Dict['ID_R'] = (
self.input.get_id()[0:6]+'_'+self.input.get_id()[6:]).split()
Template_Dict['Molecule'] = self.input.get_struc_title()
Template_Dict['Title'] = self.input.get_title()
Template_Dict['Authors'] = self.input.get_authors()
title, authors = self.input.get_primary_citation_info()
Template_Dict['Citation_Title'] = title
Template_Dict['Citation_Authors'] = authors
Template_Dict['Entry_list'] = utility.dict_to_JSlist(
self.input.get_composition())
Template_Dict['number_of_molecules'] = self.input.get_number_of_models()
Expand Down Expand Up @@ -510,10 +512,12 @@ def run_supplementary_table(self,
# Template_Dict['validation_input'] = validation_input

validation_input = []
if Template_Dict['cx_stats_per_model']:
min_cx = min(Template_Dict['cx_stats_per_model'])
max_cx = max(Template_Dict['cx_stats_per_model'])
validation_input.append(f'Satisfaction of crosslinks: {min_cx:.2f}-{max_cx:.2f}%')
if 'cx_stats_per_model' in Template_Dict:
if Template_Dict['cx_stats_per_model']:
min_cx = min(Template_Dict['cx_stats_per_model'])
max_cx = max(Template_Dict['cx_stats_per_model'])
validation_input.append(f'Satisfaction of crosslinks: {min_cx:.2f}-{max_cx:.2f}%')

if len(validation_input) == 0:
validation_input.append('Fit of model to information used to compute it has not been determined')
Template_Dict['validation_input'] = validation_input
Expand Down
Loading

0 comments on commit 2398f42

Please sign in to comment.