Merge pull request #18 from theiagen/dev

* v2.2.0 updates * add source field and remove aminoglycosides from rrs * prevent future warning * make sure rifampicin is rifampin * update documentation and versions * remove duplicated thing
theiagen · Dec 11, 2024 · 6d83dc7 · 6d83dc7
2 parents b4a87e2 + cdcf302
commit 6d83dc7
Show file tree

Hide file tree

Showing 8 changed files with 18 additions and 16 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -3,7 +3,7 @@
 # Shelby Bennett, Erin Young, Curtis Kapsak, & Kutluhan Incekara
 
 ARG SAMTOOLS_VER="1.18"
-ARG TBP_PARSER_VER="2.2.0"
+ARG TBP_PARSER_VER="2.2.1"
 
 FROM ubuntu:jammy AS builder
 
@@ -42,7 +42,7 @@ ARG TBP_PARSER_VER
 LABEL base.image="ubuntu:jammy"
 LABEL dockerfile.version="1"
 LABEL software="tbp-parser"
-LABEL software.version="2.2.0"
+LABEL software.version="2.2.1"
 LABEL description="tbp-parser and samtools"
 LABEL website="https://github.com/theiagen/tbp-parser"
 LABEL license="https://github.com/theiagen/tbp-parser/blob/main/LICENSE"

diff --git a/docs/inputs/theiaprok.md b/docs/inputs/theiaprok.md
@@ -29,7 +29,7 @@ The following optional inputs are also available for user modification on Terra:
 | `merlin_magic` | **tbp_parser_coverage_regions_bed** | File | A BED file containing the regions to calculate percent coverage for | [tbdb-modified-regions.bed](https://github.com/theiagen/tbp-parser/blob/main/data/tbdb-modified-regions.bed) |
 | `merlin_magic` | **tbp_parser_coverage_threshold** | Int | The minimum percentage of a region that has depth above the threshold set by `min_depth` (used for a gene/locus to pass QC) | 100 |
 | `merlin_magic` | **tbp_parser_debug** | Boolean | Set to `false` to turn off debug mode for `tbp-parser` | `true` |
-| `merlin_magic` | **tbp_parser_docker_image** | String | The Docker image to use when running `tbp-parser` | "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.0" |
+| `merlin_magic` | **tbp_parser_docker_image** | String | The Docker image to use when running `tbp-parser` | "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.1" |
 | `merlin_magic` | **tbp_parser_etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in `tbp-parser` | 0.1 |
 | `merlin_magic` | **tbp_parser_expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied |  |
 | `merlin_magic` | **tbp_parser_min_depth** | Int | Minimum depth for a variant to pass QC in tbp_parser | 10 |

diff --git a/docs/usage.md b/docs/usage.md
@@ -9,19 +9,19 @@ title: Getting Started
 We highly recommend using the following Docker image to run tbp-parser:
 
 ``` bash
-docker pull us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.0 #(1)!
+docker pull us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.1 #(1)!
 ```
 
 1. We host our Docker images on the Google Artifact Registry so that they are always available for usage.
 
 The entrypoint for this Docker image is the `tbp-parser` help message. To run this container *interactively*, you can use the following command:
 
 ``` bash
-docker run -it --entrypoint=/bin/bash us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.0
+docker run -it --entrypoint=/bin/bash us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.1
 
 # Once inside the container interactively, you can run the tbp-parser tool
 python3 /tbp-parser/tbp_parser/tbp_parser.py -v
-# v2.2.0
+# v2.2.1
 ```
 
 ### Locally with Python

diff --git a/tbp_parser/Coverage.py b/tbp_parser/Coverage.py
@@ -140,8 +140,10 @@ def reformat_coverage(self):
       except:
         self.logger.error("An expected gene ({}) was not found in laboratorian report.\nSomething may have gone wrong.".format(gene))
 
-      DF_COVERAGE = pd.concat([DF_COVERAGE, pd.DataFrame({"Gene": gene, "Percent_Coverage": percent_coverage, "Warning": warning}, index=[0])], ignore_index=True)
-
+      if len(DF_COVERAGE) == 0:
+        DF_COVERAGE = pd.DataFrame({"Gene": gene, "Percent_Coverage": percent_coverage, "Warning": warning}, index=[0])
+      else:
+        DF_COVERAGE = pd.concat([DF_COVERAGE, pd.DataFrame({"Gene": gene, "Percent_Coverage": percent_coverage, "Warning": warning}, index=[0])], ignore_index=True)
 
     if self.tngs:
       self.logger.debug("COV:Merging the tNGS expert rule regions coverage with the initial coverage report and renaming columns")

diff --git a/tbp_parser/LIMS.py b/tbp_parser/LIMS.py
@@ -281,7 +281,7 @@ def apply_lims_rules(self, gene_dictionary, DF_LIMS, max_mdl_resistance, antimic
               elif substitution not in mutations_per_gene[gene]:
                   mutations_per_gene[gene] = "{}; {}".format("".join(mutations_per_gene[gene]), substitution)
           else:
-            self.logger.debug("LIMS:This mutation (\"{}\", origin gene: {}) is not being added to the LIMS report because it is a non-rpoB RRDR \"S\" mutation".format(mutation, gene))
+            self.logger.debug("LIMS:This mutation (\"{}\", origin gene: {}) is not being added to the LIMS report because it is not an rpoB RRDR \"S\" mutation".format(mutation, gene))
 
         # Mutations for a particular gene have been added to the mutations_per_gene dictionary.
         # if that gene has mutations associated with it, we want to perform some additional filtration,

diff --git a/tbp_parser/Laboratorian.py b/tbp_parser/Laboratorian.py
@@ -56,7 +56,7 @@ def iterate_section(self, variant_section, row_list):
       # extract all of the annotations for the variant
       variant.extract_annotations()
 
-      self.logger.debug("LAB:The current variant (gene: {}) has {} annotations; now iterating through them".format(variant.gene_name, len(variant.annotation_dictionary)))
+      self.logger.debug("LAB:The current variant (gene: {}) has {} annotation(s); now iterating through them".format(variant.gene_name, len(variant.annotation_dictionary)))
       for annotation_row in variant.annotation_dictionary.values():
         # complete the row objects
         annotation_row.complete_row()

diff --git a/tbp_parser/Row.py b/tbp_parser/Row.py
@@ -51,7 +51,7 @@ def __init__(self, logger, variant, who_confidence, drug, gene_name=None, depth=
         self.tbprofiler_variant_substitution_type = self.variant.type
         self.tbprofiler_variant_substitution_nt = self.variant.nucleotide_change
         self.tbprofiler_variant_substitution_aa = self.variant.protein_change
-        self.logger.debug("ROW:This mutations is a {} with {} and {}".format(self.tbprofiler_variant_substitution_type, self.tbprofiler_variant_substitution_nt, self.tbprofiler_variant_substitution_aa))
+        self.logger.debug("ROW:This mutation is a {} with nucleotide change \"{}\" and protein change \"{}\"".format(self.tbprofiler_variant_substitution_type, self.tbprofiler_variant_substitution_nt, self.tbprofiler_variant_substitution_aa))
         # change blank aa substitutions to NA
         if self.tbprofiler_variant_substitution_aa == "":
           self.tbprofiler_variant_substitution_aa = "NA"
@@ -283,7 +283,6 @@ def complete_row(self):
 
     self.logger.debug("ROW:Interpretation logic applied or skipped; now removing any 'noexpert' suffixes")
     self.describe_rationale()
-    self.logger.debug("rationale = {}".format(self.rationale))
     self.logger.debug("ROW:Finished completing the row's values, now exiting function")
 
   def rank_annotation(self): 

diff --git a/tbp_parser/Variant.py b/tbp_parser/Variant.py
@@ -120,11 +120,14 @@ def extract_annotations(self):
 
       self.logger.debug("VAR:The annotation dictionary has all gene associated drugs included; it now has a length of {}".format(len(self.annotation_dictionary)))
 
+
     else:
       # possibilities 1b and 2: the annotation field has no content or the field does not exist
-      self.logger.debug("VAR:The annotation field has no content or does not exist. Now iterating through gene associated drugs.")
+      self.logger.debug("VAR:The annotation field has no content or does not exist. Now iterating through gene associated drugs and gene-drug combination dictionary.")
 
       for drug in self.gene_associated_drugs:
+        if drug == "rifampicin":
+          drug = "rifampin"
         self.annotation_dictionary[drug] = Row(self.logger, self, "No WHO annotation", drug)
 
       if self.gene_name in globals.GENE_TO_ANTIMICROBIAL_DRUG_NAME.keys():
@@ -140,7 +143,7 @@ def apply_expert_rules(self, interpretation_destination):
     """
     Apply rules 1-3 from the CDPH interpretation logic document regarding the interpretation of potential resistance mutations.
     """
-    self.logger.debug("VAR:Within the Variant class apply_expert_rules function")
+    self.logger.debug("VAR:Within the Variant class apply_expert_rules function for {}".format(interpretation_destination))
 
     position_nt = globals.get_position(self.nucleotide_change)
     position_aa = globals.get_position(self.protein_change)
@@ -216,8 +219,6 @@ def apply_expert_rules(self, interpretation_destination):
     # rules 2.2.2.1 and 3.2.2 & 3.2.3
     elif self.gene_name in ["gyrA", "gyrB", "rpoB"]: 
       self.logger.debug("VAR:The gene is {}, now checking if the position requires special consideration".format(self.gene_name))
-      self.logger.debug("VAR:length of aa: {}".format(len(position_aa)))
-      self.logger.debug("VAR:SEPCIAL POSITIONS: {}".format(globals.SPECIAL_POSITIONS[self.gene_name]))
 
       if globals.is_within_range(position_aa, globals.SPECIAL_POSITIONS[self.gene_name]):
         self.logger.debug("VAR:The position is within the special positions; interpretation is 'R' if rpoB (or 'U' if not) and nonsynonymous, else 'S'")