oicr-gsi · OumaimaHamza · Jan 9, 2025 · Dec 19, 2024 · Dec 19, 2024 · Jan 7, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## Unreleased
 - GCGI-1461: Fix output paths in calls to get_logger
+- GCGI-1481: Fix raw coverage auto-population to exclude normal samples before selection in TAR assay
 
 ## 1.7.8: 2024-12-12
 - GCGI-1464: Standalone script to diff two Djerba JSON reports

diff --git a/src/lib/djerba/plugins/tar/sample/plugin.py b/src/lib/djerba/plugins/tar/sample/plugin.py
@@ -18,30 +18,28 @@
     raise RuntimeError('QC-ETL import failure! Try checking python versions') from err
 
 class main(plugin_base):
-
     PLUGIN_VERSION = '1.0.0'
     QCETL_CACHE = "/scratch2/groups/gsi/production/qcetl_v1"
-    
+
     def configure(self, config):
         config = self.apply_defaults(config)
         wrapper = self.get_config_wrapper(config)
-        
+
         # Get input_data.json if it exists; else return None
         input_data = self.workspace.read_maybe_input_params()
 
         # Get various IDs
         keys = [constants.ONCOTREE, constants.KNOWN_VARIANTS, constants.SAMPLE_TYPE]
-        key_mapping = {k:k for k in keys} # mapping from INI keys to input_params.json keys
+        key_mapping = {k: k for k in keys}  # mapping from INI keys to input_params.json keys
         key_mapping[constants.GROUP_ID] = constants.TUMOUR_ID
-        for key,val in key_mapping.items():
+        for key, val in key_mapping.items():
             if wrapper.my_param_is_null(key):
                 if input_data != None:
                     wrapper.set_my_param(key, input_data[val])
                 else:
                     msg = "Cannot find {0} in manual config or input_params.json".format(key)
                     self.logger.error(msg)
                     raise RuntimeError(msg)
-
 
         # Get files from path_info.json
         wrapper = self.update_wrapper_if_null(
@@ -69,12 +67,14 @@ def configure(self, config):
 
         # Get values for collapsed coverage for Pl and BC and put in config for QC reporting
         if wrapper.my_param_is_null(constants.COVERAGE_PL):
-            wrapper.set_my_param(constants.COVERAGE_PL, self.process_consensus_cruncher(config[self.identifier][constants.CONSENSUS_FILE]))
+            wrapper.set_my_param(constants.COVERAGE_PL,
+                                 self.process_consensus_cruncher(config[self.identifier][constants.CONSENSUS_FILE]))
         if wrapper.my_param_is_null(constants.COVERAGE_BC):
-            wrapper.set_my_param(constants.COVERAGE_BC, self.process_consensus_cruncher(config[self.identifier][constants.CONSENSUS_NORMAL_FILE]))
-
+            wrapper.set_my_param(constants.COVERAGE_BC, self.process_consensus_cruncher(
+                config[self.identifier][constants.CONSENSUS_NORMAL_FILE]))
+
         return wrapper.get_config()
-    
+
     def extract(self, config):
         wrapper = self.get_config_wrapper(config)
         work_dir = self.workspace.get_work_dir()
@@ -90,33 +90,53 @@ def extract(self, config):
 
         # If purity is <10%, only report as <10% (not exact number)
         purity = float(purity)
-        rounded_purity = round(purity*100, 1)
+        rounded_purity = round(purity * 100, 1)
         if rounded_purity < 10:
             rounded_purity = "<10"
 
-        results =  {
-                constants.ONCOTREE: config[self.identifier][constants.ONCOTREE],
-                constants.KNOWN_VARIANTS : config[self.identifier][constants.KNOWN_VARIANTS],
-                constants.SAMPLE_TYPE : config[self.identifier][constants.SAMPLE_TYPE],
-                constants.CANCER_CONTENT : rounded_purity,
-                constants.RAW_COVERAGE : int(config[self.identifier][constants.RAW_COVERAGE]),
-                constants.UNIQUE_COVERAGE : int(config[self.identifier][constants.COVERAGE_PL]),
-            }
+        results = {
+            constants.ONCOTREE: config[self.identifier][constants.ONCOTREE],
+            constants.KNOWN_VARIANTS: config[self.identifier][constants.KNOWN_VARIANTS],
+            constants.SAMPLE_TYPE: config[self.identifier][constants.SAMPLE_TYPE],
+            constants.CANCER_CONTENT: rounded_purity,
+            constants.RAW_COVERAGE: int(config[self.identifier][constants.RAW_COVERAGE]),
+            constants.UNIQUE_COVERAGE: int(config[self.identifier][constants.COVERAGE_PL]),
+        }
         data['results'] = results
         return data
 
     def fetch_coverage_etl_data(self, group_id):
         etl_cache = QCETLCache(self.QCETL_CACHE)
         cached_coverages = etl_cache.hsmetrics.metrics
         columns_of_interest = gsiqcetl.column.HsMetricsColumn
-        data = cached_coverages.loc[ (cached_coverages[columns_of_interest.GroupID] == group_id),  [columns_of_interest.GroupID, columns_of_interest.MeanBaitCoverage] ]
+
+        # Select rows matching group_id; keep GroupID, MeanBaitCoverage and TissueType columns
+        data = cached_coverages.loc[
+            (cached_coverages[columns_of_interest.GroupID] == group_id),
+            [
+                columns_of_interest.GroupID,
+                columns_of_interest.MeanBaitCoverage,
+                columns_of_interest.TissueType,
+            ]
+        ]
+
         qc_dict = {}
         if len(data) > 0:
-           qc_dict[constants.RAW_COVERAGE] = int(round(data.iloc[0][columns_of_interest.MeanBaitCoverage].item(),0))
+            # Exclude the reference (normal) sample, i.e. rows whose TissueType is 'R'
+            filtered_data = data[data[columns_of_interest.TissueType] != 'R']
+
+            if len(filtered_data) > 0:
+                # Use the first remaining row; its MeanBaitCoverage is rounded to the nearest int
+                selected_value = filtered_data.iloc[0][columns_of_interest.MeanBaitCoverage]
+                qc_dict[constants.RAW_COVERAGE] = int(round(selected_value, 0))
+            else:
+                msg = f"No valid QC metrics found for group_id {group_id} after filtering out the normal."
+                raise MissingQCETLError(msg)
         else:
-            msg = "QC metrics associated with group_id {0} not found in QC-ETL and no value found in .ini ".format(group_id)
+            msg = f"QC metrics associated with group_id {group_id} not found in QC-ETL and no value found in .ini."
             raise MissingQCETLError(msg)
-        return(qc_dict)
+
+        return qc_dict
 
     def render(self, data):
         renderer = mako_renderer(self.get_module_dir())
@@ -125,22 +145,22 @@ def render(self, data):
     def process_ichor_json(self, ichor_metrics):
         with open(ichor_metrics, 'r') as ichor_results:
             ichor_json = json.load(ichor_results)
-        return(ichor_json)
+        return ichor_json  # parsed JSON content of the ichor_metrics file
 
     def process_consensus_cruncher(self, consensus_cruncher_file):
         header_line = False
         with open(consensus_cruncher_file, 'r') as cc_file:
             reader_file = csv.reader(cc_file, delimiter="\t")
             for row in reader_file:
                 if row:
-                    if row[0] == "BAIT_SET" :
+                    if row[0] == "BAIT_SET":
                         header_line = True
                     elif header_line:
-                        unique_coverage = float(row[9]) 
+                        unique_coverage = float(row[9])  # 10th column of the row after the BAIT_SET header
                         header_line = False
                     else:
                         next
-        return(int(round(unique_coverage, 0)))
+        return int(round(unique_coverage, 0))
 
     def specify_params(self):
         discovered = [