Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev gcgi 1481 raw coverage autofill #504

Merged
merged 6 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Unreleased
- GCGI-1461: Fix output paths in calls to get_logger
- GCGI-1481: Fix raw coverage auto-population to exclude normal samples before selection in TAR assay

## 1.7.8: 2024-12-12
- GCGI-1464: Standalone script to diff two Djerba JSON reports
Expand Down
74 changes: 47 additions & 27 deletions src/lib/djerba/plugins/tar/sample/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,28 @@
raise RuntimeError('QC-ETL import failure! Try checking python versions') from err

class main(plugin_base):

PLUGIN_VERSION = '1.0.0'
QCETL_CACHE = "/scratch2/groups/gsi/production/qcetl_v1"

def configure(self, config):
config = self.apply_defaults(config)
wrapper = self.get_config_wrapper(config)

# Get input_data.json if it exists; else return None
input_data = self.workspace.read_maybe_input_params()

# Get various IDs
keys = [constants.ONCOTREE, constants.KNOWN_VARIANTS, constants.SAMPLE_TYPE]
key_mapping = {k:k for k in keys} # mapping from INI keys to input_params.json keys
key_mapping = {k: k for k in keys} # mapping from INI keys to input_params.json keys
key_mapping[constants.GROUP_ID] = constants.TUMOUR_ID
for key,val in key_mapping.items():
for key, val in key_mapping.items():
if wrapper.my_param_is_null(key):
if input_data != None:
wrapper.set_my_param(key, input_data[val])
else:
msg = "Cannot find {0} in manual config or input_params.json".format(key)
self.logger.error(msg)
raise RuntimeError(msg)


# Get files from path_info.json
wrapper = self.update_wrapper_if_null(
Expand Down Expand Up @@ -69,12 +67,14 @@ def configure(self, config):

# Get values for collapsed coverage for Pl and BC and put in config for QC reporting
if wrapper.my_param_is_null(constants.COVERAGE_PL):
wrapper.set_my_param(constants.COVERAGE_PL, self.process_consensus_cruncher(config[self.identifier][constants.CONSENSUS_FILE]))
wrapper.set_my_param(constants.COVERAGE_PL,
self.process_consensus_cruncher(config[self.identifier][constants.CONSENSUS_FILE]))
if wrapper.my_param_is_null(constants.COVERAGE_BC):
wrapper.set_my_param(constants.COVERAGE_BC, self.process_consensus_cruncher(config[self.identifier][constants.CONSENSUS_NORMAL_FILE]))

wrapper.set_my_param(constants.COVERAGE_BC, self.process_consensus_cruncher(
config[self.identifier][constants.CONSENSUS_NORMAL_FILE]))

return wrapper.get_config()

def extract(self, config):
wrapper = self.get_config_wrapper(config)
work_dir = self.workspace.get_work_dir()
Expand All @@ -90,33 +90,53 @@ def extract(self, config):

# If purity is <10%, only report as <10% (not exact number)
purity = float(purity)
rounded_purity = round(purity*100, 1)
rounded_purity = round(purity * 100, 1)
if rounded_purity < 10:
rounded_purity = "<10"

results = {
constants.ONCOTREE: config[self.identifier][constants.ONCOTREE],
constants.KNOWN_VARIANTS : config[self.identifier][constants.KNOWN_VARIANTS],
constants.SAMPLE_TYPE : config[self.identifier][constants.SAMPLE_TYPE],
constants.CANCER_CONTENT : rounded_purity,
constants.RAW_COVERAGE : int(config[self.identifier][constants.RAW_COVERAGE]),
constants.UNIQUE_COVERAGE : int(config[self.identifier][constants.COVERAGE_PL]),
}
results = {
constants.ONCOTREE: config[self.identifier][constants.ONCOTREE],
constants.KNOWN_VARIANTS: config[self.identifier][constants.KNOWN_VARIANTS],
constants.SAMPLE_TYPE: config[self.identifier][constants.SAMPLE_TYPE],
constants.CANCER_CONTENT: rounded_purity,
constants.RAW_COVERAGE: int(config[self.identifier][constants.RAW_COVERAGE]),
constants.UNIQUE_COVERAGE: int(config[self.identifier][constants.COVERAGE_PL]),
}
data['results'] = results
return data

def fetch_coverage_etl_data(self, group_id):
    """
    Look up the raw coverage for a group ID in the QC-ETL hsmetrics cache.

    Rows matching group_id are selected, rows whose tissue type is 'R'
    (the reference/normal sample) are dropped, and the mean bait coverage
    of the first remaining row is rounded to the nearest integer.

    Args:
        group_id: value compared against the hsmetrics GroupID column.

    Returns:
        dict with the single key constants.RAW_COVERAGE mapped to an int.

    Raises:
        MissingQCETLError: if no rows match group_id, or if every matching
            row is a reference sample. The message is logged before raising.
    """
    etl_cache = QCETLCache(self.QCETL_CACHE)
    cached_coverages = etl_cache.hsmetrics.metrics
    columns_of_interest = gsiqcetl.column.HsMetricsColumn

    # Filter data for the group_id
    data = cached_coverages.loc[
        (cached_coverages[columns_of_interest.GroupID] == group_id),
        [
            columns_of_interest.GroupID,
            columns_of_interest.MeanBaitCoverage,
            columns_of_interest.TissueType,
        ]
    ]
    if len(data) == 0:
        msg = f"QC metrics associated with group_id {group_id} not found in QC-ETL and no value found in .ini."
        self.logger.error(msg)
        raise MissingQCETLError(msg)

    # Exclude the reference before selecting, so the normal sample's
    # coverage can never be picked up as the raw coverage
    filtered_data = data[data[columns_of_interest.TissueType] != 'R']
    if len(filtered_data) == 0:
        msg = f"No valid QC metrics found for group_id {group_id} after filtering out the normal."
        self.logger.error(msg)
        raise MissingQCETLError(msg)

    # Take the first row of the filtered data
    selected_value = filtered_data.iloc[0][columns_of_interest.MeanBaitCoverage]
    qc_dict = {constants.RAW_COVERAGE: int(round(selected_value, 0))}
    return qc_dict

def render(self, data):
renderer = mako_renderer(self.get_module_dir())
Expand All @@ -125,22 +145,22 @@ def render(self, data):
def process_ichor_json(self, ichor_metrics):
    """
    Read the ichorCNA metrics JSON file and return its parsed contents.

    Args:
        ichor_metrics: path to the JSON file to read.

    Returns:
        The object produced by json.load for that file.
    """
    with open(ichor_metrics, 'r') as ichor_results:
        ichor_json = json.load(ichor_results)
    return ichor_json
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No parentheses needed here -- return ichor_json works and is simpler


def process_consensus_cruncher(self, consensus_cruncher_file):
    """
    Extract the unique coverage from a tab-delimited metrics file.

    The file is scanned for a row whose first field is "BAIT_SET"; the
    next non-empty row is treated as the data row and its tenth field
    (index 9) is read as a float. If several such header/data pairs
    exist, the last one wins.

    Args:
        consensus_cruncher_file: path to the tab-delimited file.

    Returns:
        The coverage value rounded to the nearest integer.

    Raises:
        RuntimeError: if no data row follows a "BAIT_SET" header. The
            message is logged before raising.
    """
    unique_coverage = None
    header_line = False
    with open(consensus_cruncher_file, 'r') as cc_file:
        reader_file = csv.reader(cc_file, delimiter="\t")
        for row in reader_file:
            if not row:
                # Blank lines are skipped without resetting the header flag
                continue
            if row[0] == "BAIT_SET":
                header_line = True
            elif header_line:
                unique_coverage = float(row[9])
                header_line = False
    if unique_coverage is None:
        msg = "No data row found after BAIT_SET header in {0}".format(consensus_cruncher_file)
        self.logger.error(msg)
        raise RuntimeError(msg)
    return int(round(unique_coverage, 0))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, outermost parentheses are not needed -- can have return int(round(unique_coverage, 0))


def specify_params(self):
discovered = [
Expand Down
Loading