Skip to content

Commit

Permalink
added nextclade_percentN_spike column to datatables
Browse files Browse the repository at this point in the history
- based on nextclade `missing` output
- empty if nextclade output empty
- HTML report untouched
  • Loading branch information
MarieLataretu committed Aug 14, 2024
1 parent f86681e commit d6333ab
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions bin/summary_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,25 @@ def add_nextclade_results(self, nextclade_results):
# N information
self.add_column_raw('nextclade_missing', res_data["missing"])

# percentage of N (missing) positions within the spike region
# inspired by https://github.com/nextstrain/nextclade/issues/715
def get_percent_N_in_region(positions_and_ranges, positions_of_interest=frozenset(range(21563, 25384 + 1))):
    """Return the percentage of positions of interest that are missing (N).

    Args:
        positions_and_ranges: Comma-separated string of single positions
            ("5") and ranges ("10-20") as found in the Nextclade `missing`
            output. Nextclade ranges are closed, i.e. both boundaries are
            part of the range.
        positions_of_interest: Non-empty set of positions to evaluate.
            Defaults to the closed interval 21563-25384 (the region the
            caller reports as spike). An immutable frozenset is used so the
            shared default can never be modified between calls.

    Returns:
        An empty string if the input is empty (or the string 'nan', i.e. a
        stringified missing value); otherwise the percentage (0-100, float)
        of `positions_of_interest` covered by the missing positions.

    Raises:
        ValueError: If a token is neither an integer nor an integer range.
    """
    positions_and_ranges = positions_and_ranges.strip()
    if positions_and_ranges in ('', 'nan'):
        return ''

    missing_set = set()
    for region in positions_and_ranges.split(','):
        region = region.strip()
        if not region:
            # tolerate stray separators, e.g. a trailing comma
            continue
        bounds = region.split('-')
        if len(bounds) == 1:
            missing_set.add(int(bounds[0]))
        else:
            # Nextclade ranges include both boundaries, Python's range()
            # excludes the end -> shift the end by one
            missing_set.update(range(int(bounds[0]), int(bounds[1]) + 1))

    # intersect the sets to get all positions of interest that are missing
    return len(missing_set & positions_of_interest) / len(positions_of_interest) * 100
self.add_column_raw('nextclade_percentN_spike', res_data['missing'].apply(lambda x: get_percent_N_in_region(str(x))))

res_data['mutations_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaSubstitutions']]
res_data['deletions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaDeletions']]
res_data['insertions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaInsertions']]
Expand Down

0 comments on commit d6333ab

Please sign in to comment.