Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Summarize #276 #285

Merged
merged 32 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
de99763
update feature table TODO: access elements rather than data types in …
Clockwork-Rat Sep 7, 2023
d02d132
Implementation of add metadata and taxonomy to tabulate seqs
Clockwork-Rat Sep 11, 2023
cbb5ad4
Update to flake8 standards
Clockwork-Rat Sep 11, 2023
59515df
fix jinja syntax
Clockwork-Rat Sep 12, 2023
186220e
fix jinja syntax
Clockwork-Rat Sep 12, 2023
90e65fc
protect against metadata being none
Clockwork-Rat Sep 12, 2023
2f92a7a
add tests for new features
Clockwork-Rat Sep 12, 2023
88f201a
update test_optional_outputs and correct jinja syntax
Clockwork-Rat Sep 12, 2023
cb566a5
clean up
Clockwork-Rat Sep 12, 2023
9632014
add descriptions to new inputs and parameters
Clockwork-Rat Sep 12, 2023
6c67e49
fix indentation
Clockwork-Rat Sep 12, 2023
d2d0b87
adjust plugin setup and visualizer.py
Clockwork-Rat Sep 14, 2023
3f1801c
Update to final version
Clockwork-Rat Sep 18, 2023
5610b14
fix bug with union and intersect, add tests for union and intersect
Clockwork-Rat Sep 18, 2023
4067415
add test for strict failure
Clockwork-Rat Sep 18, 2023
1de93cc
fix merge conflict
Clockwork-Rat Sep 19, 2023
2998b30
fix merge conflict
Clockwork-Rat Sep 19, 2023
226e585
add sorting to all columns
Clockwork-Rat Sep 19, 2023
4eee7ce
add ability to name taxonomies
Clockwork-Rat Sep 20, 2023
dd3684c
fix strict failure test
Clockwork-Rat Sep 20, 2023
b536aaa
add test for taxonomy name appearing
Clockwork-Rat Sep 20, 2023
3adcd73
Add multiple Taxonomies to union test
Clockwork-Rat Sep 21, 2023
3861ea4
Fix collection representation
Clockwork-Rat Sep 21, 2023
566dfc0
try fix sort
Clockwork-Rat Sep 21, 2023
9bf45d5
add dash to blank lines
Clockwork-Rat Sep 21, 2023
a3cbe74
Fix testing data
Clockwork-Rat Sep 21, 2023
e55d7db
small update
Clockwork-Rat Sep 21, 2023
375acff
add choices to tabulate_seqs
Clockwork-Rat Sep 25, 2023
bff07b0
switch sequence and sequence length headers back
Clockwork-Rat Sep 25, 2023
7282930
add usage examples for tabulate_seqs including single and multiple ta…
Clockwork-Rat Sep 26, 2023
84f273b
add test data
Clockwork-Rat Sep 26, 2023
0d78058
update test data names
Clockwork-Rat Sep 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 36 additions & 7 deletions q2_feature_table/_summarize/_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,47 @@

def tabulate_seqs(output_dir: str, data: DNAIterator,
taxonomy: pd.DataFrame = None,
metadata: qiime2.Metadata = None) -> None:
sequences = []
metadata: qiime2.Metadata = None,
merge_method: str = 'strict') -> None:

display_sequences = set()
sequences = {}
seq_lengths = []
with open(os.path.join(output_dir, 'sequences.fasta'), 'w') as fh:
for sequence in data:
skbio.io.write(sequence, format='fasta', into=fh)
str_seq = str(sequence)
seq_len = len(str_seq)
sequences.append({'id': sequence.metadata['id'],
'len': seq_len,
'url': _blast_url_template % str_seq,
'seq': str_seq})
display_sequences.add(sequence.metadata['id'])
sequences[sequence.metadata['id']]\
= {'len': seq_len,
'url': _blast_url_template % str_seq,
'seq': str_seq}
seq_lengths.append(seq_len)

if metadata is not None:
metadata_df = metadata.to_dataframe()
if merge_method == 'union':
display_sequences = display_sequences.union(metadata_df.index)
elif merge_method == 'intersect':
display_sequences = display_sequences.intersection(
metadata_df.index)
elif merge_method == 'strict':
if set(metadata_df.index) != display_sequences:
raise Exception('Merge method is strict and IDs do not match')
if taxonomy is not None:
for name in taxonomy.keys():
if merge_method == 'union':
display_sequences = display_sequences.union(
taxonomy[name][0].index)
elif merge_method == 'intersect':
display_sequences = display_sequences.intersection(
taxonomy[name][0].index)
elif merge_method == 'strict':
if set(taxonomy[name][0].index) != display_sequences:
raise Exception('Merge method is strict and IDs do not \
match')
Oddant1 marked this conversation as resolved.
Show resolved Hide resolved

seq_len_stats = _compute_descriptive_stats(seq_lengths)
_write_tsvs_of_descriptive_stats(seq_len_stats, output_dir)

Expand All @@ -54,7 +81,9 @@ def tabulate_seqs(output_dir: str, data: DNAIterator,
if taxonomy is not None:
context['taxonomy'] = taxonomy
if metadata is not None:
context['metadata'] = metadata.to_dataframe()
context['metadata'] = metadata_df
context['display_sequences'] = display_sequences
context['union'] = True if merge_method == 'union' else False
q2templates.render(index, output_dir, context=context)

js = os.path.join(
Expand Down
40 changes: 28 additions & 12 deletions q2_feature_table/_summarize/tabulate_seqs_assets/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -92,32 +92,48 @@ <h3>Sequence Table</h3>
<th data-tsorter="numeric">Sequence Length</th>
<th data-tsorter="link">Sequence</th>
{% if taxonomy is defined %}
{% for col in taxonomy.columns %}
<th>{{ col }}</th>
{% for name in taxonomy.keys() %}
<th data-tsorter="default">{{name}}</th>
{% endfor %}
{% endif %}
{% if metadata is defined %}
{% for name in metadata.columns %}
<th> {{ name }} </th>
<th data-tsorter="default">{{ name }}</th>
{% endfor %}
{% endif %}
</tr>
</thead>
<tbody>
{% for sequence in data %}
{% for sequence in display_sequences %}
<tr>
<td>{{ sequence.id }}</td>
<td>{{ sequence.len }}</td>
<td><samp><a target="_blank" href="{{ sequence.url }}" rel="noopener noreferrer">{{ sequence.seq }}</a></samp></td>
<td>{{ sequence }}</td>
{% if sequence in data %}
<td><samp><a target="_blank" href="{{ sequence.url }}" rel="noopener noreferrer">{{ data[sequence].seq }}</a></samp></td>
<td>{{ data[sequence].len }}</td>
{% else %}
<td></td>
<td></td>
{% endif %}
{% if taxonomy is defined %}
{% for col in taxonomy.columns %}
<td>{{ taxonomy.loc[sequence.id, col] }}</td>
{% for name in taxonomy.columns %}
{% set member = taxonomy[name][0] %}
{% if sequence in member.index %}
<td>{{ member.loc[sequence, "Taxon"] }}</td>
{% else %}
<td></td>
{% endif %}
{% endfor %}
{% endif %}
{% if metadata is defined %}
{% for name in metadata.columns %}
<td>{{ metadata.loc[sequence.id, name] }}</td>
{% endfor %}
{% if sequence in metadata.index %}
{% for name in metadata.columns %}
<td>{{ metadata.loc[sequence, name] }}</td>
{% endfor %}
{% else %}
{%for name in metadata.columns %}
<td></td>
{% endfor %}
{% endif %}
{% endif %}
</tr>
{% endfor %}
Expand Down
9 changes: 6 additions & 3 deletions q2_feature_table/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,14 +509,17 @@
plugin.visualizers.register_function(
function=q2_feature_table.tabulate_seqs,
inputs={'data': FeatureData[Sequence | AlignedSequence],
'taxonomy': FeatureData[Taxonomy]},
parameters={'metadata': Metadata},
'taxonomy': Collection[FeatureData[Taxonomy]]},
parameters={'metadata': Metadata, 'merge_method': Str},
input_descriptions={
'data': 'The feature sequences to be tabulated.',
'taxonomy': 'The taxonomic classifications of the tabulated features.'
},
parameter_descriptions={
'metadata': 'Any additional metadata for the tabulated features.'},
'metadata': 'Any additional metadata for the tabulated features.',
'merge_method':
'Method that joins data sets: Strict(default), Union, Intersection'
},
name='View sequence associated with each feature',
description="Generate tabular view of feature identifier to sequence "
"mapping, including links to BLAST each sequence against "
Expand Down
147 changes: 144 additions & 3 deletions q2_feature_table/tests/test_summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ def test_optional_inputs(self):
columns=['Taxon', 'Confidence'])

metadata = qiime2.Metadata(metadata)
taxonomy = {"Taxon Name": [taxonomy]}
taxonomy = pd.DataFrame.from_dict(taxonomy)

with tempfile.TemporaryDirectory() as output_dir:
tabulate_seqs(output_dir, seqs, metadata=metadata,
Expand All @@ -210,10 +212,149 @@ def test_optional_inputs(self):
self.assertTrue('<td>3</td>' in file_text)
self.assertTrue('<td>4</td>' in file_text)
self.assertTrue('<td>6</td>' in file_text)
self.assertTrue('<td>1.0</td>' in file_text)
self.assertTrue('<td>a;b;e;d</td>' in file_text)
self.assertTrue('<td>a;b;c;d</td>' in file_text)
self.assertTrue('<td>0.4</td>' in file_text)
self.assertTrue('<td>0.7</td>' in file_text)
self.assertTrue('<td>08</td>' in file_text)
self.assertTrue('<td>17</td>' in file_text)

def test_optional_input_union(self):
    """Check that ``merge_method="union"`` tabulates IDs from every input.

    The eight sequences (seq01-seq08) only partly overlap with the
    metadata IDs (which add seq15, seq90 and seq48) and with the taxonomy
    IDs (which add seq17, seq19 and seq48), so the rendered page has to
    contain values that come from the metadata or the taxonomy alone.
    """
    seqs = DNAIterator(skbio.DNA(a, metadata=b) for a, b in (
        ('A', {'id': 'seq01'}),
        ('AA', {'id': 'seq02'}),
        ('AAA', {'id': 'seq03'}),
        ('AAAA', {'id': 'seq04'}),
        ('AAAA', {'id': 'seq05'}),
        ('AAA', {'id': 'seq06'}),
        ('AA', {'id': 'seq07'}),
        ('AAAAAAAAAA', {'id': 'seq08'})))

    metadata = pd.DataFrame(index=['seq01', 'seq02',
                                   'seq15', 'seq04',
                                   'seq05', 'seq90',
                                   'seq48', 'seq08'],
                            columns=['att1', 'att2'],
                            data=[['00', '01'], ['10', '11'],
                                  ['03', '04'], ['12', '13'],
                                  ['05', '06'], ['14', '15'],
                                  ['07', '08'], ['16', '17']])
    metadata.index.name = 'feature id'

    taxonomy = pd.DataFrame([('a;b;c;d', '1.0'), ('a;b;c;f', '0.7'),
                             ('a;b;h;d', '0.3'), ('a;b;d;f', '0.7'),
                             ('a;b;e;d', '0.4'), ('a;b;c;f', '0.6'),
                             ('a;b;t;d', '1.0'), ('a;b;d;f', '0.5')],
                            index=['seq17', 'seq02', 'seq03', 'seq48',
                                   'seq05', 'seq19', 'seq07', 'seq08'],
                            columns=['Taxon', 'Confidence'])

    metadata = qiime2.Metadata(metadata)
    # tabulate_seqs reads each taxonomy as ``taxonomy[name][0]``, so the
    # frame is wrapped in a one-element list under its display name.
    taxonomy = {"Taxon Name": [taxonomy]}
    taxonomy = pd.DataFrame.from_dict(taxonomy)

    with tempfile.TemporaryDirectory() as output_dir:
        tabulate_seqs(output_dir, seqs, metadata=metadata,
                      taxonomy=taxonomy, merge_method="union")
        expected_fp = os.path.join(output_dir, 'index.html')
        with open(expected_fp) as fh:
            file_text = fh.read()

    expected_fragments = (
        '<td>8</td>', '<td>1</td>', '<td>10</td>', '<td>3.62</td>',
        '<td>9</td>',
        # seq17 exists only in the taxonomy, so it can only show up if the
        # union kept IDs that have no sequence.
        'seq17',
        '<td>2</td>', '<td>3</td>', '<td>4</td>', '<td>6</td>',
        '<td>a;b;e;d</td>', '<td>a;b;c;d</td>',
        '<td>08</td>',
        # The key the taxonomy was registered under.
        'Taxon Name',
    )
    # assertIn reports the missing fragment and the searched text on
    # failure, unlike assertTrue(x in y) which only says "False is not
    # true".
    for fragment in expected_fragments:
        self.assertIn(fragment, file_text)

def test_optional_inputs_intersect(self):
    """Check that ``merge_method="intersect"`` keeps only shared IDs.

    Only seq02, seq05 and seq08 are present in the sequences, the
    metadata and the taxonomy at the same time, so values belonging to
    IDs that are missing from any one input must not be rendered.
    """
    seqs = DNAIterator(skbio.DNA(a, metadata=b) for a, b in (
        ('A', {'id': 'seq01'}),
        ('AA', {'id': 'seq02'}),
        ('AAA', {'id': 'seq03'}),
        ('AAAA', {'id': 'seq04'}),
        ('AAAA', {'id': 'seq05'}),
        ('AAA', {'id': 'seq06'}),
        ('AA', {'id': 'seq07'}),
        ('AAAAAAAAAA', {'id': 'seq08'})))

    metadata = pd.DataFrame(index=['seq01', 'seq02',
                                   'seq15', 'seq04',
                                   'seq05', 'seq90',
                                   'seq48', 'seq08'],
                            columns=['att1', 'att2'],
                            data=[['00', '01'], ['10', '11'],
                                  ['03', '04'], ['12', '13'],
                                  ['05', '06'], ['14', '15'],
                                  ['07', '08'], ['16', '17']])
    metadata.index.name = 'feature id'

    taxonomy = pd.DataFrame([('a;b;c;d', '1.0'), ('a;b;c;f', '0.7'),
                             ('a;b;h;d', '0.3'), ('a;b;d;f', '0.7'),
                             ('a;b;e;d', '0.4'), ('a;b;c;f', '0.6'),
                             ('a;b;t;d', '1.0'), ('a;b;d;f', '0.5')],
                            index=['seq17', 'seq02', 'seq03', 'seq48',
                                   'seq05', 'seq19', 'seq07', 'seq08'],
                            columns=['Taxon', 'Confidence'])

    metadata = qiime2.Metadata(metadata)
    # tabulate_seqs reads each taxonomy as ``taxonomy[name][0]``, so the
    # frame is wrapped in a one-element list under its display name.
    taxonomy = {"Taxon Name": [taxonomy]}
    taxonomy = pd.DataFrame.from_dict(taxonomy)

    with tempfile.TemporaryDirectory() as output_dir:
        tabulate_seqs(output_dir, seqs, metadata=metadata,
                      taxonomy=taxonomy, merge_method="intersect")
        expected_fp = os.path.join(output_dir, 'index.html')
        with open(expected_fp) as fh:
            file_text = fh.read()

    # 'a;b;c;d' is the taxon of seq17, which only the taxonomy knows.
    self.assertNotIn('<td>a;b;c;d</td>', file_text)
    # '14' is att1 of seq90, which only the metadata knows.
    self.assertNotIn('<td>14</td>', file_text)
    # '10' is att1 of seq02, an ID shared by all three inputs.
    self.assertIn('<td>10</td>', file_text)

def test_optional_input_strict_fail(self):
    """The default (strict) merge must reject inputs whose IDs differ.

    The metadata and taxonomy indexes below do not match the sequence IDs
    seq01-seq08, and no ``merge_method`` is passed, so ``tabulate_seqs``
    is expected to raise its "Merge method is strict ..." error.
    """
    id_to_bases = (
        ('seq01', 'A'),
        ('seq02', 'AA'),
        ('seq03', 'AAA'),
        ('seq04', 'AAAA'),
        ('seq05', 'AAAA'),
        ('seq06', 'AAA'),
        ('seq07', 'AA'),
        ('seq08', 'AAAAAAAAAA'),
    )
    seqs = DNAIterator(
        skbio.DNA(bases, metadata={'id': seq_id})
        for seq_id, bases in id_to_bases)

    metadata_df = pd.DataFrame(
        data=[['00', '01'], ['10', '11'],
              ['03', '04'], ['12', '13'],
              ['05', '06'], ['14', '15'],
              ['07', '08'], ['16', '17']],
        index=['seq01', 'seq02', 'seq15', 'seq04',
               'seq05', 'seq90', 'seq48', 'seq08'],
        columns=['att1', 'att2'])
    metadata_df.index.name = 'feature id'
    metadata = qiime2.Metadata(metadata_df)

    taxonomy_df = pd.DataFrame(
        [('a;b;c;d', '1.0'), ('a;b;c;f', '0.7'),
         ('a;b;h;d', '0.3'), ('a;b;d;f', '0.7'),
         ('a;b;e;d', '0.4'), ('a;b;c;f', '0.6'),
         ('a;b;t;d', '1.0'), ('a;b;d;f', '0.5')],
        index=['seq17', 'seq02', 'seq03', 'seq48',
               'seq05', 'seq19', 'seq07', 'seq08'],
        columns=['Taxon', 'Confidence'])
    # tabulate_seqs reads each taxonomy as ``taxonomy[name][0]``, so the
    # frame is wrapped in a one-element list under its display name.
    taxonomy = pd.DataFrame.from_dict({"Taxon Name": [taxonomy_df]})

    with tempfile.TemporaryDirectory() as output_dir:
        # The context manager fails the test if the call returns without
        # raising.
        with self.assertRaisesRegex(Exception, "Merge method is strict"):
            tabulate_seqs(output_dir, seqs, metadata=metadata,
                          taxonomy=taxonomy)


class SummarizeTests(TestCase):
Expand Down