Skip to content

Commit

Permalink
typemap properties and plugin setup
Browse files Browse the repository at this point in the history
  • Loading branch information
VinzentRisch committed Jul 22, 2024
1 parent 6ba8359 commit 9b4458e
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 28 deletions.
4 changes: 2 additions & 2 deletions q2_feature_table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from ._normalize import rarefy
from ._normalize import rarefy, normalize
from ._subsample_ids import subsample_ids
from ._transform import (presence_absence, relative_frequency, transpose)
from ._summarize import (summarize, tabulate_seqs, tabulate_sample_frequencies,
Expand All @@ -31,4 +31,4 @@
'filter_seqs', 'subsample_ids', 'rename_ids',
'filter_features_conditionally', 'split',
'tabulate_feature_frequencies', 'tabulate_sample_frequencies',
'summarize_plus']
'summarize_plus', 'normalize']
25 changes: 15 additions & 10 deletions q2_feature_table/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,24 @@ def _validate_parameters(method, m_trim, a_trim, gene_length):
if method in ["tpm", "fpkm"] and not gene_length:
raise ValueError("gene-length input is missing.")

# Raise Error if gene-length is given when using methods TMM, UQ, CUF, CPM or CTF
# Raise Error if gene-length is given when using methods TMM, UQ, CUF,
# CPM or CTF
if method in ["tmm", "uq", "cuf", "ctf", "cpm"] and gene_length:
raise ValueError(
"gene-length input can only be used with FPKM and TPM methods."
)

# Raise Error if m_trim or a_trim are given when not using methods TMM or CTF
if (method not in ["tmm", "ctf"]) and (m_trim is not None or a_trim is not None):
# Raise Error if m_trim or a_trim are given when not using methods
# TMM or CTF
if ((method not in ["tmm", "ctf"])
and (m_trim is not None or a_trim is not None)):
raise ValueError(
"Parameters m-trim and a-trim can only be used with methods TMM and CTF."
"Parameters m-trim and a-trim can only be used with "
"methods TMM and CTF."
)

# Set m_trim and a_trim to their default values for methods TMM and CTF
# Set m_trim and a_trim to their default values for methods
# TMM and CTF
if method in ["tmm", "ctf"]:
m_trim = 0.3 if m_trim is None else m_trim
a_trim = 0.05 if a_trim is None else a_trim
Expand All @@ -106,13 +111,13 @@ def _convert_lengths(table, gene_length):
skiprows=1,
).squeeze("columns")

# Check if all gene IDs that are present in the table are also present in
# the lengths
# Check if all gene IDs that are present in the table are also
# present in the lengths
if not set(table.columns).issubset(set(lengths.index)):
only_in_counts = set(table.columns) - set(lengths.index)
raise ValueError(
f"There are genes present in the FeatureTable that are not present "
f"in the gene-length input. Missing lengths for genes: "
f"{only_in_counts}"
f"There are genes present in the FeatureTable that are "
f"not present in the gene-length input. Missing lengths "
f"for genes: {only_in_counts}"
)
return lengths
9 changes: 9 additions & 0 deletions q2_feature_table/citations.bib
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,12 @@ @article{NCBI-BLAST
year = {2008},
doi = {10.1093/nar/gkn201},
}

@misc{Zmrzlikar_RNAnorm_RNA-seq_data_2023,
author = {Zmrzlikar, Jure and Žganec, Matjaž and Ausec, Luka and Štajdohar, Miha},
month = jul,
title = {{RNAnorm: RNA-seq data normalization in Python}},
url = {https://github.com/genialis/RNAnorm},
version = {2.1.0},
year = {2023}
}
81 changes: 78 additions & 3 deletions q2_feature_table/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from qiime2.core.type import Properties
from qiime2.plugin import (Plugin, Int, Float, Range, Metadata, Str, Bool,
Choices, MetadataColumn, Categorical, List,
Citations, TypeMatch, TypeMap, Collection,
Visualization)

from q2_types.feature_table import (
FeatureTable, Frequency, RelativeFrequency, PresenceAbsence, Composition)
FeatureTable, Frequency, RelativeFrequency, PresenceAbsence, Composition,
Normalized)
from q2_types.feature_data import (
FeatureData, Sequence, Taxonomy, AlignedSequence)
FeatureData, Sequence, Taxonomy, AlignedSequence, SequenceCharacteristics)
from q2_types.metadata import ImmutableMetadata

import q2_feature_table
Expand Down Expand Up @@ -723,3 +724,77 @@
"Tabulate sample and feature frequencies.",
examples={'feature_table_summarize_plus': ex.feature_table_summarize_plus}
)

P_method, T_normalized_table = TypeMap(
{
Str
% Choices("tpm"): (
FeatureTable[Normalized % Properties("TPM")],
),
Str
% Choices("fpkm"): (
FeatureTable[Normalized % Properties("FPKM")],
),
Str
% Choices("tmm"): (
FeatureTable[Normalized % Properties("TMM")],
),
Str
% Choices("uq"): (
FeatureTable[Normalized % Properties("UQ")],
),
Str
% Choices("cuf"): (
FeatureTable[Normalized % Properties("CUF")],
),
Str
% Choices("ctf"): (
FeatureTable[Normalized % Properties("CTF")],
),
Str
% Choices("cpm"): (
FeatureTable[Normalized % Properties("CPM")],
),
}
)

plugin.methods.register_function(
function=q2_feature_table.normalize,
inputs={
"table": FeatureTable[Frequency],
"gene_length": FeatureData[
SequenceCharacteristics % Properties("length")],
},
parameters={
"method": P_method,
"m_trim": Float % Range(
0, 1, inclusive_start=True, inclusive_end=True
),
"a_trim": Float % Range(
0, 1, inclusive_start=True, inclusive_end=True
),
},
outputs=[("normalized_table", T_normalized_table)],
input_descriptions={
"table": "Feature table with gene counts.",
"gene_length": "Gene lengths of all genes in the feature table.",
},
parameter_descriptions={
"method": "Specify the normalization method to be used. Use FPKM or "
"TPM for within comparisons and TMM, UQ, CUF or CTF for between "
"sample camparisons. Check https://www.genialis.com/wp-content/uploads"
"/2023/12/2023-Normalizing-RNA-seq-data-in-Python-with-RNAnorm.pdf "
"for more information on the methods.",
"m_trim": "Two sided cutoff for M-values. Can only be used for "
"methods TMM and CTF. (default = 0.3)",
"a_trim": "Two sided cutoff for A-values. Can only be used for "
"methods TMM and CTF. (default = 0.05)",
},
output_descriptions={
"normalized_table": "Feature table normalized with specified method."
},
name="Normalize FeatureTable",
description="Normalize FeatureTable by gene length, library size and "
"composition with common methods for RNA-seq.",
citations=[citations["Zmrzlikar_RNAnorm_RNA-seq_data_2023"]],
)
33 changes: 20 additions & 13 deletions q2_feature_table/tests/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os
import shutil
from unittest import TestCase, main
from unittest.mock import MagicMock, patch

Expand All @@ -18,8 +17,8 @@
from q2_types.feature_data import SequenceCharacteristicsDirectoryFormat

from q2_feature_table import rarefy
from q2_feature_table._normalize import _validate_parameters, _convert_lengths, \
normalize
from q2_feature_table._normalize import (_validate_parameters, \
_convert_lengths, normalize)


class RarefyTests(TestCase):
Expand Down Expand Up @@ -89,16 +88,19 @@ def test_validate_parameters_uq_with_m_a_trim(self):

def test_validate_parameters_tpm_missing_gene_length(self):
# Test Error raised if gene-length is missing with TPM method
with self.assertRaisesRegex(ValueError, "gene-length input is missing."):
with self.assertRaisesRegex(
ValueError, "gene-length input is missing."):
_validate_parameters("tpm", None, None, None)

def test_validate_parameters_tmm_gene_length(self):
# Test Error raised if gene-length is given with TMM method
with self.assertRaisesRegex(
ValueError,
"gene-length input can only be used with FPKM and TPM methods."
"gene-length input can only be used with FPKM and "
"TPM methods."
):
_validate_parameters("tmm", None, None, gene_length=MagicMock())
_validate_parameters(
"tmm", None, None, gene_length=MagicMock())

def test_validate_parameters_default_m_a_trim(self):
# Test if m_trim and a_trim get set to default values if None
Expand All @@ -115,7 +117,8 @@ def test_validate_parameters_m_a_trim(self):
def test_convert_lengths_gene_length(self):
# Test _convert_lengths
gene_length = SequenceCharacteristicsDirectoryFormat()
with open(os.path.join(str(gene_length), "sequence_characteristics.tsv"),
with open(os.path.join(
str(gene_length), "sequence_characteristics.tsv"),
'w') as file:
file.write("id\tlength\nARO1\t1356.0\nARO2\t1173.0")

Expand All @@ -125,21 +128,24 @@ def test_convert_lengths_gene_length(self):
def test_convert_lengths_short_gene_length(self):
# Test Error raised if gene-length is missing genes
gene_length = SequenceCharacteristicsDirectoryFormat()
with open(os.path.join(str(gene_length), "sequence_characteristics.tsv"), 'w') as file:
with open(os.path.join(
str(gene_length),
"sequence_characteristics.tsv"), 'w') as file:
file.write("id\tlength\nARO1\t1356.0")
with self.assertRaisesRegex(
ValueError,
"There are genes present in the FeatureTable that are not present "
"in the gene-length input. Missing lengths for genes: "
"{'ARO2'}",
"There are genes present in the FeatureTable that are "
"not present in the gene-length input. Missing lengths "
"for genes: {'ARO2'}",
):
_convert_lengths(self.table, gene_length=gene_length)

@patch("q2_feature_table._normalize.TPM")
def test_tpm_fpkm_with_valid_inputs(self, mock_tpm):
# Test valid inputs for TPM method
gene_length = SequenceCharacteristicsDirectoryFormat()
with open(os.path.join(str(gene_length), "sequence_characteristics.tsv"),
with open(os.path.join(
str(gene_length), "sequence_characteristics.tsv"),
'w') as file:
file.write("id\tlength\nARO1\t1356.0\nARO2\t1173.0")
normalize(table=self.table, gene_length=gene_length, method="tpm")
Expand All @@ -148,7 +154,8 @@ def test_tpm_fpkm_with_valid_inputs(self, mock_tpm):
def test_tmm_uq_cuf_ctf_with_valid_inputs(self, mock_tmm):
# Test valid inputs for TMM method
gene_length = SequenceCharacteristicsDirectoryFormat()
with open(os.path.join(str(gene_length), "sequence_characteristics.tsv"),
with open(os.path.join(
str(gene_length), "sequence_characteristics.tsv"),
'w') as file:
file.write("id\tlength\nARO1\t1356.0\nARO2\t1173.0")
normalize(table=self.table, method="tmm", a_trim=0.06, m_trim=0.4)
Expand Down

0 comments on commit 9b4458e

Please sign in to comment.