From f670ccda6acd1890cd739e8bda077ddc37d5883e Mon Sep 17 00:00:00 2001 From: ChrisKent Date: Thu, 27 Jun 2024 19:37:24 +0100 Subject: [PATCH] Created more verbose error messages, using custom messages. Added an option to attempt to fix the v2 primernames to v3 --- README.md | 6 +- primal_page/__init__.py | 2 +- primal_page/bedfiles.py | 218 ++++++++++++++++++++---------------- primal_page/dev.py | 6 +- primal_page/errors.py | 45 ++++++++ primal_page/main.py | 82 +++++++++++--- primal_page/schemas.py | 19 ++++ pyproject.toml | 2 +- tests/create/test_create.py | 9 +- tests/test_bedfile.py | 154 +++++++++++++++++++++++-- tests/test_regex.py | 135 ---------------------- 11 files changed, 406 insertions(+), 272 deletions(-) create mode 100644 primal_page/errors.py diff --git a/README.md b/README.md index 9e68f75..4227020 100644 --- a/README.md +++ b/README.md @@ -281,6 +281,7 @@ $ primal-page create [OPTIONS] SCHEMEPATH * `--links-homepage TEXT`: Optional link to homepage * `--link-vendor TEXT`: Optional link to vendors * `--link-misc TEXT`: Optional miscellaneous link +* `--fix / --no-fix`: Attempt to fix the scheme [default: no-fix] * `--help`: Show this message and exit. @@ -559,7 +560,7 @@ $ primal-page modify change-status [OPTIONS] SCHEMEINFO [SCHEMESTATUS]:[withdraw **Arguments**: * `SCHEMEINFO`: The path to info.json [required] -* `[SCHEMESTATUS]:[withdrawn|deprecated|autogenerated|draft|tested|validated]`: The scheme class [default: SchemeStatus.DRAFT] +* `[SCHEMESTATUS]:[withdrawn|deprecated|autogenerated|draft|tested|validated]`: The scheme class [default: draft] **Options**: @@ -677,5 +678,4 @@ $ primal-page remove [OPTIONS] SCHEMEINFO **Options**: -* `--help`: Show this message and exit. - +* `--help`: Show this message and exit. \ No newline at end of file diff --git a/primal_page/__init__.py b/primal_page/__init__.py index e4adfb8..14d9d2f 100644 --- a/primal_page/__init__.py +++ b/primal_page/__init__.py @@ -1 +1 @@ -__version__ = "1.6.0" +__version__ = "1.7.0" diff --git a/primal_page/bedfiles.py b/primal_page/bedfiles.py index d648bef..f34b763 100644 --- a/primal_page/bedfiles.py +++ b/primal_page/bedfiles.py @@ -2,10 +2,36 @@ import re from enum import Enum +from primal_page.errors import InvalidBedFileLine, PrimerNameError, PrimerVersionError + # Primername versions V2_PRIMERNAME = r"^[a-zA-Z0-9\-]+_[0-9]+_(LEFT|RIGHT)_[0-9]+$" V1_PRIMERNAME = r"^[a-zA-Z0-9\-]+_[0-9]+_(LEFT|RIGHT)(_ALT[0-9]*|_alt[0-9]*)*$" +# Bedfile versions +## This doesn't parse the contents just the structure +BEDFILE_LINE = r"^\S+\t\d+\t\d+\t\S+\t\d+\t(\+|\-)\t[a-zA-Z]+$" + + +# bedfile versions +class BedfileVersion(Enum): + """ + V1 bedfiles use a 6 col system + V2 bedfiles use a 7 col system and V1 primernames + V3 bedfiles use a 7 col system and V2 primernames + """ + + V1 = "v1.0" + V2 = "v2.0" + V3 = "v3.0" + INVALID = "invalid" # Not applicable + + +class BEDFileResult(Enum): + VALID = 0 + INVALID_VERSION = 1 + INVALID_STRUCTURE = 2 + class PrimerNameVersion(Enum): V1 = "v1" @@ -13,18 +39,61 @@ class PrimerNameVersion(Enum): INVALID = "invalid" # Not applicable +class BedLine: + def __init__( + self, + chrom: str, + start: int, + end: int, + primername: str, + pool: int, + strand: str, + seq: str, + ): + self.chrom = chrom + self.start = start + self.end = end + self.primername = primername + self.pool = pool + self.strand = strand + self.seq = seq + + # Validate the primername + if determine_primername_version(primername) == PrimerNameVersion.INVALID: + raise PrimerNameError(f"{primername}") + + # Autogenerated fields + self.prefix = primername.split("_")[0] + self.amplicon_number = int(primername.split("_")[1]) + self.pn_direction = primername.split("_")[2] + self.primernumber: int | None = None + + def __str__(self): + return f"{self.chrom}\t{self.start}\t{self.end}\t{self.primername}\t{self.pool}\t{self.strand}\t{self.seq}" + + def parsed_primername(self, primernumber: int) -> str: + return "_".join( + [ + self.prefix, + str(self.amplicon_number), + self.pn_direction, + str(primernumber), + ] + ) + + def parsed_bedline(self) -> str: + return f"{self.chrom}\t{self.start}\t{self.end}\t{self.primername if self.primernumber is None else self.parsed_primername(self.primernumber)}\t{self.pool}\t{self.strand}\t{self.seq}" + + def determine_primername_version(primername: str) -> PrimerNameVersion: """ Determines the version of the primer name. - Args: - primername (str): The primer name to check. - - Returns: - PrimerNameVersion: The version of the primer name. - - Raises: - None. + :param primername: The primer name to check. + :type primername: str + :return: The version of the primer name. + :rtype: PrimerNameVersion + :raises: None """ if re.search(V2_PRIMERNAME, primername): return PrimerNameVersion.V2 @@ -34,11 +103,17 @@ def determine_primername_version(primername: str) -> PrimerNameVersion: return PrimerNameVersion.INVALID -def convert_v1_primernames_to_v2(primername: str) -> str: +def convert_v1_primernames_to_v2(primername: str, primernumber=1) -> str: """ - Convert a v1 primername to a v2 primername. Cannot handle alt primers - :param primername: The v1 primername - :return: The v2 primername + Convert a v1 primername to a v2 primername. Cannot convert alt primers. + + :param primername: The v1 primername to convert. + :type primername: str + :param primernumber: The primernumber to add to the primername. + :type primernumber: int + :return: The v2 primername. + :rtype: str + :raises: ValueError """ # Check if this is a v1 primername if determine_primername_version(primername) != PrimerNameVersion.V1: @@ -47,34 +122,21 @@ def convert_v1_primernames_to_v2(primername: str) -> str: # Split the primername data = primername.split("_") # Remove the alt - if data[-1] == "alt" or data[-1] == "ALT": + if "alt" in data[-1].lower(): raise ValueError(f"{primername} is a v1 alt primername, cannot convert") - data.append("0") - # Join back together + # Add primernumber and return + data.append(str(primernumber)) return "_".join(data) -# Bedfile versions -## This doesn't parse the contents just the structure -BEDFILE_LINE = r"^\S+\t\d+\t\d+\t\S+\t\d+\t(\+|\-)\t[a-zA-Z]+$" - - -class BEDFileResult(Enum): - VALID = 0 - INVALID_VERSION = 1 - INVALID_STRUCTURE = 2 - - def validate_bedfile_line_structure(line: str) -> bool: """ - This function validates the structure of a bedfile line, but not the contents. - - Args: - line (str): The line to be validated. - - Returns: - bool: True if the line structure is valid, False otherwise. + This function validates the structure of a bedfile line, but not the contents. Empty lines will error. + :param line: The line to validate. + :type line: str + :return: Whether the line is valid or not. + :rtype: bool """ line = line.strip() if line.startswith("#"): @@ -103,8 +165,13 @@ def validate_bedfile(bedfile: pathlib.Path) -> BEDFileResult: # Check each line for line in bedlines: + # Skip empty lines + if not line: + continue if not validate_bedfile_line_structure(line): - return BEDFileResult.INVALID_STRUCTURE + raise InvalidBedFileLine( + f"Invalid line in bedfile:\n{line}\nShould contain 7 columns separated by tabs." + ) # Check the bedfile names. match determine_bedfile_version(bedfile): @@ -114,25 +181,14 @@ def validate_bedfile(bedfile: pathlib.Path) -> BEDFileResult: return BEDFileResult.VALID -# bedfile versions -class BedfileVersion(Enum): - """ - V1 bedfiles use a 6 col system - V2 bedfiles use a 7 col system and V1 primernames - V3 bedfiles use a 7 col system and V2 primernames - """ - - V1 = "v1.0" - V2 = "v2.0" - V3 = "v3.0" - INVALID = "invalid" # Not applicable - - def determine_bedfile_version(input: list[list] | pathlib.Path) -> BedfileVersion: """ Determine the bedfile version :param input: Either the bedfile lines as a list or the bedfile path :return: The bedfile version + :raises: + - PrimerNameError: If there are invalid primer names + - PrimerVersionError: If there is a mix of primernames """ if isinstance(input, pathlib.Path): bedlines, _ = read_bed_file(input) @@ -146,60 +202,30 @@ def determine_bedfile_version(input: list[list] | pathlib.Path) -> BedfileVersio # If 7 cols then v2 or v3 # Check from primername primernames = [x[3] for x in bedlines] - primer_name_versions = {determine_primername_version(x) for x in primernames} + primer_name_versions_dict = { + x: determine_primername_version(x) for x in primernames + } + primer_name_versions = set(primer_name_versions_dict.values()) + if primer_name_versions == {PrimerNameVersion.V1}: return BedfileVersion.V2 elif primer_name_versions == {PrimerNameVersion.V2}: return BedfileVersion.V3 - # Invalid if we get here - # Mix of v1, v2 or invalid - return BedfileVersion.INVALID - - -class BedLine: - def __init__( - self, - chrom: str, - start: int, - end: int, - primername: str, - pool: int, - strand: str, - seq: str, - ): - self.chrom = chrom - self.start = start - self.end = end - self.primername = primername - self.pool = pool - self.strand = strand - self.seq = seq - # Validate the primername - if determine_primername_version(primername) == PrimerNameVersion.INVALID: - raise ValueError(f"Invalid primername: {primername}") + if PrimerNameVersion.INVALID in primer_name_versions: + # Raise an error if there are invalid primer names + invalid_primer_names = { + pn + for pn, pv in primer_name_versions_dict.items() + if pv == PrimerNameVersion.INVALID + } + raise PrimerNameError(f"{invalid_primer_names}") - # Autogenerated fields - self.prefix = primername.split("_")[0] - self.amplicon_number = int(primername.split("_")[1]) - self.pn_direction = primername.split("_")[2] - self.primernumber: int | None = None - - def __str__(self): - return f"{self.chrom}\t{self.start}\t{self.end}\t{self.primername}\t{self.pool}\t{self.strand}\t{self.seq}" - - def parsed_primername(self, primernumber: int) -> str: - return "_".join( - [ - self.prefix, - str(self.amplicon_number), - self.pn_direction, - str(primernumber), - ] - ) + if primer_name_versions == {PrimerNameVersion.V1, PrimerNameVersion.V2}: + raise PrimerVersionError("Mix of v1 and v2 primer names") - def parsed_bedline(self) -> str: - return f"{self.chrom}\t{self.start}\t{self.end}\t{self.primername if self.primernumber is None else self.parsed_primername(self.primernumber)}\t{self.pool}\t{self.strand}\t{self.seq}" + # This should not be reached + return BedfileVersion.INVALID def read_bedlines(bedfilepath: pathlib.Path) -> tuple[list[BedLine], list[str]]: @@ -307,4 +333,4 @@ def regenerate_v3_bedfile(bedfile: pathlib.Path) -> str: # Add the line to the bedfile bedfile_str_list.append(line.parsed_bedline()) - return "\n".join(bedfile_str_list) + return "\n".join(bedfile_str_list) + "\n" diff --git a/primal_page/dev.py b/primal_page/dev.py index 5edbb03..e1f1fb8 100644 --- a/primal_page/dev.py +++ b/primal_page/dev.py @@ -1,9 +1,9 @@ -from Bio import SeqIO import hashlib import json import pathlib import typer +from Bio import SeqIO from typing_extensions import Annotated from primal_page.bedfiles import ( @@ -53,7 +53,9 @@ def regenerate( # Hash the reference.fasta file # If the hash is different, rewrite the file ref_hash = hashfile(scheme_path / "reference.fasta") - ref_str = "".join((x.format("fasta") for x in SeqIO.parse(scheme_path / "reference.fasta", "fasta"))) + ref_str = "".join( + x.format("fasta") for x in SeqIO.parse(scheme_path / "reference.fasta", "fasta") + ) if ref_hash != hashlib.md5(ref_str.encode()).hexdigest(): with open(scheme_path / "reference.fasta", "w") as ref_file: ref_file.write(ref_str) diff --git a/primal_page/errors.py b/primal_page/errors.py new file mode 100644 index 0000000..42d60f2 --- /dev/null +++ b/primal_page/errors.py @@ -0,0 +1,45 @@ +from click import UsageError + + +class PrimerNameError(UsageError): + """Raised when a primername is invalid""" + + def __init__(self, primername: str): + super().__init__( + f"Invalid primernames: {primername}. Please use format (name)_(amplicon-number)_(LEFT|RIGHT) with optional _(primer-number)" + ) + + +class PrimerVersionError(UsageError): + """Raised when a primername is unexpected""" + + def __init__(self, message: str): + super().__init__(message) + + +class InvalidBedFileLine(UsageError): + """Raised when a bedline is invalid""" + + def __init__(self, message: str): + super().__init__(message) + + +class SchemeExists(UsageError): + """Raised when a Scheme already exists""" + + def __init__(self, message: str): + super().__init__(message) + + +class FileNotFound(UsageError): + """Raised when a file is not found""" + + def __init__(self, message: str): + super().__init__(message) + + +class InvalidReference(UsageError): + """Raised when a file is not found""" + + def __init__(self, message: str): + super().__init__(message) diff --git a/primal_page/main.py b/primal_page/main.py index 715ac29..6748bc5 100644 --- a/primal_page/main.py +++ b/primal_page/main.py @@ -1,5 +1,3 @@ -from Bio import SeqIO - import json import pathlib import shutil @@ -7,6 +5,7 @@ from typing import Optional import typer +from Bio import SeqIO from typing_extensions import Annotated from primal_page.__init__ import __version__ @@ -14,14 +13,23 @@ BEDFileResult, BedfileVersion, determine_bedfile_version, + regenerate_v3_bedfile, validate_bedfile, ) from primal_page.build_index import create_index from primal_page.dev import app as dev_app from primal_page.download import app as download_app +from primal_page.errors import FileNotFound, InvalidReference, SchemeExists from primal_page.modify import app as modify_app from primal_page.modify import hashfile, regenerate_readme, trim_file_whitespace -from primal_page.schemas import Collection, Info, Links, PrimerClass, SchemeStatus +from primal_page.schemas import ( + Collection, + Info, + IUPACAmbiguousDNA, + Links, + PrimerClass, + SchemeStatus, +) class FindResult(Enum): @@ -30,7 +38,7 @@ class FindResult(Enum): # Create the typer app -app = typer.Typer(no_args_is_help=True) +app = typer.Typer(no_args_is_help=True, pretty_exceptions_show_locals=False) app.add_typer( modify_app, name="modify", @@ -59,6 +67,29 @@ def primal_page( pass +def validate_ref_file(ref_file: pathlib.Path): + """ + Validate the reference.fasta file + :param ref_file: The path to the reference.fasta file + :raises InvalidReference: If the reference.fasta file is invalid + """ + # Very simple fasta validation + try: + records = SeqIO.index(ref_file, "fasta") + except Exception as e: + raise InvalidReference(f"Could not validate {ref_file}: {e}") from e + + for record in records.values(): + seq_bases = set(record.seq) + # Check DNA sequence + if not seq_bases.issubset(IUPACAmbiguousDNA): + raise InvalidReference( + f"Invalid DNA bases ({', '.join(seq_bases.difference(IUPACAmbiguousDNA))}) found in {ref_file}: {record.id}" + ) + if len(record.seq) == 0: + raise InvalidReference(f"Empty sequence found in {ref_file}") + + def find_ref( cli_reference: pathlib.Path | None, found_files: list[pathlib.Path], @@ -70,7 +101,7 @@ def find_ref( :param found_files: A list of all files found in the scheme directory :param schemepath: The path to the scheme directory :return: The path to the reference.fasta file - :raises FileNotFoundError: If the reference.fasta file cannot be found + :raises FileNotFound: If the reference.fasta file cannot be found """ # Search for reference.fasta if cli_reference is None: # No reference specified @@ -83,14 +114,14 @@ def find_ref( if len(reference_list) == 1: return reference_list[0] else: - raise FileNotFoundError( + raise FileNotFound( f"Could not find a SINGLE reference.fasta file in {schemepath} or its subdirectories, found {len(reference_list)}. Please specify manually with --reference" ) elif cli_reference.exists(): - # TODO validate the reference.fasta file + validate_ref_file(cli_reference) return cli_reference else: - raise FileNotFoundError(f"Could not find file at {cli_reference}") + raise FileNotFound(f"Could not find file at {cli_reference}") def find_primerbed( @@ -104,7 +135,7 @@ def find_primerbed( :param found_files: A list of all files found in the scheme directory :param schemepath: The path to the scheme directory :return: The path to the primer.bed file - :raises FileNotFoundError: If the primer.bed file cannot be found + :raises FileNotFound: If the primer.bed file cannot be found """ # Search for primer.bed if cli_primerbed is None: # No primer.bed specified @@ -115,13 +146,13 @@ def find_primerbed( if len(primer_bed_list) == 1: return primer_bed_list[0] else: - raise FileNotFoundError( + raise FileNotFound( f"Could not find a SINGLE *.primer.bed file in {schemepath} or its subdirectories, found {len(primer_bed_list)}. Please specify manually with --primerbed" ) elif cli_primerbed.exists(): return cli_primerbed else: - raise FileNotFoundError(f"Could not find file at {cli_primerbed}") + raise FileNotFound(f"Could not find file at {cli_primerbed}") def find_config( @@ -243,6 +274,7 @@ def create( link_misc: Annotated[ list[str], typer.Option(help="Optional miscellaneous link") ] = [], + fix: Annotated[bool, typer.Option(help="Attempt to fix the scheme")] = False, ): """Create a new scheme in the required format""" @@ -250,7 +282,9 @@ def create( found_files = [x for x in schemepath.rglob("*")] # Check for a single primer.bed file - valid_primer_bed = find_primerbed(primerbed, found_files, schemepath) + valid_primer_bed = find_primerbed( + cli_primerbed=primerbed, found_files=found_files, schemepath=schemepath + ) match validate_bedfile(valid_primer_bed): case BEDFileResult.VALID: @@ -267,9 +301,17 @@ def create( primerbed_version: BedfileVersion = determine_bedfile_version(valid_primer_bed) if primerbed_version != BedfileVersion.V3: - raise typer.BadParameter( - f"Primerbed version {primerbed_version.value} is not supported. Please use a v3.0 bedfile" - ) + if fix: + try: + bedfile_str = regenerate_v3_bedfile(valid_primer_bed) + except Exception as e: + raise typer.BadParameter( + f"Could not fix the primerbed file: {e}" + ) from e + else: + raise typer.BadParameter( + f"Primerbed version {primerbed_version.value} is not supported. Please update to v3.0 bedfile (See FAQ), or try with --fix to attempt to parse." + ) # Find the reference.fasta file valid_ref = find_ref(reference, found_files, schemepath) @@ -298,7 +340,7 @@ def create( elif status == FindResult.NOT_FOUND: if algorithmversion is None: - raise FileNotFoundError( + raise FileNotFound( f"Could not find a config.json file in {schemepath}. Please specify manually with --configpath or specify algorithmversion with --algorithmversion" ) @@ -373,14 +415,18 @@ def create( # Check if the repo already exists repo_dir = output / schemename / str(ampliconsize) / schemeversion if repo_dir.exists(): - raise FileExistsError(f"{repo_dir} already exists") + raise SchemeExists(f"{repo_dir} already exists") repo_dir.mkdir(parents=True) # If this fails it will deleted the half completed scheme # Need to check the repo doesnt already exist try: # Copy files and trim whitespace - trim_file_whitespace(valid_primer_bed, repo_dir / "primer.bed") + if fix: + with open(repo_dir / "primer.bed", "w") as bedfile: + bedfile.write(bedfile_str) + else: + trim_file_whitespace(valid_primer_bed, repo_dir / "primer.bed") # parse the reference.fasta file with open(repo_dir / "reference.fasta", "w") as ref_file: records = SeqIO.parse(valid_ref, "fasta") diff --git a/primal_page/schemas.py b/primal_page/schemas.py index 819e8fe..5ba9785 100644 --- a/primal_page/schemas.py +++ b/primal_page/schemas.py @@ -49,6 +49,25 @@ def not_empty(x: list | set | str) -> list | set | str: return x +IUPACAmbiguousDNA = { + "A", + "G", + "K", + "Y", + "B", + "S", + "N", + "H", + "C", + "W", + "D", + "R", + "M", + "T", + "V", +} + + class Collection(Enum): # Authors ARTIC = "ARTIC" diff --git a/pyproject.toml b/pyproject.toml index 9996d0e..8e73520 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "primal-page" -version = "1.6.0" +version = "1.7.0" description = "Tooling and schemas for the primerschemes index" authors = ["ChrisKent "] readme = "README.md" diff --git a/tests/create/test_create.py b/tests/create/test_create.py index c1e923d..8abcd4a 100644 --- a/tests/create/test_create.py +++ b/tests/create/test_create.py @@ -3,6 +3,7 @@ import shutil import unittest +from primal_page.errors import FileNotFound from primal_page.main import FindResult, create, find_config, find_primerbed, find_ref from primal_page.schemas import SchemeStatus @@ -131,7 +132,7 @@ def test_find_ref(self): ) # Test fail when given a file with two refs - with self.assertRaises(FileNotFoundError): + with self.assertRaises(FileNotFound): new_schemepath = pathlib.Path( "tests/test_input" ) # This dir contains two schemes dirs @@ -139,7 +140,7 @@ def test_find_ref(self): find_ref(None, new_found_files, new_schemepath) # Test fail when given a file that doesn't exist - with self.assertRaises(FileNotFoundError): + with self.assertRaises(FileNotFound): find_ref( cli_reference=pathlib.Path( "tests/test_input/test_covid/missingref.fasta" @@ -173,13 +174,13 @@ def test_find_primerbed(self): ) # Test fail when given a file with two refs - with self.assertRaises(FileNotFoundError): + with self.assertRaises(FileNotFound): new_schemepath = pathlib.Path("tests/test_input") new_found_files = [x for x in new_schemepath.rglob("*")] find_primerbed(None, new_found_files, new_schemepath) # Test fail when given a file that doesn't exist - with self.assertRaises(FileNotFoundError): + with self.assertRaises(FileNotFound): find_primerbed( cli_primerbed=pathlib.Path("tests/test_input/test_covid/missing.bed"), found_files=self.found_files, diff --git a/tests/test_bedfile.py b/tests/test_bedfile.py index 192b890..70650db 100644 --- a/tests/test_bedfile.py +++ b/tests/test_bedfile.py @@ -1,7 +1,17 @@ import pathlib import unittest -from primal_page.bedfiles import BEDFileResult, validate_bedfile +from primal_page.bedfiles import ( + BEDFileResult, + BedfileVersion, + PrimerNameVersion, + convert_v1_primernames_to_v2, + determine_bedfile_version, + determine_primername_version, + validate_bedfile, + validate_bedfile_line_structure, +) +from primal_page.errors import InvalidBedFileLine, PrimerNameError, PrimerVersionError class TestBedfile(unittest.TestCase): @@ -13,22 +23,142 @@ class TestBedfile(unittest.TestCase): def test_validate_bedfile(self): # Test v1 - self.assertEqual( - validate_bedfile(self.v1bedfile), BEDFileResult.INVALID_STRUCTURE - ) + with self.assertRaises(InvalidBedFileLine): + validate_bedfile(self.v1bedfile) # Test v2 self.assertEqual(validate_bedfile(self.v2bedfile), BEDFileResult.VALID) # Test v3 self.assertEqual(validate_bedfile(self.v3bedfile), BEDFileResult.VALID) - # Test invalid - self.assertEqual( - validate_bedfile(self.invalidbedfile), BEDFileResult.INVALID_VERSION - ) + # Test invalid raises + with self.assertRaises(PrimerVersionError): + validate_bedfile(self.invalidbedfile) + # Test invalid structure - self.assertEqual( - validate_bedfile(self.invalidstructbedfile), - BEDFileResult.INVALID_STRUCTURE, - ) + with self.assertRaises(InvalidBedFileLine): + validate_bedfile(self.invalidstructbedfile) + + +class TestDeterminePrimernameVersion(unittest.TestCase): + def test_determine_primername_version(self): + test_cases = { + # VALID V2 Names + "artic-nCoV_1_LEFT_0": PrimerNameVersion.V2, + "artic-nCoV_100_LEFT_99": PrimerNameVersion.V2, + "marv-2023_1_LEFT_1": PrimerNameVersion.V2, + "78h13h_0_RIGHT_0": PrimerNameVersion.V2, + "artic-nCoV_100_RIGHT_99": PrimerNameVersion.V2, + "artic-nCoV_1_LEFT_1": PrimerNameVersion.V2, + # Valid V1 Names + "artic-nCoV_1_LEFT": PrimerNameVersion.V1, + "artic-nCoV_1_LEFT_alt": PrimerNameVersion.V1, + "artic-nCoV_100_LEFT_ALT": PrimerNameVersion.V1, + "marv-2023_100_RIGHT_ALT": PrimerNameVersion.V1, + "yby17_1_LEFT": PrimerNameVersion.V1, + "yby17_1_LEFT_alt": PrimerNameVersion.V1, + "yby17_1_LEFT_ALT": PrimerNameVersion.V1, + # Invalid Names + "easyfail": PrimerNameVersion.INVALID, + "marv-2023_1_RIGHT_2_alt": PrimerNameVersion.INVALID, + "artic*nCoV_100_LEFT_99": PrimerNameVersion.INVALID, + "": PrimerNameVersion.INVALID, + } + + for primername, result in test_cases.items(): + self.assertEqual(determine_primername_version(primername), result) + + +class TestDeterminBedfileVersion(unittest.TestCase): + v1line = ["test", "0", "10", "test_5_LEFT", "0", "+"] + v2line = ["test", "0", "10", "test_5_LEFT", "0", "+", "ATCG"] + v3line = ["test", "0", "10", "test_5_LEFT_1", "0", "+", "ATCG"] + invalidbedline = ["test", "0", "10", "test-5-LEFT", "0", "+", "ATCG"] + + def test_determine_bedfile_version(self): + # Test v1 + self.assertEqual(determine_bedfile_version([self.v1line]), BedfileVersion.V1) + # Test v2 + self.assertEqual(determine_bedfile_version([self.v2line]), BedfileVersion.V2) + # Test v3 + self.assertEqual(determine_bedfile_version([self.v3line]), BedfileVersion.V3) + # Test invalid primername raises + with self.assertRaises(PrimerNameError): + determine_bedfile_version([self.invalidbedline]) + # Test mixed raises + with self.assertRaises(PrimerVersionError): + determine_bedfile_version([self.v2line, self.v3line]) + + +class TestConvertV1PrimernamesToV2(unittest.TestCase): + def test_convert_v1_primernames_to_v2_valid(self): + valid_test_cases = { + # Valid V1 Names + "artic-nCoV_1_LEFT": "artic-nCoV_1_LEFT_0", + "artic-nCoV_100_LEFT": "artic-nCoV_100_LEFT_0", + "marv-2023_100_RIGHT": "marv-2023_100_RIGHT_0", + "yby17_1_LEFT": "yby17_1_LEFT_0", + } + for primername, result in valid_test_cases.items(): + self.assertEqual(convert_v1_primernames_to_v2(primername, 0), result) + + def test_convert_v1_primernames_to_v2_invalid(self): + invalid_test_cases: set = { + # Valid V1 Names + "artic-nCoV_1_LEFT_alt", + "artic-nCoV_100_LEFT_ALT", + "marv-2023_100_RIGHT_ALT", + "yby17_1_LEFT_alt", + "yby17_1_LEFT_ALT", + "easyfail", + "marv-2023_1_RIGHT_2_alt", + "artic*nCoV_100_LEFT_99", + "", + } + for primername in invalid_test_cases: + with self.assertRaises(ValueError): + convert_v1_primernames_to_v2(primername) + + +class TestValidateBedfileLineStructure(unittest.TestCase): + v1bedfile = pathlib.Path("tests/test_input/v1.primer.bed") + v2bedfile = pathlib.Path("tests/test_input/v2.primer.bed") + v3bedfile = pathlib.Path("tests/test_input/v3.primer.bed") + invalidbedfile = pathlib.Path("tests/test_input/invalid.struct.primer.bed") + + def test_bed_file_structure_v3(self): + """ + Test that the bed file structure is correct + """ + with open(self.v3bedfile) as bedfile: + for line in bedfile.readlines(): + self.assertTrue(validate_bedfile_line_structure(line)) + + def test_bed_file_structure_v2(self): + """ + Test that the bed file structure is correct + """ + with open(self.v2bedfile) as bedfile: + for line in bedfile.readlines(): + self.assertTrue(validate_bedfile_line_structure(line)) + + def test_bed_file_structure_v1(self): + """ + V1 Bedfiles are not supported in this index + """ + with open(self.v1bedfile) as bedfile: + results = [ + validate_bedfile_line_structure(line) for line in bedfile.readlines() + ] + self.assertFalse(all(results)) + + def test_bed_file_structure_invalid(self): + """ + Test that the bed file structure is correct + """ + with open(self.invalidbedfile) as bedfile: + results = [ + validate_bedfile_line_structure(line) for line in bedfile.readlines() + ] + self.assertFalse(all(results)) if __name__ == "__main__": diff --git a/tests/test_regex.py b/tests/test_regex.py index eb2bfb9..e0045d5 100644 --- a/tests/test_regex.py +++ b/tests/test_regex.py @@ -1,18 +1,11 @@ -import pathlib import re import unittest from primal_page.bedfiles import ( V1_PRIMERNAME, V2_PRIMERNAME, - PrimerNameVersion, - convert_v1_primernames_to_v2, - determine_bedfile_version, - determine_primername_version, - validate_bedfile_line_structure, ) from primal_page.schemas import ( - BedfileVersion, not_empty, validate_schemename, validate_schemeversion, @@ -156,133 +149,5 @@ def test_not_empty_empty(self): not_empty(test_case) -class TestDetermine_primername_version(unittest.TestCase): - def test_determine_primername_version(self): - test_cases = { - # VALID V2 Names - "artic-nCoV_1_LEFT_0": PrimerNameVersion.V2, - "artic-nCoV_100_LEFT_99": PrimerNameVersion.V2, - "marv-2023_1_LEFT_1": PrimerNameVersion.V2, - "78h13h_0_RIGHT_0": PrimerNameVersion.V2, - "artic-nCoV_100_RIGHT_99": PrimerNameVersion.V2, - "artic-nCoV_1_LEFT_1": PrimerNameVersion.V2, - # Valid V1 Names - "artic-nCoV_1_LEFT": PrimerNameVersion.V1, - "artic-nCoV_1_LEFT_alt": PrimerNameVersion.V1, - "artic-nCoV_100_LEFT_ALT": PrimerNameVersion.V1, - "marv-2023_100_RIGHT_ALT": PrimerNameVersion.V1, - "yby17_1_LEFT": PrimerNameVersion.V1, - "yby17_1_LEFT_alt": PrimerNameVersion.V1, - "yby17_1_LEFT_ALT": PrimerNameVersion.V1, - # Invalid Names - "easyfail": PrimerNameVersion.INVALID, - "marv-2023_1_RIGHT_2_alt": PrimerNameVersion.INVALID, - "artic*nCoV_100_LEFT_99": PrimerNameVersion.INVALID, - "": PrimerNameVersion.INVALID, - } - - for primername, result in test_cases.items(): - self.assertEqual(determine_primername_version(primername), result) - - def test_convert_v1_primernames_to_v2_valid(self): - valid_test_cases = { - # Valid V1 Names - "artic-nCoV_1_LEFT": "artic-nCoV_1_LEFT_0", - "artic-nCoV_100_LEFT": "artic-nCoV_100_LEFT_0", - "marv-2023_100_RIGHT": "marv-2023_100_RIGHT_0", - "yby17_1_LEFT": "yby17_1_LEFT_0", - } - for primername, result in valid_test_cases.items(): - self.assertEqual(convert_v1_primernames_to_v2(primername), result) - - def test_convert_v1_primernames_to_v2_invalid(self): - invalid_test_cases: set = { - # Valid V1 Names - "artic-nCoV_1_LEFT_alt", - "artic-nCoV_100_LEFT_ALT", - "marv-2023_100_RIGHT_ALT", - "yby17_1_LEFT_alt", - "yby17_1_LEFT_ALT", - "easyfail", - "marv-2023_1_RIGHT_2_alt", - "artic*nCoV_100_LEFT_99", - "", - } - for primername in invalid_test_cases: - with self.assertRaises(ValueError): - convert_v1_primernames_to_v2(primername) - - -class TestDeterminePrimerBedVersion(unittest.TestCase): - v1bedfile = pathlib.Path("tests/test_input/v1.primer.bed") - v2bedfile = pathlib.Path("tests/test_input/v2.primer.bed") - v3bedfile = pathlib.Path("tests/test_input/v3.primer.bed") - invalidbedfile = pathlib.Path("tests/test_input/invalid.primer.bed") - - def test_parse_v1_bedfile(self): - """ - See if the correct bedfile version is returned - """ - # Test v1 - self.assertEqual(determine_bedfile_version(self.v1bedfile), BedfileVersion.V1) - - def test_parse_v2_bedfile(self): - # Test v2 - self.assertEqual(determine_bedfile_version(self.v2bedfile), BedfileVersion.V2) - - def test_parse_v3_bedfile(self): - # Test v3 - self.assertEqual(determine_bedfile_version(self.v3bedfile), BedfileVersion.V3) - - def test_parse_invalid_bedfile(self): - # Test invalid - self.assertEqual( - determine_bedfile_version(self.invalidbedfile), BedfileVersion.INVALID - ) - - -class TestValidateBedfileLineStructure(unittest.TestCase): - v1bedfile = pathlib.Path("tests/test_input/v1.primer.bed") - v2bedfile = pathlib.Path("tests/test_input/v2.primer.bed") - v3bedfile = pathlib.Path("tests/test_input/v3.primer.bed") - invalidbedfile = pathlib.Path("tests/test_input/invalid.struct.primer.bed") - - def test_bed_file_structure_v3(self): - """ - Test that the bed file structure is correct - """ - with open(self.v3bedfile) as bedfile: - for line in bedfile.readlines(): - self.assertTrue(validate_bedfile_line_structure(line)) - - def test_bed_file_structure_v2(self): - """ - Test that the bed file structure is correct - """ - with open(self.v2bedfile) as bedfile: - for line in bedfile.readlines(): - self.assertTrue(validate_bedfile_line_structure(line)) - - def test_bed_file_structure_v1(self): - """ - V1 Bedfiles are not supported in this index - """ - with open(self.v1bedfile) as bedfile: - results = [ - validate_bedfile_line_structure(line) for line in bedfile.readlines() - ] - self.assertFalse(all(results)) - - def test_bed_file_structure_invalid(self): - """ - Test that the bed file structure is correct - """ - with open(self.invalidbedfile) as bedfile: - results = [ - validate_bedfile_line_structure(line) for line in bedfile.readlines() - ] - self.assertFalse(all(results)) - - if __name__ == "__main__": unittest.main()