Skip to content

Commit

Permalink
Created more verbose error messages, using custom messages. Added an …
Browse files Browse the repository at this point in the history
…option to attempt to fix the v2 primernames to v3
  • Loading branch information
ChrisgKent committed Jun 27, 2024
1 parent a61b36e commit f670ccd
Show file tree
Hide file tree
Showing 11 changed files with 406 additions and 272 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ $ primal-page create [OPTIONS] SCHEMEPATH
* `--links-homepage TEXT`: Optional link to homepage
* `--link-vendor TEXT`: Optional link to vendors
* `--link-misc TEXT`: Optional miscellaneous link
* `--fix / --no-fix`: Attempt to fix the scheme [default: no-fix]
* `--help`: Show this message and exit.


Expand Down Expand Up @@ -559,7 +560,7 @@ $ primal-page modify change-status [OPTIONS] SCHEMEINFO [SCHEMESTATUS]:[withdraw
**Arguments**:

* `SCHEMEINFO`: The path to info.json [required]
* `[SCHEMESTATUS]:[withdrawn|deprecated|autogenerated|draft|tested|validated]`: The scheme class [default: SchemeStatus.DRAFT]
* `[SCHEMESTATUS]:[withdrawn|deprecated|autogenerated|draft|tested|validated]`: The scheme class [default: draft]

**Options**:

Expand Down Expand Up @@ -677,5 +678,4 @@ $ primal-page remove [OPTIONS] SCHEMEINFO

**Options**:

* `--help`: Show this message and exit.

* `--help`: Show this message and exit.
2 changes: 1 addition & 1 deletion primal_page/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.6.0"
__version__ = "1.7.0"
218 changes: 122 additions & 96 deletions primal_page/bedfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,98 @@
import re
from enum import Enum

from primal_page.errors import InvalidBedFileLine, PrimerNameError, PrimerVersionError

# Primername patterns
# V2: <prefix>_<amplicon number>_<LEFT|RIGHT>_<primer number>, where the prefix
# may only contain letters, digits and hyphens.
V2_PRIMERNAME = r"^[a-zA-Z0-9\-]+_[0-9]+_(LEFT|RIGHT)_[0-9]+$"
# V1: <prefix>_<amplicon number>_<LEFT|RIGHT> followed by zero or more
# _ALT / _alt suffixes, each with optional trailing digits.
V1_PRIMERNAME = r"^[a-zA-Z0-9\-]+_[0-9]+_(LEFT|RIGHT)(_ALT[0-9]*|_alt[0-9]*)*$"

# Bedfile line structure
## Checks the structure only, not the contents: exactly seven tab-separated
## columns (non-whitespace, digits, digits, non-whitespace, digits, + or -, letters).
BEDFILE_LINE = r"^\S+\t\d+\t\d+\t\S+\t\d+\t(\+|\-)\t[a-zA-Z]+$"


# bedfile versions
class BedfileVersion(Enum):
    """
    Known versions of the primer bedfile format.

    - V1: 6 column bedfile
    - V2: 7 column bedfile using V1 primernames
    - V3: 7 column bedfile using V2 primernames
    - INVALID: the bedfile does not fit any of the versions above
    """

    V1 = "v1.0"
    V2 = "v2.0"
    V3 = "v3.0"
    INVALID = "invalid"  # Not applicable


class BEDFileResult(Enum):
    """Possible outcomes of validating a bedfile."""

    VALID = 0
    INVALID_VERSION = 1
    INVALID_STRUCTURE = 2


class PrimerNameVersion(Enum):
    """Naming scheme a primername follows (V1, V2) or INVALID if it fits neither."""

    V1 = "v1"
    V2 = "v2"
    INVALID = "invalid"  # Not applicable


class BedLine:
    """
    A single primer record of a 7 column bedfile.

    The seven constructor arguments are stored unchanged. The first three
    "_"-separated parts of the primername are additionally exposed as
    ``prefix``, ``amplicon_number`` (as int) and ``pn_direction``.
    ``primernumber`` starts as None; once it is set, ``parsed_bedline`` writes
    a regenerated primername instead of the original one.

    :raises PrimerNameError: If the primername is neither a v1 nor a v2 name.
    """

    def __init__(
        self,
        chrom: str,
        start: int,
        end: int,
        primername: str,
        pool: int,
        strand: str,
        seq: str,
    ):
        # Raw bedfile columns
        self.chrom = chrom
        self.start = start
        self.end = end
        self.primername = primername
        self.pool = pool
        self.strand = strand
        self.seq = seq

        # Reject primernames that match neither naming scheme
        name_version = determine_primername_version(primername)
        if name_version == PrimerNameVersion.INVALID:
            raise PrimerNameError(f"{primername}")

        # Fields derived from the primername
        name_parts = primername.split("_")
        self.prefix = name_parts[0]
        self.amplicon_number = int(name_parts[1])
        self.pn_direction = name_parts[2]
        self.primernumber: int | None = None

    def __str__(self):
        columns = (
            self.chrom,
            self.start,
            self.end,
            self.primername,
            self.pool,
            self.strand,
            self.seq,
        )
        return "\t".join(f"{column}" for column in columns)

    def parsed_primername(self, primernumber: int) -> str:
        """
        Build a primername from the parsed fields and the given primernumber.
        :param primernumber: The primernumber to use as the last name field.
        :type primernumber: int
        :return: prefix, amplicon number, direction and primernumber joined by "_".
        :rtype: str
        """
        name_fields = (
            self.prefix,
            str(self.amplicon_number),
            self.pn_direction,
            str(primernumber),
        )
        return "_".join(name_fields)

    def parsed_bedline(self) -> str:
        """
        Render the record as a tab-separated bedfile line.
        :return: The line, using the regenerated primername if primernumber is set.
        :rtype: str
        """
        # Keep the original name until a primernumber has been assigned
        if self.primernumber is None:
            name = self.primername
        else:
            name = self.parsed_primername(self.primernumber)
        return f"{self.chrom}\t{self.start}\t{self.end}\t{name}\t{self.pool}\t{self.strand}\t{self.seq}"


def determine_primername_version(primername: str) -> PrimerNameVersion:
"""
Determines the version of the primer name.
Args:
primername (str): The primer name to check.
Returns:
PrimerNameVersion: The version of the primer name.
Raises:
None.
:param primername: The primer name to check.
:type primername: str
:return: The version of the primer name.
:rtype: PrimerNameVersion
:raises: None
"""
if re.search(V2_PRIMERNAME, primername):
return PrimerNameVersion.V2
Expand All @@ -34,11 +103,17 @@ def determine_primername_version(primername: str) -> PrimerNameVersion:
return PrimerNameVersion.INVALID


def convert_v1_primernames_to_v2(primername: str) -> str:
def convert_v1_primernames_to_v2(primername: str, primernumber=1) -> str:
"""
Convert a v1 primername to a v2 primername. Cannot handle alt primers
:param primername: The v1 primername
:return: The v2 primername
Convert a v1 primername to a v2 primername. Cannot convert alt primers.
:param primername: The v1 primername to convert.
:type primername: str
:param primernumber: The primernumber to add to the primername.
:type primernumber: int
:return: The v2 primername.
:rtype: str
:raises: ValueError
"""
# Check if this is a v1 primername
if determine_primername_version(primername) != PrimerNameVersion.V1:
Expand All @@ -47,34 +122,21 @@ def convert_v1_primernames_to_v2(primername: str) -> str:
# Split the primername
data = primername.split("_")
# Remove the alt
if data[-1] == "alt" or data[-1] == "ALT":
if "alt" in data[-1].lower():
raise ValueError(f"{primername} is a v1 alt primername, cannot convert")

data.append("0")
# Join back together
# Add primernumber and return
data.append(str(primernumber))
return "_".join(data)


# Bedfile versions
## This doesn't parse the contents just the structure
BEDFILE_LINE = r"^\S+\t\d+\t\d+\t\S+\t\d+\t(\+|\-)\t[a-zA-Z]+$"


class BEDFileResult(Enum):
    """Possible outcomes of validating a bedfile."""

    VALID = 0
    INVALID_VERSION = 1
    INVALID_STRUCTURE = 2


def validate_bedfile_line_structure(line: str) -> bool:
"""
This function validates the structure of a bedfile line, but not the contents.
Args:
line (str): The line to be validated.
Returns:
bool: True if the line structure is valid, False otherwise.
This function validates the structure of a bedfile line, but not the contents. Empty lines will error.
:param line: The line to validate.
:type line: str
:return: Whether the line is valid or not.
:rtype: bool
"""
line = line.strip()
if line.startswith("#"):
Expand Down Expand Up @@ -103,8 +165,13 @@ def validate_bedfile(bedfile: pathlib.Path) -> BEDFileResult:

# Check each line
for line in bedlines:
# Skip empty lines
if not line:
continue
if not validate_bedfile_line_structure(line):
return BEDFileResult.INVALID_STRUCTURE
raise InvalidBedFileLine(
f"Invalid line in bedfile:\n{line}\nShould contain 7 columns separated by tabs."
)

# Check the bedfile names.
match determine_bedfile_version(bedfile):
Expand All @@ -114,25 +181,14 @@ def validate_bedfile(bedfile: pathlib.Path) -> BEDFileResult:
return BEDFileResult.VALID


# bedfile versions
class BedfileVersion(Enum):
    """
    Known versions of the primer bedfile format.

    - V1: 6 column bedfile
    - V2: 7 column bedfile using V1 primernames
    - V3: 7 column bedfile using V2 primernames
    - INVALID: the bedfile does not fit any of the versions above
    """

    V1 = "v1.0"
    V2 = "v2.0"
    V3 = "v3.0"
    INVALID = "invalid"  # Not applicable


def determine_bedfile_version(input: list[list] | pathlib.Path) -> BedfileVersion:
"""
Determine the bedfile version
:param input: Either the bedfile lines as a list or the bedfile path
:return: The bedfile version
:raises:
- PrimerNameError: If there are invalid primer names
- PrimerVersionError: If there is a mix of primernames
"""
if isinstance(input, pathlib.Path):
bedlines, _ = read_bed_file(input)
Expand All @@ -146,60 +202,30 @@ def determine_bedfile_version(input: list[list] | pathlib.Path) -> BedfileVersio
# If 7 cols then v2 or v3
# Check from primername
primernames = [x[3] for x in bedlines]
primer_name_versions = {determine_primername_version(x) for x in primernames}
primer_name_versions_dict = {
x: determine_primername_version(x) for x in primernames
}
primer_name_versions = set(primer_name_versions_dict.values())

if primer_name_versions == {PrimerNameVersion.V1}:
return BedfileVersion.V2
elif primer_name_versions == {PrimerNameVersion.V2}:
return BedfileVersion.V3
# Invalid if we get here
# Mix of v1, v2 or invalid
return BedfileVersion.INVALID


class BedLine:
def __init__(
self,
chrom: str,
start: int,
end: int,
primername: str,
pool: int,
strand: str,
seq: str,
):
self.chrom = chrom
self.start = start
self.end = end
self.primername = primername
self.pool = pool
self.strand = strand
self.seq = seq

# Validate the primername
if determine_primername_version(primername) == PrimerNameVersion.INVALID:
raise ValueError(f"Invalid primername: {primername}")
if PrimerNameVersion.INVALID in primer_name_versions:
# Raise an error if there are invalid primer names
invalid_primer_names = {
pn
for pn, pv in primer_name_versions_dict.items()
if pv == PrimerNameVersion.INVALID
}
raise PrimerNameError(f"{invalid_primer_names}")

# Autogenerated fields
self.prefix = primername.split("_")[0]
self.amplicon_number = int(primername.split("_")[1])
self.pn_direction = primername.split("_")[2]
self.primernumber: int | None = None

def __str__(self):
return f"{self.chrom}\t{self.start}\t{self.end}\t{self.primername}\t{self.pool}\t{self.strand}\t{self.seq}"

def parsed_primername(self, primernumber: int) -> str:
return "_".join(
[
self.prefix,
str(self.amplicon_number),
self.pn_direction,
str(primernumber),
]
)
if primer_name_versions == {PrimerNameVersion.V1, PrimerNameVersion.V2}:
raise PrimerVersionError("Mix of v1 and v2 primer names")

def parsed_bedline(self) -> str:
return f"{self.chrom}\t{self.start}\t{self.end}\t{self.primername if self.primernumber is None else self.parsed_primername(self.primernumber)}\t{self.pool}\t{self.strand}\t{self.seq}"
# This should not be reached
return BedfileVersion.INVALID


def read_bedlines(bedfilepath: pathlib.Path) -> tuple[list[BedLine], list[str]]:
Expand Down Expand Up @@ -307,4 +333,4 @@ def regenerate_v3_bedfile(bedfile: pathlib.Path) -> str:
# Add the line to the bedfile
bedfile_str_list.append(line.parsed_bedline())

return "\n".join(bedfile_str_list)
return "\n".join(bedfile_str_list) + "\n"
6 changes: 4 additions & 2 deletions primal_page/dev.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from Bio import SeqIO
import hashlib
import json
import pathlib

import typer
from Bio import SeqIO
from typing_extensions import Annotated

from primal_page.bedfiles import (
Expand Down Expand Up @@ -53,7 +53,9 @@ def regenerate(
# Hash the reference.fasta file
# If the hash is different, rewrite the file
ref_hash = hashfile(scheme_path / "reference.fasta")
ref_str = "".join((x.format("fasta") for x in SeqIO.parse(scheme_path / "reference.fasta", "fasta")))
ref_str = "".join(
x.format("fasta") for x in SeqIO.parse(scheme_path / "reference.fasta", "fasta")
)
if ref_hash != hashlib.md5(ref_str.encode()).hexdigest():
with open(scheme_path / "reference.fasta", "w") as ref_file:
ref_file.write(ref_str)
Expand Down
45 changes: 45 additions & 0 deletions primal_page/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from click import UsageError


class PrimerNameError(UsageError):
    """
    Raised when one or more primernames are invalid.

    The offending name(s) are embedded in a message that also describes the
    expected primername format.
    """

    def __init__(self, primername: str):
        # Tell the user what was wrong and what a valid name looks like
        message = f"Invalid primernames: {primername}. Please use format (name)_(amplicon-number)_(LEFT|RIGHT) with optional _(primer-number)"
        super().__init__(message)


class PrimerVersionError(UsageError):
    """Raised for an unexpected primername version, e.g. a mix of v1 and v2 names"""

    def __init__(self, message: str) -> None:
        # The caller supplies the complete, user-facing message
        super().__init__(message)


class InvalidBedFileLine(UsageError):
    """Raised when a line of a bedfile is invalid"""

    def __init__(self, message: str) -> None:
        # The caller supplies the complete, user-facing message
        super().__init__(message)


class SchemeExists(UsageError):
    """Raised when the Scheme is already present"""

    def __init__(self, message: str) -> None:
        # The caller supplies the complete, user-facing message
        super().__init__(message)


class FileNotFound(UsageError):
    """Raised when a required file cannot be found"""

    def __init__(self, message: str) -> None:
        # The caller supplies the complete, user-facing message
        super().__init__(message)


class InvalidReference(UsageError):
    """Raised when a reference is invalid"""

    # NOTE(review): the previous docstring was copied from FileNotFound. The
    # raise sites are not visible here - confirm what counts as an invalid
    # reference and tighten the wording if needed.
    def __init__(self, message: str):
        # The caller supplies the complete, user-facing message
        super().__init__(message)
Loading

0 comments on commit f670ccd

Please sign in to comment.