From 26a20c3fd658eac04b6dd10e651c2c0bd1f1bd4d Mon Sep 17 00:00:00 2001 From: Simon Penel Date: Wed, 13 Dec 2023 15:02:25 +0100 Subject: [PATCH] Adding argument force.first.aa.to.Met (Dr. Haruo Suzuki suggestion) --- DESCRIPTION | 2 +- R/getTrans.R | 32 ++++++++++++++++++++------------ R/translate.R | 6 ++++-- man/getTrans.Rd | 19 +++++++++++-------- man/translate.Rd | 7 ++++++- 5 files changed, 42 insertions(+), 24 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e755c46..362a6f7 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: seqinr Title: Biological Sequences Retrieval and Analysis -Version: 4.2-36 +Version: 4.2-37 Depends: R (>= 2.10.0) Imports: ade4,segmented Authors@R: c(person("Delphine", "Charif", role = "aut"), diff --git a/R/getTrans.R b/R/getTrans.R index 78981b3..4d8bfc0 100644 --- a/R/getTrans.R +++ b/R/getTrans.R @@ -2,34 +2,42 @@ # To translate sequences: # -getTrans <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) +getTrans <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, ...) UseMethod("getTrans") -getTrans.default <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) +getTrans.default <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, ...) stop(paste("no getTrans method for objects of class:", class(object))) -getTrans.list <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) +getTrans.list <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, ...) lapply(seq_len(length(object)), - function(i) getTrans(object[[i]], sens = sens, NAstring = NAstring, ambiguous = ambiguous, ...)) + function(i) getTrans(object[[i]], sens = sens, NAstring = NAstring, ambiguous = ambiguous, + force.first.aa.to.Met = force.first.aa.to.Met, ...)) -getTrans.character <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., frame = 0, numcode = 1) - translate(seq = object, frame = frame, sens = sens, numcode = numcode, NAstring = NAstring, ambiguous = ambiguous) +getTrans.character <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, + ..., frame = 0, numcode = 1) + translate(seq = object, frame = frame, sens = sens, numcode = numcode, NAstring = NAstring, ambiguous = ambiguous, + force.first.aa.to.Met = force.first.aa.to.Met) -getTrans.SeqFastadna <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., frame = 0, numcode = 1){ +getTrans.SeqFastadna <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, + ..., frame = 0, numcode = 1){ dnaseq <- getSequence(object, as.string = FALSE) - translate(seq = dnaseq, frame = frame, sens = sens, numcode = numcode, NAstring = NAstring, ambiguous = ambiguous) + translate(seq = dnaseq, frame = frame, sens = sens, numcode = numcode, NAstring = NAstring, ambiguous = ambiguous, + force.first.aa.to.Met = force.first.aa.to.Met) } getTrans.SeqFrag <- getTrans.SeqFastadna -getTrans.SeqAcnucWeb <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., frame = "auto", numcode = "auto"){ +getTrans.SeqAcnucWeb <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, + ..., frame = "auto", numcode = "auto"){ dnaseq <- getSequence(object, as.string = FALSE) if(numcode == "auto") numcode <- attr(object, "ncbigc") if(frame == "auto") frame <- attr(object, "frame") - translate(seq = dnaseq, frame = frame, sens = sens, numcode = numcode, NAstring = NAstring, ambiguous = ambiguous) + translate(seq = dnaseq, frame = frame, sens = sens, numcode = numcode, NAstring = NAstring, ambiguous = ambiguous, + force.first.aa.to.Met = force.first.aa.to.Met) } -getTrans.qaw <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) getTrans(object$req, ...) +getTrans.qaw <- function(object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, ...) + getTrans(object$req, force.first.aa.to.Met = force.first.aa.to.Met, ...) -getTrans.logical <- function (object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) +getTrans.logical <- function (object, sens = "F", NAstring = "X", ambiguous = FALSE, force.first.aa.to.Met = FALSE, ...) object # so that NA is returned for virtual lists diff --git a/R/translate.R b/R/translate.R index 72f555d..184f637 100755 --- a/R/translate.R +++ b/R/translate.R @@ -1,5 +1,5 @@ translate <- function(seq, frame = 0, sens = "F", numcode = 1, NAstring = "X", - ambiguous = FALSE) + ambiguous = FALSE, force.first.aa.to.Met = FALSE) { if(any(seq%in%LETTERS)){ @@ -68,7 +68,9 @@ translate <- function(seq, frame = 0, sens = "F", numcode = 1, NAstring = "X", if( all(allaminoacids == allaminoacids[1])) result[i] <- allaminoacids[1] } } - + if (force.first.aa.to.Met) { + result[1] <- "M" + } return( result ) } diff --git a/man/getTrans.Rd b/man/getTrans.Rd index cd63a29..a470046 100644 --- a/man/getTrans.Rd +++ b/man/getTrans.Rd @@ -14,14 +14,14 @@ This function translates nucleic acid sequences into the corresponding peptide sequence. It can translate in any of the 3 forward or three reverse sense frames. In the case of reverse sense, the reverse-complement of the sequence is taken. It can translate using the standard (universal) genetic code and also with non-standard codes. Ambiguous bases can also be handled. } \usage{ -getTrans(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) -\method{getTrans}{SeqAcnucWeb}(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., - frame = "auto", numcode = "auto") -\method{getTrans}{SeqFastadna}(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., - frame = 0, numcode = 1) -\method{getTrans}{SeqFrag}(object, sens = "F", NAstring = "X", ambiguous = FALSE, ..., - frame = 0, numcode = 1) - +getTrans(object, sens = "F", NAstring = "X", ambiguous = FALSE, + force.first.aa.to.Met = FALSE, ...) +\method{getTrans}{SeqAcnucWeb}(object, sens = "F", NAstring = "X", ambiguous = FALSE, + force.first.aa.to.Met = FALSE, ..., frame = "auto", numcode = "auto") +\method{getTrans}{SeqFastadna}(object, sens = "F", NAstring = "X", ambiguous = FALSE, + force.first.aa.to.Met = FALSE, ..., frame = 0, numcode = 1) +\method{getTrans}{SeqFrag}(object, sens = "F", NAstring = "X", ambiguous = FALSE, + force.first.aa.to.Met = FALSE, ..., frame = 0, numcode = 1) } \arguments{ \item{object}{ an object of the class \code{\link{SeqAcnucWeb}} @@ -31,6 +31,8 @@ getTrans(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) \item{NAstring}{ How to translate amino-acids when there are ambiguous bases in codons. } \item{ambiguous}{ If TRUE, ambiguous bases are taken into account so that for instance GGN is translated to Gly in the standard genetic code. } + \item{force.first.aa.to.Met}{ If TRUE, the first codon in the sequence will be translated to + Methionine regardless of the codon in order to treat rare start codons like TTG. } \item{frame}{ Frame(s) (0,1,2) to translate. By default the frame \code{0} is used. } \item{sens}{ Direction for translation: \code{F} for the direct strand e and \code{R} for the reverse complementary strand. } \item{...}{further arguments passed to or from other methods} @@ -82,6 +84,7 @@ getTrans(object, sens = "F", NAstring = "X", ambiguous = FALSE, ...) # toycds <- s2c("tctgagcaaataaatcgg") getTrans(toycds) # should be c("S", "E", "Q", "I", "N", "R") + getTrans(toycds, force.first.aa.to.Met = TRUE) # should be c("M", "E", "Q", "I", "N", "R") # # Toy CDS example with ambiguous bases: # diff --git a/man/translate.Rd b/man/translate.Rd index 0c6220a..d2015cc 100644 --- a/man/translate.Rd +++ b/man/translate.Rd @@ -10,7 +10,8 @@ be handled. } \usage{ -translate(seq, frame = 0, sens = "F", numcode = 1, NAstring = "X", ambiguous = FALSE) +translate(seq, frame = 0, sens = "F", numcode = 1, NAstring = "X", ambiguous = FALSE, + force.first.aa.to.Met = FALSE) } \arguments{ \item{seq}{ the sequence to translate as a vector of single characters in lower case letters. } @@ -20,6 +21,9 @@ translate(seq, frame = 0, sens = "F", numcode = 1, NAstring = "X", ambiguous = F \item{NAstring}{ How to translate amino-acids when there are ambiguous bases in codons. } \item{ambiguous}{ If TRUE, ambiguous bases are taken into account so that for instance GGN is translated to Gly in the standard genetic code. } + \item{force.first.aa.to.Met}{ If TRUE, the first codon in the sequence will be translated to + Methionine regardless of the codon in order to treat rare start codons like TTG. } + } \details{ The following genetic codes are described here. The number preceding each code @@ -76,6 +80,7 @@ Use \code{\link{aaa}} to get the three-letter code for amino-acids.} ## toycds <- s2c("tctgagcaaataaatcgg") translate(seq = toycds) # should be c("S", "E", "Q", "I", "N", "R") +translate(seq = toycds, force.first.aa.to.Met = TRUE) # should be c("M", "E", "Q", "I", "N", "R") ## ## Toy CDS example with ambiguous bases: ##