Merge pull request #9 from simonpenel/main

Version 4.2-39: Fixing bug spotted by Dr. Suzuki concerning forceToLower
lbbe-software · Apr 16, 2024 · a416f0e · a416f0e
2 parents 2ae4dc8 + 4861b31
commit a416f0e
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 6 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: seqinr
 Title: Biological Sequences Retrieval and Analysis
-Version: 4.2-37
+Version: 4.2-39
 Depends: R (>= 2.10.0)
 Imports: ade4,segmented
 Authors@R: c(person("Delphine", "Charif", role = "aut"),

diff --git a/R/read.alignment.R b/R/read.alignment.R
@@ -1,8 +1,13 @@
 #
 # Read files of aligned sequences in various formats
 #
-read.alignment <- function(file, format, forceToLower = TRUE, oldclustal = FALSE, ...)
+read.alignment <- function(file, format, forceToLower = TRUE,
+                           seqtype = c("DNA", "AA"), oldclustal = FALSE, ...)
 {
+    seqtype <- match.arg(seqtype) # default is DNA
+    if (seqtype == "AA") {
+      forceToLower = FALSE;
+    }
     #
     # Check if the file is an URL:
     #
@@ -22,7 +27,7 @@ read.alignment <- function(file, format, forceToLower = TRUE, oldclustal = FALSE
     if(file.access(file, mode = 4) != 0) stop(paste("File", file, "is not readable"))
 
     fasta2ali <- function(file, ...){
-        tmp <- read.fasta(file, as.string = TRUE, ...)
+        tmp <- read.fasta(file, seqtype = seqtype, forceDNAtolower = forceToLower , as.string = TRUE, ...)
         list(length(tmp), getName(tmp), unlist(getSequence(tmp, as.string = TRUE)))
     }
 
@@ -123,7 +128,10 @@ read.alignment <- function(file, format, forceToLower = TRUE, oldclustal = FALSE
     ali <- lapply(ali, as.character)
     #cleaning \r char
     ali <- lapply(ali, function (x ){gsub ('\r','',x)})
-    if(forceToLower) ali[[3]] <- lapply(ali[[3]], tolower)
+
+    #if(forceToLower) ali[[3]] <- lapply(ali[[3]], tolower)
+    if(forceToLower) ali[[3]] <- unlist(lapply(ali[[3]], tolower))
+    ali[[3]] <- as.list(ali[[3]])
     if(format == "mase"){
         ali <- list(nb = as.numeric(ali[[1]]), nam = ali[[2]], seq = ali[[3]], com = ali[[4]])
     } else {

diff --git a/man/read.alignment.Rd b/man/read.alignment.Rd
@@ -6,16 +6,18 @@
  These formats are used to store nucleotide or protein multiple alignments.
 }
 \usage{
-read.alignment(file, format, forceToLower = TRUE, oldclustal = FALSE, ...)
+read.alignment(file, format, forceToLower = TRUE, seqtype = c("DNA", "AA"), oldclustal = FALSE, ...)
 }
 \arguments{
   \item{file}{the name of the file which the aligned sequences are to be read from.
     If it does not contain an absolute or relative path, the file name is relative
     to the current working directory, \code{\link{getwd}}. }
   \item{format}{a character string specifying the format of the file: \code{mase},
   \code{clustal}, \code{phylip}, \code{fasta} or \code{msf} }
+  \item{seqtype}{ the nature of the sequence: \code{DNA} or \code{AA}, defaulting
+    to \code{DNA} }
   \item{forceToLower}{a logical defaulting to TRUE stating whether the returned
-    characters in the sequence should be in lower case (introduced in seqinR
+    characters in a DNA sequence should be in lower case (introduced in seqinR
     release 1.1-3).}
   \item{oldclustal}{a logical defaulting to FALSE stating whether to use the old C function to read a clustal file (which is faster but stricter concerning sequence line length). }
   \item{...}{For the \code{fasta} format, extra arguments are passed to the