-
Notifications
You must be signed in to change notification settings - Fork 0
/
Exclude_emptyFasta.pl
34 lines (30 loc) · 938 Bytes
/
Exclude_emptyFasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#######################
### Author: Xun Chen, Ph.D.
### Email: [email protected] or [email protected]
### ORCID: https://orcid.org/0000-0003-0327-1888
### Date: 2023/10/17
###
#######################
#!/usr/bin/perl
use strict;
use warnings;
# Filter a FASTA stream (files named on the command line, or STDIN):
# print every record that contains at least one character other than
# 'N' and the gap symbol '-', and drop the records that do not.
# Kept records are written as ">header\nSEQUENCE\n" with the sequence
# upper-cased and joined onto a single line.
#
# No per-record statistics (GC fraction etc.) are computed: dividing by the
# non-N length would abort with "Illegal division by zero" on exactly the
# empty / all-N records this script is meant to drop.

# Split one raw record into (header, sequence).
#   $record       - record text with the trailing "\n>" separator already
#                   removed by chomp
#   $strip_marker - true when the record is the first one of an input file
#                   and may therefore still carry its leading '>'
# Returns the header line (empty string if the record has no lines at all)
# and the sequence as one upper-case string without any whitespace.
sub _parse_fasta_record {
    my ($record, $strip_marker) = @_;

    # Only the record-start marker is removed; a '>' that is part of the
    # header text (e.g. "geneA->B") is kept as is.
    $record =~ s/\A>// if $strip_marker;

    my ($seq_id, @seq_lines) = split /\n/, $record;
    $seq_id = '' unless defined $seq_id;
    $seq_id =~ s/\r\z//;    # CRLF input: drop the stray carriage return

    my $seq = uc join '', @seq_lines;
    $seq =~ s/\s+//g;       # whitespace (incl. "\r") is not sequence data

    return ($seq_id, $seq);
}

# Number of characters in $seq that are neither 'N'/'n' nor the gap '-'.
sub _count_informative_bases {
    my ($seq) = @_;

    # tr/// with an empty replacement list only counts; the trailing '-'
    # in the search list is a literal hyphen, not a range.
    my $uninformative = ($seq =~ tr/Nn-//);

    return length($seq) - $uninformative;
}

{
    # Read one FASTA record per iteration instead of one line; the change
    # to the input record separator is confined to this block.
    local $/ = "\n>";

    # The separator consumes the '>' of every record except the first one
    # of each input file, so remember when a new file starts.
    my $at_file_start = 1;

    while (my $record = <>) {
        chomp $record;      # removes the trailing "\n>" separator, if any

        my ($seq_id, $seq) = _parse_fasta_record($record, $at_file_start);
        $at_file_start = eof;   # true once the current input file is exhausted

        next unless _count_informative_bases($seq) > 0;
        print ">$seq_id\n$seq\n";
    }
}