forked from weizhongli/cdhit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clstr_rev.pl
executable file
·83 lines (75 loc) · 1.59 KB
/
clstr_rev.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env perl
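# Suppose nr90 was clustered from nr100, and nr80 was then clustered from nr90,
# so you have nr90.clstr and nr80.clstr.
# nr80.clstr then lists only sequences that survived into nr90; the nr100
# sequences that were merged into nr90 clusters do not appear in it.
# This script prints a new nr80new.clstr, written as if nr80 had been clustered
# directly from nr100.  Usage: clstr_rev.pl nr90.clstr nr80.clstr > nr80new.clstr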
use strict;
use warnings;

# Combine two levels of clustering into a single .clstr listing.
#
# Arguments (positional):
#   1. the .clstr file of the first clustering step  (e.g. nr90.clstr)
#   2. the .clstr file of the second clustering step (e.g. nr80.clstr)
#
# The rewritten second-step file is printed to STDOUT: every member that was
# the representative of a multi-member first-step cluster is replaced by all
# members of that first-step cluster, and members are renumbered from 0
# within each output cluster.
my $file90 = shift;
my $file80 = shift;
defined $file90 and defined $file80
    or die "Usage: $0 nr90.clstr nr80.clstr > nr80new.clstr\n";

my %gi2clstr = load_clusters($file90);
expand_clusters($file80, \%gi2clstr);

# Read a .clstr file and return a hash mapping the id of each cluster's
# representative (the member line ending in "*") to the text of all member
# lines of that cluster, newline-terminated. Clusters with a single member
# line are not stored, since there is nothing to expand for them.
sub load_clusters {
    my ($path) = @_;
    open(my $in, '<', $path) or die "Can not open file $path: $!\n";

    my %cluster_of;
    my ($rep_id, $text, $count) = ("", "", 0);

    # Record the cluster collected so far (if it qualifies) and start afresh.
    # length() rather than truthiness, so that an id of "0" is not discarded.
    my $flush = sub {
        $cluster_of{$rep_id} = $text if length($rep_id) and $count > 1;
        ($rep_id, $text, $count) = ("", "", 0);
    };

    while (my $line = <$in>) {
        if ($line =~ /^>/) {    # ">Cluster N" header starts a new cluster
            $flush->();
            next;
        }
        $line =~ s/\r?\n\z//;   # tolerate CRLF and a missing final newline
        $text .= "$line\n";
        # Only a trailing "*" marks the representative; a "*" elsewhere on
        # the line (e.g. inside the sequence name) must not count.
        if ($line =~ /\*$/ and $line =~ />(.+)\.\.\./) {
            $rep_id = $1;
        }
        $count++;
    }
    close($in);
    $flush->();                 # the last cluster has no following header

    return %cluster_of;
}

# Read the second-step .clstr file and print it with first-step clusters
# expanded in place.
#   $path       - path of the second-step .clstr file
#   $cluster_of - hash ref as returned by load_clusters()
sub expand_clusters {
    my ($path, $cluster_of) = @_;
    open(my $in, '<', $path) or die "Can not open file $path: $!\n";

    my $no = 0;                 # running member index within the output cluster
    while (my $line = <$in>) {
        if ($line =~ /^>/) {    # cluster header: copy through, restart numbering
            print $line;
            $no = 0;
            next;
        }

        my ($id) = $line =~ />(.+)\.\.\./;
        if (!defined $id) {     # not a member line: copy through unchanged
            print $line;
            next;
        }

        # Strip the line ending explicitly; chop() would eat the "*" or "%"
        # of a final line that has no trailing newline.
        $line =~ s/\r?\n\z//;
        my $is_rep = ($line =~ /\*$/) ? 1 : 0;
        # Identity of this member to its second-step representative. The
        # match is anchored on the "..." that ends the name so an " at "
        # inside a sequence description is not mistaken for it.
        my $iden = ($line =~ /\.\.\. at (.+)$/) ? $1 : "100%";

        my $stored = $cluster_of->{$id};
        if (!defined $stored) {
            # No first-step cluster to expand: just renumber the line.
            $line =~ s/^\d+/$no/;
            print "$line\n";
            $no++;
            next;
        }

        my @members = split(/\n/, $stored);
        for my $member (@members) {
            $member =~ s/^\d+/$no/;
            if (not $is_rep) {
                if ($member =~ /\*$/) {
                    # The first-step representative is an ordinary member
                    # here: swap its trailing "*" for this line's identity.
                    $member =~ s/\*$/at $iden/;
                }
                else {
                    # Prefix the second-step identity to the first-step one.
                    $member =~ s/(\.\.\.) at (.+)$/$1 at $iden,$2/;
                }
            }
            print "$member\n";
            $no++;
        }
    }
    close($in);
    return;
}