forked from scienceystuff/findIt4Me
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetch_with_perl.pl
74 lines (61 loc) · 1.67 KB
/
fetch_with_perl.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
my $base = 'https://www.uniprot.org';
my $tool = 'uploadlists';
my $f = $ARGV[0];
open (my $fh, "<", $f) or die "no argument was passed to this script!\n";
my $db_xref = $ARGV[1];
# if ($db_xref eq "") {
if (not defined $db_xref) {
print "## No database name provided!\n";
$db_xref = 'P_REFSEQ_AC';
}
my $dbName = '';
my %h = (
EMBL_ID => 'EMBL-GeneBank',
EMBL => 'EMBL-GeneBank_CDS-region',
P_ENTREZGENEID => 'Entrez-Gene',
P_GI => 'GI-number',
EMBL_ID => 'EMBL-GeneBank',
REFSEQ_NT_ID => 'Refseq-nucleotide',
P_REFSEQ_AC => 'Refseq-protein',
ENSEMBL_ID => 'Ensembl-ID',
ENSEMBL_PRO_ID => 'Ensembl-Protein-ID',
ENSEMBL_TRS_ID => 'Ensembl-Transcript-ID',
GENENAME => 'GeneName-Identifier'
);
if (exists $h{$db_xref}) {
my $s = $h{$db_xref};
$dbName = $s;
}
else {
$dbName = 'Other DB';
}
my @a;
while (<$fh>) {
chomp;
push @a, $_;
}
my $q = join ' ', @a;
print "Using $dbName\n";
my $params = {
from => 'ACC',
to => $db_xref,
format => 'tab', # tab == whitespace
query => $q
};
my $contact = ''; # Please set a contact email address here to help us debug in case of problems (see https://www.uniprot.org/help/privacy).
my $agent = LWP::UserAgent->new(agent => "libwww-perl $contact");
push @{$agent->requests_redirectable}, 'POST';
my $response = $agent->post("$base/$tool/", $params);
while (my $wait = $response->header('Retry-After')) {
print STDERR "Waiting ($wait)...\n";
sleep $wait;
$response = $agent->get($response->base);
}
$response->is_success ?
print $response->content :
die 'Failed, got ' . $response->status_line .
' for ' . $response->request->uri . "\n";