-
Notifications
You must be signed in to change notification settings - Fork 1
/
DilutionRerunner.pl
executable file
·110 lines (79 loc) · 2.67 KB
/
DilutionRerunner.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/perl
# DilutionRerunner.pl
use strict; use warnings;
# Objective: run the Dilution Adjusted Model on expression data with empirical
# threshold generated from thresholdDetermination.R
## Note: This script assumes that RefSeq IDs are not duplicated ##
die "Usage: DilutionRerunner.pl <expression data> <threshold data> <n of reps> \n" unless @ARGV == 3;
my $exprFile = $ARGV[0];
my $reps = $ARGV[2];
# # # # # # # # # # # #
#
# Parse expression data
#
# # # # # # # # # # # #
# Remove the three character file extension from the input
my $exprFileRoot = substr($exprFile, 0, -4);
# Separate the replicates into their own files
print "Splitting $reps replicates into individual files.\n";
for (my $i = 2; $i <= $reps + 1; $i++) {
my $rep_tmp = $i - 1;
`cut -f 1,$i $exprFile > $exprFileRoot$rep_tmp.txt`
}
# # # # # # # # # # # #
#
# Parse threshold data
#
# # # # # # # # # # # #
my @thresh;
my $count = 0;
open(my $threshFile, "<$ARGV[1]") or die "error opening $ARGV[1] for reading\n";
print "Retrieving threshold values.\n";
while(<$threshFile>){
my $line = $_;
next if ($line =~ m/Threshold/i); # skip the header
my @tmpArray = split(/\s+/, $line); # get the threshold value
push(@thresh, $tmpArray[1]);
$count++; # count the thresholds seen
if ($count == $reps) { last;}
# exits while loop before using the threshold for the mean expression value
}
close $threshFile;
# print array of thresholds if needed
#for (my $i = 0; $i < scalar(@thresh); $i++) {
# print "$thresh[$i]\n";
#}
# # # # # # # # # # # #
#
# Apply Dilution Adjustment Model
#
# # # # # # # # # # # #
# Run the dilution_effect.pl on all replicates
run_dilution($reps, $exprFileRoot, @thresh);
# # # # # # # # # # #
#
# SUBROUTINES
#
# # # # # # # # # # #
sub run_dilution {
my ($reps, $exprFileRoot, @thresh) = @_;
for (my $i = 1; $i <= $reps; $i++) {
# run dilution_effect.pl with thresholds
my $adj_file = join("_", "$exprFileRoot$i", "dilutionOUT.txt");
`./dilution_effect.pl -a $thresh[$i-1] $exprFileRoot$i.txt > $adj_file`;
# pseudocount data in preparation of differential expression analysis
my $pseuct_file = join("_", "$exprFileRoot$i", "dilutionOUT_pseudoct.txt");
`./pseudocounter.pl $adj_file > $pseuct_file`;
# prepare file names for final output
my $final_root = "$exprFileRoot$i";
$final_root =~ s/Data/Results/;
my $normEnding = "_norm.txt";
my $adjEnding = "_adj.txt";
# separate dilution adjusted and unadjusted data for use with diffexpr_Beck.R
`sort $pseuct_file | cut -f 1,2 > $final_root$normEnding`;
`sort $pseuct_file | cut -f 1,3 > $final_root$adjEnding`;
# clean up unnecessary files
`rm $adj_file`;
`rm $pseuct_file`;
}
}