-
Notifications
You must be signed in to change notification settings - Fork 2
/
liftOverBEDPE.sh
74 lines (56 loc) · 1.74 KB
/
liftOverBEDPE.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/sh
#convert bedpe files to a different genome build
for arg in "$@"
do
index=$(echo $arg | cut -f1 -d=)
val=$(echo $arg | cut -f2 -d=)
case $index in
input) input=$val;;
chain) chain=$val;;
*)
esac
done
if [ -f $chain ]; then
FROM=${chain%To*}
FROM=${FROM##*/}
TO=${chain##*To}
TO=${TO%.over.chain}
echo "This script converts the input file $input from genome $FROM to $TO"
else
echo "Chain file does not exist."
exit 1
fi
echo "===================================================="
if [ -x "$(command -v liftOver)" ]; then
echo "1) Adding a unique ID to each row in the input file $input"
else
echo "Error: liftOver does not exist or cannot be executed."
exit 1
fi
`awk 'BEGIN {FS=OFS="\t"} {print $0,NR}' $input > input_NR.txt`
echo "Done!"
echo "===================================================="
echo "2) Split the input files into two BED files"
`awk 'BEGIN {FS=OFS="\t"} {print $1,$2,$3,$NF}' input_NR.txt > input_1st.txt`
`cut -f4- input_NR.txt > input_2nd.txt`
echo "Done!"
echo "===================================================="
echo "3) Converting from $FROM to $TO"
`liftOver -bedPlus=3 input_1st.txt $chain input_1st.bed tmp.txt`
`liftOver -bedPlus=3 input_2nd.txt $chain input_2nd.bed tmp.txt`
echo "===================================================="
echo "4) Merging back"
COL=$(awk -F'\t' 'NR == 1 {print NF}' input_2nd.bed)
`join -1 4 -2 $COL -t $'\t' input_1st.bed input_2nd.bed > tmp.bedpe`
STEM=${input%.bedpe}
`cut -f2- tmp.bedpe > "$STEM"_"$TO".bedpe`
echo "===================================================="
echo "Clearing up..."
rm input_NR.txt
rm input_1st.txt
rm input_2nd.txt
rm input_1st.bed
rm input_2nd.bed
rm tmp.txt
rm tmp.bedpe
echo "Done! The output file is "$STEM"_"$TO".bedpe"