-
Notifications
You must be signed in to change notification settings - Fork 5
/
pipeline.sh
executable file
·102 lines (82 loc) · 2.85 KB
/
pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/bash
## Embedding-extraction pipeline for the Figure 3A / 3B inputs.
## Paths are relative, so run this script from its own directory.

# config.py is sourced as shell code. The model/genome variables used further
# down ($hg19, $hg38, $SPLICEBERT_510, $SPLICEBERT_HUMAN, $DNABERT_PREFIX) are
# not defined in this script, so they are presumably set there -- verify
# against ../scripts/config.py.
source ../scripts/config.py

# All results and logs are written below ./output.
mkdir -p ./output

ss_bed="../data/hg19.ss-motif.for_umap.bed.gz"
sse_bed="../data/K562.SSE.hg38.for_visualization.bed.gz"

# Report every missing input (not just the first), then stop: continuing would
# only launch each model run against a BED file that does not exist.
missing_inputs=0
for input_bed in "$ss_bed" "$sse_bed"; do
  if [[ ! -e "$input_bed" ]]; then
    echo "Missing file $input_bed" >&2
    missing_inputs=1
  fi
done
if (( missing_inputs )); then
  exit 1
fi
#######################################
# Build the output path prefix for one (BED file, model label) pair.
# Arguments: $1 - path to a *.bed.gz file
#            $2 - model label (e.g. SpliceBERT, DNABERT3, onehot)
# Outputs:   ./output/<BED basename without .bed.gz>.<label> on stdout
#######################################
output_prefix() {
  local bed_file=$1 label=$2
  printf './output/%s.%s\n' "$(basename -- "$bed_file" .bed.gz)" "$label"
}

#######################################
# Run fetch_embedding.py once for a single model.
# Arguments: $1 - BED file
#            $2 - genome path
#            $3 - model path (or the literal "onehot")
#            $4 - label used in the output and log file names
#            $5.. - optional extra flags passed through to fetch_embedding.py
# Outputs:   results go to <prefix>; stdout+stderr of the run to <prefix>.log
# Returns:   exit status of fetch_embedding.py
#######################################
run_embedding() {
  local bed_file=$1 genome_path=$2 model=$3 label=$4
  shift 4
  local prefix
  prefix=$(output_prefix "$bed_file" "$label")
  ../scripts/fetch_embedding.py \
    "$bed_file" "$@" \
    -m "$model" \
    -g "$genome_path" \
    -o "$prefix" &> "$prefix.log"
}

#######################################
# Run every model (SpliceBERT, SpliceBERT-human, DNABERT k=3..6, onehot) on
# one BED file.
# Globals:   SPLICEBERT_510, SPLICEBERT_HUMAN, DNABERT_PREFIX (read)
# Arguments: $1 - BED file
#            $2 - genome path
#            $3.. - optional extra flags for the SpliceBERT run only
#######################################
run_all_models() {
  local bed_file=$1 genome_path=$2
  shift 2 # whatever remains applies to the SpliceBERT run only

  ## SpliceBERT
  run_embedding "$bed_file" "$genome_path" "$SPLICEBERT_510" SpliceBERT "$@"

  ## SpliceBERT-human
  run_embedding "$bed_file" "$genome_path" "$SPLICEBERT_HUMAN" SpliceBERT-human \
    --skip-donor-acceptor-umap

  ## run DNABERT
  local k
  for k in 3 4 5 6; do
    run_embedding "$bed_file" "$genome_path" "$DNABERT_PREFIX/${k}-new-12w-0" \
      "DNABERT${k}" --skip-donor-acceptor-umap
  done

  ## run onehot (no --skip-donor-acceptor-umap flag is passed for this model)
  run_embedding "$bed_file" "$genome_path" onehot onehot
}

## Figure 3A: the SpliceBERT run is made without --skip-donor-acceptor-umap
run_all_models "$ss_bed" "$hg19"

## Figure 3B: every model-based run passes --skip-donor-acceptor-umap
run_all_models "$sse_bed" "$hg38" --skip-donor-acceptor-umap
# usage: fetch_embedding.py [-h] [--skip-donor-acceptor-umap] -m MODEL -g GENOME
#                           -o OUTPUT
#                           bed
#
# positional arguments:
#   bed                   bed file
#
# optional arguments:
#   -h, --help            show this help message and exit
#   --skip-donor-acceptor-umap
#                         skip layer umap (default: False)
#   -m MODEL, --model MODEL
#                         model path (default: None)
#   -g GENOME, --genome GENOME
#                         genome path (default: None)
#   -o OUTPUT, --output OUTPUT
#                         output file (default: None)