-
Notifications
You must be signed in to change notification settings - Fork 0
/
00_setup_and_download.sh
executable file
·78 lines (59 loc) · 1.96 KB
/
00_setup_and_download.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env bash
# 00_setup_and_download.sh
# Creates the working directories, downloads the files listed in the FASTA key
# file, fetches the latest PHGv2 release, sets up its environment, and
# initializes the TileDB instances.
#
# Every path used here (src/, data/, output/) is relative to the current
# directory, so run the script from the directory that contains them.
#
# Load Java 21.0.1 through the `module` command.
# NOTE(review): presumably needed by the PHGv2 tooling run below - confirm.
module load java/21.0.1
# Pull in shared settings and helper functions.
# NOTE(review): DIRS, FA_KEY, DEST_REF_DIR, DEST_ASM_DIR, DEST_REF_GFF_DIR,
# PHG_SRC and PHG_DB_DIR, plus log, read_csv, download_file and
# convert_fna_to_fa, are used but never defined in this script; they are
# presumably provided by these two files - confirm.
source src/util_config.sh
source src/util_functions.sh
# Set up directory structure
# Walk every path listed in DIRS: report the ones that are already present and
# create (with any missing parents) the ones that are not.
log "Initializing directories"
for directory in "${DIRS[@]}"; do
  if [[ -d "$directory" ]]; then
    log " '$directory' directory already exists."
    continue
  fi
  mkdir -p "$directory"
  log " Created '$directory' directory."
done
log "Working directory setup completed"
# Declare an associative array to store file type and filename pairs.
# Keys are file names and values are the file type ("ref", "asm" or "ref_gff").
declare -A FA_DATA
# FA_KEY is quoted so a key-file path containing spaces or glob characters is
# handed to read_csv as a single argument.
# NOTE(review): read_csv presumably fills the array named by its second
# argument from the file named by its first - confirm against its definition.
read_csv "$FA_KEY" FA_DATA
# Download FASTA data
log "Downloading FASTA data"
for file in "${!FA_DATA[@]}"; do
  type="${FA_DATA[${file}]}"
  # Pick the destination directory for this file type; the download call is
  # the same for every known type.
  case "$type" in
    ref)     fa_dest_dir="$DEST_REF_DIR" ;;
    asm)     fa_dest_dir="$DEST_ASM_DIR" ;;
    ref_gff) fa_dest_dir="$DEST_REF_GFF_DIR" ;;
    *)
      # Diagnostic, not data: send it to stderr and skip the entry.
      echo " Unknown file type: $type" >&2
      continue
      ;;
  esac
  download_file "$file" "$type" "$fa_dest_dir"
done
log "FASTA data downloaded"
# Convert .fna to .fa (if they exist - needed for current version of BioKotlin)
log "Checking for .fna extensions"
# Quote the directory variables so a path containing spaces or glob characters
# reaches the helper as exactly one argument.
convert_fna_to_fa "$DEST_REF_DIR"
convert_fna_to_fa "$DEST_ASM_DIR"
# Download PHGv2
# Ask the GitHub releases API for the latest PHGv2 release, pull every asset
# download URL that contains ".tar" out of the JSON response, then download
# each archive into the current directory, unpack it into src/, and delete it.
log "Begin PHGv2 download"
phg_release_api="https://api.github.com/repos/maize-genetics/phg_v2/releases/latest"
phg_archives_done=0
while IFS= read -r phg_tar_url; do
  # Operate on the exact file that was downloaded instead of a "*.tar" glob,
  # so other tarballs in the working directory are never extracted or removed,
  # and compressed assets such as ".tar.gz" are handled as well.
  phg_tar_file="${phg_tar_url##*/}"
  # -f makes curl return non-zero on an HTTP error instead of saving the
  # error page as if it were the archive.
  if ! curl -fL -o "$phg_tar_file" "$phg_tar_url"; then
    echo "ERROR: failed to download $phg_tar_url" >&2
    exit 1
  fi
  # On extraction failure the archive is left in place for inspection.
  if ! tar -xvf "$phg_tar_file" -C src/; then
    echo "ERROR: failed to extract $phg_tar_file into src/" >&2
    exit 1
  fi
  rm -- "$phg_tar_file"
  phg_archives_done=$((phg_archives_done + 1))
done < <(
  # awk only prints the URLs; the shell loop above runs the commands, so the
  # URL is never spliced unquoted into a command string.
  curl -s "$phg_release_api" \
    | awk -F': ' '/browser_download_url/ && /\.tar/ {
        gsub(/"/, "", $(NF));
        print $(NF);
      }'
)
# An API failure or a release without a tar asset yields no URLs; report that
# instead of claiming success, since the following steps need PHGv2.
if (( phg_archives_done == 0 )); then
  echo "ERROR: no PHGv2 .tar asset found at $phg_release_api" >&2
  exit 1
fi
log "PHGv2 downloaded and decompressed"
# Setup PHGv2 environment
log "START - Setting up PHGv2 environment"
"$PHG_SRC" setup-environment --env-file data/phg_environment.yml
# Move any log files sitting in the working directory into output/logging/.
# The glob is expanded into an array and checked first: when nothing matches,
# bash leaves the pattern as literal text and a bare `mv *.log` would fail on
# a file that does not exist. The "./" prefix keeps a file name that starts
# with "-" from being read as an mv option.
phg_log_files=(./*.log)
if [[ -e "${phg_log_files[0]-}" ]]; then
  mv "${phg_log_files[@]}" output/logging/
fi
log "FINISH - Setting up PHGv2 environment"
# Initialize TileDB instances
log "START - Initializing TileDB instances"
# PHG_DB_DIR is quoted so a database path containing spaces or glob characters
# is passed to --db-path as a single argument.
"$PHG_SRC" initdb \
  --db-path "$PHG_DB_DIR" \
  --gvcf-anchor-gap 1000000 \
  --hvcf-anchor-gap 1000
log "FINISH - Initializing TileDB instances"