-
Notifications
You must be signed in to change notification settings - Fork 0
/
ITS_extract
73 lines (50 loc) · 1.26 KB
/
ITS_extract
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
data='./'
module load bowtie2
module load samtools
module load bcftools
#Download Split SRA file
fastq-dump -I --split-files --gzip SRR3504592
#Build bowtie index
bowtie2-build TMI1172.fasta TMI_ITS
#Create sorted BAM
readarray list < ./genomes.ref
for i in ${list[@]}
do
bowtie2 --sensitive -x TMI_ITS -1 ${i}R1_val_1.fastq.gz -2 ${i}R2_val_2.fastq.gz | samtools view -bSu - | samtools sort - -o ${i}.sort
wait
done
#Index the reference genome
samtools faidx ITS_ref.fasta
#Next convert sorted bam to vcf
readarray list < ./genomes.ref
for i in ${list[@]}
do
samtools mpileup -uf ITS_ref.fasta ${i}.sort | bcftools call -c -Oz -o ${i}_2.vcf.gz
wait
done
#Unzip those vcf files too
readarray list < ./genomes.ref
for i in ${list[@]}
do
gunzip -c ${i}_2.vcf.gz > ${i}_2.vcf
wait
done
#Make fastq
for i in ${list[@]}
do
vcfutils.pl vcf2fq ${i}_2.vcf > ${i}_2.fastq
wait
done
#Replace the title of the FASTQ with the actual isolate name
for i in ${list[@]}
do
sed -i "s#TMI1172#${i}#" ${i}_2.fastq
wait
done
#FASTQ-FASTA name
readarray list < /home/spatev/workdir/CoV-2_readvis/rawreads/genomes
for i in ${list[@]}
do
seqtk seq -a ${i}.fastq > ${i}.fasta
wait
done