-
Notifications
You must be signed in to change notification settings - Fork 3
/
STIP_warning_file.nf
66 lines (48 loc) · 1.83 KB
/
STIP_warning_file.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env nextflow
params.genome = 'genome/genome.chr1.1M.fa'
genome_ch = Channel.fromPath(params.genome)
params.gtf = 'genome/genome.chr1.1M.gtf'
gtf_ch = Channel.fromPath(params.gtf)
params.snp = 'snp/bos_taurus.SNP.biallelic.100k.bed'
snp_ch = Channel.fromPath(params.snp)
process generateWindowsFromGtf {
input:
file genome from gtf_ch
output:
file 'windows.bed' into generateWindowsFromGtfOut
shell:
'''
awk -F "\t" 'BEGIN{OFS="\t"}{ if ($3 == "gene"){ if ($7 == "+"){ print $1,$4-2000,$4}else{print $1,$5,$5+2000}}}' !{genome} > windows.bed
'''
}
process intersectVariantWithWindows {
conda "bioconda::bedtools"
input:
file windows from generateWindowsFromGtfOut
file snp from snp_ch
output:
file 'snp.filteredByWindows.bed' into intersectVariantWithWindowsOut
script:
"""
bedtools intersect -a $snp -b $windows > snp.filteredByWindows.bed
"""
}
process extractFastaFromSelectSNP {
conda "bioconda::bedtools"
input:
file genome from genome_ch
file snp from intersectVariantWithWindowsOut
output:
file 'snp.filteredByWindows.fasta' into intersectVariantWithWindowsFastaOut
file 'warning.txt' into warning
script:
"""
awk '{print \$1"\t"\$2-30"\t"\$2+29;}' $snp > $snp"_modif"
bedtools getfasta -fi $genome -bed $snp"_modif" | paste - - > $snp"_modif2"
paste $snp $snp"_modif2" | awk -v warning='warning.txt' 'BEGIN{w=0}{if (toupper(substr(\$8,30,1)) != toupper(\$5)){w++;} if (substr(\$4,1,2) == "rs"){ name = \$4;}else{name = "snp_"\$1"_"\$2;} print \$1"\t"\$2"\t"\$5"\t"\$6"\t"name"\t"\$8;}END{print w > warning}' > snp.filteredByWindows.fasta
"""
}
//generateWindowsFromGtfOut.view()
//intersectVariantWithWindowsOut.view()
//intersectVariantWithWindowsFastaOut.view()
warning.view()