forked from abrusell/Tartagenia
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ranking_pipeline.sh
52 lines (40 loc) · 3.48 KB
/
ranking_pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/bash -e
# Variant ranking pipeline: entry point and argument handling.
#
# Arguments:
#   $1 - phenotype list file (passed on to phenolyzer and the prioritizer)
#   $2 - sample ID used as the prefix of every input/output file name
#   $3 - output directory that already contains the annotated VCF
#
# NOTE: the script changes into the output directory, so a relative path
# given as $1 is resolved relative to that directory, not the caller's cwd.

# Keep exit-on-error active even when the script is started as
# `bash ranking_pipeline.sh`, where the shebang options are ignored.
set -e

if [ $# -ne 3 ]; then
  echo -e "Usage: $0 ID_phenotype_list.txt ID outdir\n\tN.B. In the outdir the file ID_raw_snps-indels_HapCall_genotype_filtered.g.SnpEff_UCSCAnnot_dbNSFP2.dbNSFP3_gene_cadd_ACMG_DDG2P_Mendel_intervar_db.vcf.gz must be present!"
  exit 1
fi

## input variable definitions
phenolist=$1
ID=$2
outdi=$3

# Enter the output directory first, then derive outdir from the resulting
# working directory. This makes outdir absolute, so the "$outdir..." paths
# built later stay valid even when $3 was given as a relative path
# (previously they would have pointed at outdir/outdir/...).
cd -- "${outdi}/" || exit 1
outdir="${PWD%/}/"
# Decide which annotated VCF is available in the output directory.
# Sets spid to "_spidex" when the spidex-annotated file exists and is
# non-empty, to the empty string when only the plain file does, and aborts
# with exit status 1 when neither is present. The spidex file wins if both
# exist.
vcf_prefix="${outdir}${ID}_raw_snps-indels_HapCall_genotype_filtered.g.SnpEff_UCSCAnnot_dbNSFP2.dbNSFP3_gene_cadd"
vcf_suffix="_ACMG_DDG2P_Mendel_intervar_db.vcf.gz"

# Quoted expansions inside [[ ]] keep the test valid for paths that contain
# whitespace or glob characters.
if [[ -s "${vcf_prefix}_spidex${vcf_suffix}" ]]; then
  spid=_spidex
elif [[ -s "${vcf_prefix}${vcf_suffix}" ]]; then
  spid=
else
  echo -e "Check your input files. In the outdir, ID_raw_snps-indels_HapCall_genotype_filtered.g.SnpEff_UCSCAnnot_dbNSFP2.dbNSFP3_gene_cadd(_spidex)_ACMG_DDG2P_Mendel_intervar_db.vcf.gz must be present."
  exit 1
fi
# Pipeline steps. Every step appends its stderr to the per-sample ranking
# log; only the first step truncates it.
#
# File-name building blocks shared by all steps:
#   long_tag  - full annotation suffix of the input VCF (with optional spidex)
#   short_tag - shortened suffix used for the gene list and the renamed output
long_tag="_raw_snps-indels_HapCall_genotype_filtered.g.SnpEff_UCSCAnnot_dbNSFP2.dbNSFP3_gene_cadd${spid}_ACMG_DDG2P_Mendel_intervar_db"
short_tag="_raw_snps-indels_HapCall_genotype_filtered"
vcf_base="${outdir}${ID}${long_tag}"
genelist_sorted="${ID}${short_tag}_genelist_sorted.txt"
ranking_log="${outdir}${ID}_ranking_log"
tools_dir=/pico/work/IscrC_FoRWArDS_1/NGS_tools
renamed_dir="${outdir}RENAMED"

# Filter for CDS and SS (presumably splice sites - confirm). The next step
# reads "<base>_filtered.vcf", so this tool is expected to write that file.
"${tools_dir}/vcfSifter.py" "${vcf_base}.vcf.gz" 2> "$ranking_log"

# Extract the list of genes present in the filtered VCF.
"${tools_dir}/vcf2genelist.py" "${vcf_base}_filtered.vcf" 2>> "$ranking_log"

# Sort the gene list, drop duplicates, then remove the unsorted intermediate.
sort -u "${vcf_base}_filtered.genelist.txt" > "${outdir}${genelist_sorted}"
rm -- "${vcf_base}_filtered.genelist.txt"

# Run phenolyzer with the previously prepared phenotype list (max 4 terms).
# The gene list and the "Pheno/" output prefix are deliberately left
# relative: the script is already running inside the output directory.
perl /pico/home/userexternal/aciolfi0/phenolyzer/disease_annotation.pl -f -p --gene "$genelist_sorted" -ph "$phenolist" -logistic -out "Pheno/${ID}" -addon DB_DISGENET_GENE_DISEASE_SCORE,DB_GAD_GENE_DISEASE_SCORE -addon_weight 0.25 2>> "$ranking_log"

# Add the phenolyzer score to the VCF.
"${tools_dir}/phenolyzer_score_annotator.py" "${vcf_base}_filtered.vcf" "${outdir}Pheno/${ID}.final_gene_list" 2>> "$ranking_log"

# Apply our scoring scheme to the variants.
"${tools_dir}/prioritizer_maxPopAF_ExAC_intervar.py" "$phenolist" "${vcf_base}_filtered.phenolyzer.vcf" 2>> "$ranking_log"

# Create copies with shorter names to hand over to the biologists, who use
# Windows. -p keeps a re-run from aborting under `set -e` when the directory
# is already there.
mkdir -p -- "$renamed_dir"
cp -- "${vcf_base}_filtered.phenolyzer.scored.tsv" "${renamed_dir}/"

# Rename the output for the singleton: replace the long annotation suffix
# with the short one in every tsv inside RENAMED. Uses the
# `rename FROM TO FILE...` calling convention.
rename "${long_tag}_filtered.phenolyzer.scored" "${short_tag}.phenolyzer.scored" "${renamed_dir}/"*"tsv"