diff --git a/bam2bcf.h b/bam2bcf.h index 8f8f8db5..18cb4b76 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -108,7 +108,7 @@ plp_cd_t; typedef struct __bcf_callaux_t { - int fmt_flag, ambig_reads; + int fmt_flag, ambig_reads, ref_skip_reads; int capQ, min_baseQ, max_baseQ, delta_baseQ; int openQ, extQ, tandemQ; // for indels uint32_t min_support, max_support; // for collecting indel candidates diff --git a/bam2bcf_edlib.c b/bam2bcf_edlib.c index 4e0a38c3..5ed31725 100644 --- a/bam2bcf_edlib.c +++ b/bam2bcf_edlib.c @@ -1559,18 +1559,21 @@ int bcf_edlib_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, } } - int qbeg, qpos, qend, tbeg, tend, kk; + int qbeg, qpos, qend, tbeg, tend; uint8_t *seq = bam_get_seq(p->b); - uint32_t *cigar = bam_get_cigar(p->b); if (p->b->core.flag & BAM_FUNMAP) continue; // FIXME: the following loop should be better moved outside; // nonetheless, realignment should be much slower anyway. - for (kk = 0; kk < p->b->core.n_cigar; ++kk) - if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) - break; - if (kk < p->b->core.n_cigar) - continue; + if (!bca->ref_skip_reads) { + uint32_t *cigar = bam_get_cigar(p->b); + int kk; + for (kk = 0; kk < p->b->core.n_cigar; ++kk) + if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) + break; + if (kk < p->b->core.n_cigar) + continue; + } // determine the start and end of sequences for alignment int left2 = left, right2 = right; diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c index 975504f8..7c337b48 100644 --- a/bam2bcf_indel.c +++ b/bam2bcf_indel.c @@ -845,18 +845,21 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, } } - int qbeg, qpos, qend, tbeg, tend, kk; + int qbeg, qpos, qend, tbeg, tend; uint8_t *seq = bam_get_seq(p->b); - uint32_t *cigar = bam_get_cigar(p->b); if (p->b->core.flag & BAM_FUNMAP) continue; // FIXME: the following loop should be better moved outside; // nonetheless, realignment should be much slower anyway. - for (kk = 0; kk < p->b->core.n_cigar; ++kk) - if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) - break; - if (kk < p->b->core.n_cigar) - continue; + if (!bca->ref_skip_reads) { + uint32_t *cigar = bam_get_cigar(p->b); + int kk; + for (kk = 0; kk < p->b->core.n_cigar; ++kk) + if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) + break; + if (kk < p->b->core.n_cigar) + continue; + } // determine the start and end of sequences for alignment // FIXME: loops over CIGAR multiple times diff --git a/doc/bcftools.txt b/doc/bcftools.txt index 5cced978..951dd1f7 100644 --- a/doc/bcftools.txt +++ b/doc/bcftools.txt @@ -2503,6 +2503,14 @@ INFO/DPR .. Deprecated in favor of INFO/AD; Number of high-quality bases for exclude from calling and increment the first value of the AD counter ('incAD0') ['drop'] +*--ref-skip-reads*:: + Include reads with CIGAR 'N' (ref-skip) operators in their alignment. + By default these are filtered out. Enabling this option may be + necessary for calling on some RNASeq pipelines. This works with + the default caller and --indels-cns. The experimental -indels-2.0 + option does not filter out alignments with ref-skips so this option + is unnecessary there. + *-e, --ext-prob* 'INT':: Phred-scaled gap extension sequencing error probability. Reducing 'INT' leads to longer indels [20] diff --git a/mpileup.c b/mpileup.c index 943e0f6f..2aa76e67 100644 --- a/mpileup.c +++ b/mpileup.c @@ -68,7 +68,7 @@ typedef struct _mplp_pileup_t mplp_pileup_t; // Data shared by all bam files typedef struct { int min_mq, flag, min_baseQ, max_baseQ, delta_baseQ, capQ_thres, max_depth, - max_indel_depth, max_read_len, ambig_reads; + max_indel_depth, max_read_len, ambig_reads, ref_skip_reads; uint32_t fmt_flag; int rflag_skip_any_unset, rflag_skip_all_unset, rflag_skip_any_set, rflag_skip_all_set, output_type; int openQ, extQ, tandemQ, min_support, indel_win_size; // for indels @@ -882,6 +882,7 @@ static int mpileup(mplp_conf_t *conf) conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE; conf->bca->fmt_flag = conf->fmt_flag; conf->bca->ambig_reads = conf->ambig_reads; + conf->bca->ref_skip_reads = conf->ref_skip_reads; conf->bca->indel_win_size = conf->indel_win_size; conf->bca->indels_v20 = conf->indels_v20; conf->bca->edlib = conf->edlib; @@ -1291,6 +1292,8 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp) " -p, --per-sample-mF Apply -m and -F per-sample for increased sensitivity\n" " -P, --platforms STR Comma separated list of platforms for indels [all]\n" " --ar, --ambig-reads STR What to do with ambiguous indel reads: drop,incAD,incAD0 [drop]\n"); + fprintf(fp, + " --ref-skip-reads Use reads with N CIGAR op [discard by default]\n"); fprintf(fp, " --indel-bias FLOAT Raise to favour recall over precision [%.2f]\n", mplp->indel_bias); fprintf(fp, @@ -1381,6 +1384,7 @@ int main_mpileup(int argc, char *argv[]) mplp.fmt_flag = B2B_INFO_BQBZ|B2B_INFO_IDV|B2B_INFO_IMF|B2B_INFO_MQ0F|B2B_INFO_MQBZ|B2B_INFO_MQSBZ|B2B_INFO_RPBZ|B2B_INFO_SCBZ|B2B_INFO_SGB|B2B_INFO_VDB|B2B_FMT_AD; mplp.max_read_len = 500; mplp.ambig_reads = B2B_DROP; + mplp.ref_skip_reads = 0; mplp.indel_win_size = 110; mplp.poly_mqual = 0; mplp.seqQ_offset = 120; @@ -1458,6 +1462,7 @@ int main_mpileup(int argc, char *argv[]) {"seed", required_argument, NULL, 13}, {"ambig-reads", required_argument, NULL, 14}, {"ar", required_argument, NULL, 14}, + {"ref-skip-reads", no_argument, NULL, 29}, {"write-index",optional_argument,NULL,'W'}, {"del-bias", required_argument, NULL, 23}, {"poly-mqual", no_argument, NULL, 24}, @@ -1620,6 +1625,9 @@ int main_mpileup(int argc, char *argv[]) if (mplp.seqQ_offset > 200) mplp.seqQ_offset = 200; break; + case 29: + mplp.ref_skip_reads = 1; + break; case 23: mplp.del_bias = atof(optarg); break; case 24: mplp.poly_mqual = 1; break; case 26: mplp.poly_mqual = 0; break;