Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the mpileup BAM_CREF_SKIP filter optional. #2282

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bam2bcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ plp_cd_t;


typedef struct __bcf_callaux_t {
int fmt_flag, ambig_reads;
int fmt_flag, ambig_reads, ref_skip_reads;
int capQ, min_baseQ, max_baseQ, delta_baseQ;
int openQ, extQ, tandemQ; // for indels
uint32_t min_support, max_support; // for collecting indel candidates
Expand Down
17 changes: 10 additions & 7 deletions bam2bcf_edlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -1559,18 +1559,21 @@ int bcf_edlib_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos,
}
}

int qbeg, qpos, qend, tbeg, tend, kk;
int qbeg, qpos, qend, tbeg, tend;
uint8_t *seq = bam_get_seq(p->b);
uint32_t *cigar = bam_get_cigar(p->b);
if (p->b->core.flag & BAM_FUNMAP) continue;

// FIXME: the following loop should be better moved outside;
// nonetheless, realignment should be much slower anyway.
for (kk = 0; kk < p->b->core.n_cigar; ++kk)
if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP)
break;
if (kk < p->b->core.n_cigar)
continue;
if (!bca->ref_skip_reads) {
uint32_t *cigar = bam_get_cigar(p->b);
int kk;
for (kk = 0; kk < p->b->core.n_cigar; ++kk)
if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP)
break;
if (kk < p->b->core.n_cigar)
continue;
}

// determine the start and end of sequences for alignment
int left2 = left, right2 = right;
Expand Down
17 changes: 10 additions & 7 deletions bam2bcf_indel.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,18 +845,21 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos,
}
}

int qbeg, qpos, qend, tbeg, tend, kk;
int qbeg, qpos, qend, tbeg, tend;
uint8_t *seq = bam_get_seq(p->b);
uint32_t *cigar = bam_get_cigar(p->b);
if (p->b->core.flag & BAM_FUNMAP) continue;

// FIXME: the following loop should be better moved outside;
// nonetheless, realignment should be much slower anyway.
for (kk = 0; kk < p->b->core.n_cigar; ++kk)
if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP)
break;
if (kk < p->b->core.n_cigar)
continue;
if (!bca->ref_skip_reads) {
uint32_t *cigar = bam_get_cigar(p->b);
int kk;
for (kk = 0; kk < p->b->core.n_cigar; ++kk)
if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP)
break;
if (kk < p->b->core.n_cigar)
continue;
}

// determine the start and end of sequences for alignment
// FIXME: loops over CIGAR multiple times
Expand Down
8 changes: 8 additions & 0 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2503,6 +2503,14 @@ INFO/DPR .. Deprecated in favor of INFO/AD; Number of high-quality bases for
exclude from calling and increment the first value of the AD counter
('incAD0') ['drop']

*--ref-skip-reads*::
Include reads with CIGAR 'N' (ref-skip) operators in their alignment.
By default these are filtered out. Enabling this option may be
necessary for calling on some RNASeq pipelines. This works with
the default caller and --indels-cns. The experimental -indels-2.0
option does not filter out alignments with ref-skips so this option
is unnecessary there.

*-e, --ext-prob* 'INT'::
Phred-scaled gap extension sequencing error probability. Reducing 'INT'
leads to longer indels [20]
Expand Down
10 changes: 9 additions & 1 deletion mpileup.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ typedef struct _mplp_pileup_t mplp_pileup_t;
// Data shared by all bam files
typedef struct {
int min_mq, flag, min_baseQ, max_baseQ, delta_baseQ, capQ_thres, max_depth,
max_indel_depth, max_read_len, ambig_reads;
max_indel_depth, max_read_len, ambig_reads, ref_skip_reads;
uint32_t fmt_flag;
int rflag_skip_any_unset, rflag_skip_all_unset, rflag_skip_any_set, rflag_skip_all_set, output_type;
int openQ, extQ, tandemQ, min_support, indel_win_size; // for indels
Expand Down Expand Up @@ -882,6 +882,7 @@ static int mpileup(mplp_conf_t *conf)
conf->bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;
conf->bca->fmt_flag = conf->fmt_flag;
conf->bca->ambig_reads = conf->ambig_reads;
conf->bca->ref_skip_reads = conf->ref_skip_reads;
conf->bca->indel_win_size = conf->indel_win_size;
conf->bca->indels_v20 = conf->indels_v20;
conf->bca->edlib = conf->edlib;
Expand Down Expand Up @@ -1291,6 +1292,8 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -p, --per-sample-mF Apply -m and -F per-sample for increased sensitivity\n"
" -P, --platforms STR Comma separated list of platforms for indels [all]\n"
" --ar, --ambig-reads STR What to do with ambiguous indel reads: drop,incAD,incAD0 [drop]\n");
fprintf(fp,
" --ref-skip-reads Use reads with N CIGAR op [discard by default]\n");
fprintf(fp,
" --indel-bias FLOAT Raise to favour recall over precision [%.2f]\n", mplp->indel_bias);
fprintf(fp,
Expand Down Expand Up @@ -1381,6 +1384,7 @@ int main_mpileup(int argc, char *argv[])
mplp.fmt_flag = B2B_INFO_BQBZ|B2B_INFO_IDV|B2B_INFO_IMF|B2B_INFO_MQ0F|B2B_INFO_MQBZ|B2B_INFO_MQSBZ|B2B_INFO_RPBZ|B2B_INFO_SCBZ|B2B_INFO_SGB|B2B_INFO_VDB|B2B_FMT_AD;
mplp.max_read_len = 500;
mplp.ambig_reads = B2B_DROP;
mplp.ref_skip_reads = 0;
mplp.indel_win_size = 110;
mplp.poly_mqual = 0;
mplp.seqQ_offset = 120;
Expand Down Expand Up @@ -1458,6 +1462,7 @@ int main_mpileup(int argc, char *argv[])
{"seed", required_argument, NULL, 13},
{"ambig-reads", required_argument, NULL, 14},
{"ar", required_argument, NULL, 14},
{"ref-skip-reads", no_argument, NULL, 29},
{"write-index",optional_argument,NULL,'W'},
{"del-bias", required_argument, NULL, 23},
{"poly-mqual", no_argument, NULL, 24},
Expand Down Expand Up @@ -1620,6 +1625,9 @@ int main_mpileup(int argc, char *argv[])
if (mplp.seqQ_offset > 200)
mplp.seqQ_offset = 200;
break;
case 29:
mplp.ref_skip_reads = 1;
break;
case 23: mplp.del_bias = atof(optarg); break;
case 24: mplp.poly_mqual = 1; break;
case 26: mplp.poly_mqual = 0; break;
Expand Down