-
Notifications
You must be signed in to change notification settings - Fork 51
/
utils.h
101 lines (96 loc) · 3.51 KB
/
utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdbool.h>
#include <zlib.h>
#include <unistd.h>
#include "stdaln.h"
/*
 * Buffer capacities, in characters. Arrays sized from these limits in this
 * header are declared with one extra element (MAX_* + 1).
 */
#define MAX_ID_LEN  256 /* capacity of the read-identifier buffers */
#define MAX_FN_LEN  512 /* NOTE(review): presumably a file-name limit; not used in this header - confirm */
#define MAX_SEQ_LEN 256 /* capacity of the sequence and quality buffers */
/*
 * Bounds on quality characters.
 *   MAX_QUAL: 60 + 33 = 93, which is ']' in ASCII (the limit was 83, 'S', before).
 *   MIN_QUAL: 33, which is '!' in ASCII.
 */
#define MAX_QUAL 93
#define MIN_QUAL 33

/*
 * Defined in another translation unit.
 * NOTE(review): presumably the quality ceiling applied at run time - confirm
 * against its definition.
 */
extern char maximum_quality;
/*
 * Sentinel result codes.
 * NOTE(review): none of these is referenced elsewhere in this header;
 * presumably they are special return values of the overlap/adapter search -
 * confirm in the implementation.
 */
#define CODE_AMBIGUOUS (-2)
#define CODE_NOMATCH   (-1)
#define CODE_NOADAPT   9999
/*
 * One read pair: the forward and the reverse read of a sequence pair, each
 * with its quality scores, plus buffers for derived data.
 * Every character buffer has one element more than its MAX_* capacity.
 * NOTE(review): the per-field notes below are inferred from the field names;
 * confirm against the implementation.
 */
typedef struct sqp {
    /* Forward read. */
    char   fid[MAX_ID_LEN + 1];     /* identifier */
    char   fseq[MAX_SEQ_LEN + 1];   /* sequence */
    char   fqual[MAX_SEQ_LEN + 1];  /* quality string */
    size_t flen;                    /* presumably the forward read length */

    /* Reverse read. */
    char   rid[MAX_ID_LEN + 1];     /* identifier */
    char   rseq[MAX_SEQ_LEN + 1];   /* sequence */
    char   rqual[MAX_SEQ_LEN + 1];  /* quality string */
    char   rc_rseq[MAX_SEQ_LEN + 1];    /* presumably rseq reverse-complemented */
    char   rc_rqual[MAX_SEQ_LEN + 1];   /* presumably rqual reversed */

    /* Merged read; the buffers can hold two full-length reads. */
    char   merged_seq[2 * MAX_SEQ_LEN + 1];
    char   merged_qual[2 * MAX_SEQ_LEN + 1];
    size_t merged_len;

    size_t rlen;    /* presumably the reverse read length */
    size_t mpos;    /* NOTE(review): meaning not evident from this header */
} Sqp;
/* Handle type: SQP is a pointer to struct sqp, so an SQP argument is passed by reference. */
typedef struct sqp *SQP;
/*
 * SQP_init: create a read-pair record and return its handle.
 * Takes no arguments. The parameter list is spelled (void) so that this is a
 * real prototype: before C23 an empty list "()" leaves the parameters
 * unspecified and the compiler would accept calls that pass arguments.
 * NOTE(review): presumably heap-allocates the record, to be released with
 * SQP_destroy - confirm in the implementation.
 */
SQP SQP_init(void);

/*
 * SQP_destroy: dispose of the record referred to by sqp.
 * NOTE(review): presumably the counterpart of SQP_init - confirm in the
 * implementation.
 */
void SQP_destroy(SQP sqp);
/*
 * Merging and alignment-printing routines for one read pair.
 * AlnAln is not declared in this header (presumably it comes from "stdaln.h").
 * NOTE(review): the behaviour notes below are inferred from names only -
 * confirm in the implementation.
 */

/* adapter_merge: takes the pair and a print_overhang flag; returns nothing. */
void adapter_merge(SQP sqp,
                   bool print_overhang);

/*
 * fill_merged_sequence: takes the pair, an alignment and an include_overhang
 * flag; presumably fills the pair's merged_* fields from the alignment.
 */
void fill_merged_sequence(SQP sqp,
                          AlnAln *aln,
                          bool include_overhang);

/* pretty_print_alignment: presumably writes a readable alignment of the pair to out. */
void pretty_print_alignment(gzFile out,
                            SQP sqp,
                            char adj_q_cut,
                            bool sort);

/* pretty_print_alignment_stdaln: as above, but takes an explicit alignment and three flags. */
void pretty_print_alignment_stdaln(gzFile out,
                                   SQP sqp,
                                   AlnAln *aln,
                                   bool first_adapter,
                                   bool second_adapter,
                                   bool print_merged);
/*
 * Quality-character helpers; each takes and returns plain char values.
 * The "extern" keyword is omitted because it is implied for function
 * declarations.
 * NOTE(review): presumably these combine or adjust +33-offset quality
 * characters for mismatching, gapped and matching positions - confirm in the
 * implementation.
 */
char mismatch_p33_merge(char pA, char pB);
char gap_p33_qual(char q);
char match_p33_merge(char pA, char pB);
/*
 * make_blunt_ends: takes the pair and an alignment; returns nothing.
 * NOTE(review): presumably trims the alignment so neither read overhangs the
 * other - confirm in the implementation.
 */
void make_blunt_ends(SQP sqp,
                     AlnAln *aln);
/*
 * read_olap_adapter_trim: takes the pair, a minimum adapter overlap, four
 * threshold tables (each declared with MAX_SEQ_LEN+1 entries), a quality
 * cutoff character and a use_mask flag; returns a bool.
 * NOTE(review): presumably the tables are indexed by overlap length and the
 * result reports whether trimming happened - confirm in the implementation.
 */
bool read_olap_adapter_trim(SQP sqp,
                            size_t min_ol_adapter,
                            unsigned short min_match_adapter[MAX_SEQ_LEN+1],
                            unsigned short max_mismatch_adapter[MAX_SEQ_LEN+1],
                            unsigned short min_match_reads[MAX_SEQ_LEN+1],
                            unsigned short max_mismatch_reads[MAX_SEQ_LEN+1],
                            char qcut,
                            bool use_mask);
/*
 * read_merge: takes the pair, a minimum overlap, two threshold tables (each
 * declared with MAX_SEQ_LEN+1 entries) and a quality cutoff character;
 * returns a bool.
 * NOTE(review): presumably reports whether the two reads were merged -
 * confirm in the implementation.
 */
bool read_merge(SQP sqp,
                size_t min_olap,
                unsigned short min_match[MAX_SEQ_LEN+1],
                unsigned short max_mismatch[MAX_SEQ_LEN+1],
                char adj_q_cut);
/*
 * FASTQ input/output on zlib gzFile streams.
 * The "extern" keyword is omitted because it is implied for function
 * declarations.
 * NOTE(review): the behaviour notes below are inferred from names only -
 * confirm in the implementation.
 */

/*
 * next_fastqs: takes a forward and a reverse stream, the pair to fill and a
 * p64 flag (presumably selecting +64-offset qualities); returns a bool.
 */
bool next_fastqs(gzFile ffq, gzFile rfq, SQP curr_sqp, bool p64);

/* write_fastq: takes an output stream plus id, sequence and quality strings; returns an int. */
int write_fastq(gzFile out, char id[], char seq[], char qual[]);

/* f_r_id_check: takes the forward and reverse ids with their lengths; returns a bool. */
bool f_r_id_check(char fid[], size_t fid_len, char rid[], size_t rid_len);

/*
 * read_fastq: takes an input stream, buffers for id, sequence and quality,
 * two length out-pointers and the p64 flag; returns an int.
 */
int read_fastq(gzFile fastq, char id[], char seq[], char qual[],
               size_t *id_len, size_t *seq_len, bool p64);

/* fileOpen: takes a file name and an access-mode string; returns a gzFile. */
gzFile fileOpen(const char *name, char access_mode[]);
/*
 * compute_ol: takes a subject and a query, each as sequence, quality string
 * and length, plus a minimum overlap, two threshold tables (each declared
 * with MAX_SEQ_LEN+1 entries), a check_unique flag and a quality cutoff
 * character; returns an int.
 * NOTE(review): presumably the result is an overlap position or one of the
 * CODE_* sentinels - confirm in the implementation.
 */
int compute_ol(char subjectSeq[], char subjectQual[], size_t subjectLen,
               char querySeq[], char queryQual[], size_t queryLen,
               size_t min_olap,
               unsigned short min_match[MAX_SEQ_LEN+1],
               unsigned short max_mismatch[MAX_SEQ_LEN+1],
               bool check_unique,
               char adj_q_cut);
/*
 * k_match: takes two read-only (sequence, quality, length) triples, a
 * minimum match count, a maximum mismatch count and a quality cutoff
 * character; returns a bool.
 * NOTE(review): presumably true when the two sequences agree within those
 * thresholds - confirm in the implementation.
 */
bool k_match(const char *s1, const char *q1, size_t len1,
             const char *s2, const char *q2, size_t len2,
             unsigned short min_match,
             unsigned short max_mismatch,
             char adj_q_cut);
/*
 * Reverse / reverse-complement helpers.
 * The "extern" keyword is omitted because it is implied for function
 * declarations.
 * NOTE(review): the behaviour notes below are inferred from names only -
 * confirm in the implementation.
 */

/* revcom_seq: takes a writable sequence buffer and its length; presumably reverse-complements it in place. */
void revcom_seq(char seq[], int len);

/* revcom_char: takes one base character and returns a char, presumably its complement. */
char revcom_char(const char base);

/* rev_qual: takes a writable quality buffer and its length; presumably reverses it in place. */
void rev_qual(char q[], int len);
/*
 * adapter_trim: takes the pair, a minimum adapter overlap, the forward and
 * reverse primers (each as sequence, dummy quality string and length), four
 * threshold tables (each declared with MAX_SEQ_LEN+1 entries), a quality
 * cutoff character and a use_mask flag; returns a bool.
 * NOTE(review): presumably reports whether an adapter was found and trimmed -
 * confirm in the implementation.
 */
bool adapter_trim(SQP sqp,
                  size_t min_ol_adapter,
                  char *forward_primer,
                  char *forward_primer_dummy_qual,
                  int forward_primer_len,
                  char *reverse_primer,
                  char *reverse_primer_dummy_qual,
                  int reverse_primer_len,
                  unsigned short min_match_adapter[MAX_SEQ_LEN+1],
                  unsigned short max_mismatch_adapter[MAX_SEQ_LEN+1],
                  unsigned short min_match_reads[MAX_SEQ_LEN+1],
                  unsigned short max_mismatch_reads[MAX_SEQ_LEN+1],
                  char adj_q_cut,
                  bool use_mask);
/*
 * max(a, b): the larger of the two arguments; yields b when neither is
 * greater. Defined only if no macro named max already exists.
 * Caution: each argument appears twice in the expansion, so an argument with
 * side effects (e.g. i++) can be evaluated twice.
 */
#if !defined(max)
#  define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
/*
 * min(a, b): the smaller of the two arguments; yields b when neither is
 * smaller. Defined only if no macro named min already exists.
 * Caution: each argument appears twice in the expansion, so an argument with
 * side effects (e.g. i++) can be evaluated twice.
 */
#if !defined(min)
#  define min(a, b) (((a) < (b)) ? (a) : (b))
#endif