-
Notifications
You must be signed in to change notification settings - Fork 2
/
ONElib.hpp
135 lines (110 loc) · 4.89 KB
/
ONElib.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/******************************************************************************************
*
* File: ONElib.hpp
* C++ header code for ONE file reading and writing
*
* Authors: Richard Durbin ([email protected]), Gene Myers ([email protected])
* Copyright (C) Richard Durbin, Gene Myers, 2019-
*
*****************************************************************************************/
/* Last edited: Jun 9 11:07 2023 (rd109) */
#ifndef ONEfile_h
#define ONEfile_h
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>
using namespace std ;
// Pull the declarations of the C header ONElib.h into namespace C_1F, with C
// linkage, so that the wrapper classes below refer to them as C_1F::<name>.
namespace C_1F {
extern "C" {
#include "ONElib.h"
}
}
class ONEschema
{
private:
C_1F::OneSchema *os ;
public:
ONEschema (const string &text) { os = C_1F::oneSchemaCreateFromText (text.c_str()) ; }
~ONEschema () { C_1F::oneSchemaDestroy (os) ; }
friend class ONEfile ;
} ;
class ONEfile
{
private:
C_1F::OneFile *vf ;
public:
ONEfile (const string &path, const string &mode, const ONEschema &schema, const string &type, int nthreads)
{ vf = NULL ;
const char* tc = (type.size() > 0) ? type.c_str() : 0 ;
if (mode[0] == 'r')
vf = C_1F::oneFileOpenRead (path.c_str(), schema.os, tc, nthreads) ;
else if (mode[0] == 'w' && mode[1] == 'b')
vf = C_1F::oneFileOpenWriteNew (path.c_str(), schema.os, tc, true, nthreads) ;
else if (mode[0] == 'w' && mode.size() == 1)
vf = C_1F::oneFileOpenWriteNew (path.c_str(), schema.os, tc, false, nthreads) ;
if (vf == NULL) { throw runtime_error("failed to open ONEfile") ; }
}
ONEfile (const string &path, const string &mode, ONEfile &from, int nthreads)
{ vf = NULL ;
if (mode[0] == 'w' && mode[1] == 'b')
vf = C_1F::oneFileOpenWriteFrom (path.c_str(), from.vf, true, nthreads) ;
else if (mode[0] == 'w' && mode.size() == 1)
vf = C_1F::oneFileOpenWriteFrom (path.c_str(), from.vf, false, nthreads) ;
if (vf == NULL) { throw runtime_error("failed to open ONEfile") ; }
}
~ONEfile () { C_1F::oneFileClose (vf) ; }
bool checkSchemaText (const string &text)
{ return C_1F::oneFileCheckSchemaText (vf, text.c_str()) ; }
char readLine() { return C_1F::oneReadLine (vf) ; }
int64_t length()
{ return ((vf)->field[((vf)->info[(int)(vf)->lineType]->listField)].len & 0xffffffffffffffll) ; }
int64_t getInt(int x) { return vf->field[x].i ; }
void setInt(int x, int64_t val) { vf->field[x].i = val ; }
double getReal(int x) { return vf->field[x].r ; }
void setReal(int x, double val) { vf->field[x].r = val ; }
char getChar(int x) { return vf->field[x].c ; }
void setChar(int x, char val) { vf->field[x].c = val ; }
char* setDNAchar() { return (char*) _oneList(vf) ; }
int64_t* getIntList() { return (int64_t*) C_1F::_oneList(vf) ; }
double* getRealList() { return (double*) C_1F::_oneList(vf) ; }
uint8_t* getDNA2bit () { return (uint8_t*) C_1F::_oneCompressedList(vf) ; }
string getString() { return (char *) C_1F::_oneList(vf) ; }
char* nextString(char* s) { return s + strlen(s) + 1 ; }
char* getComment() { return C_1F::oneReadComment(vf) ; }
bool gotoObject (int64_t i) { return C_1F::oneGotoObject (vf, i) ; }
int64_t gotoGroup (int64_t i) { return C_1F::oneGotoGroup (vf, i) ; }
// some extra functions to hide readable class attributes
char lineType() { return vf->lineType ; }
int64_t lineNumber() { return vf->line ; }
int64_t object() { return vf->object ; }
int64_t group() { return vf->group ; }
int64_t count(char lineType) { return vf->info[lineType]->given.count ; }
int64_t max(char lineType) { return vf->info[lineType]->given.max ; }
int64_t total(char lineType) { return vf->info[lineType]->given.total ; }
int64_t groupCount(char lineType) { return vf->info[lineType]->given.groupCount ; }
int64_t groupTotal(char lineType) { return vf->info[lineType]->given.groupTotal ; }
} ;
/******************************** simple testing code ********************************/
#ifdef TEST_HEADER
// To use this, symlink this file to a filename ending in .cpp and compile with -D TEST_HEADER, e.g.
// ln -s ONElib.hpp ONEtest.cpp
// gcc -c ONElib.c
// g++ -D TEST_HEADER -o ONEtest ONEtest.cpp ONElib.o
#include <iostream>
// Schema text given to ONEschema by the test driver below.
// It is assembled from adjacent string literals, one newline-terminated
// schema line per literal.
static std::string schemaText (
  // primary type line
  "P 3 seq SEQUENCE\n"
  // two secondary type lines
  "S 6 segseq segment sequences - objects are 1:1 with those in seg file\n"
  "S 7 readseq read sequences\n"
  // 'S' line type: the DNA string
  "O S 1 3 DNA sequence: the DNA string\n"
  // 'I' line type: the sequence identifier
  "D I 1 6 STRING id - sequence identifier; unnecessary for segments\n"
) ;
// Test driver: opens the ONE file named by the first argument for reading with
// the schema in schemaText, reports the number of 'S' lines it declares, then
// prints the length of every 'S' line read.
// Returns 1 if no filename is given or if opening the file throws, 0 otherwise.
int main (int argc, char *argv[])
{
  if (argc < 2) { std::cerr << "need a filename as argument\n" ; return 1 ; }

  try
    { ONEschema os(schemaText) ;
      ONEfile of(argv[1], "r", os, "", 1) ;   // throws std::runtime_error on failure
      std::cout << "opened 1seq file " << argv[1] << " with " << of.count('S') << " sequences\n" ;
      while (of.readLine())
        if (of.lineType() == 'S')
          std::cout << "sequence length " << of.length() << "\n" ;
    }
  catch (const std::exception &e)
    { // report the failure instead of letting the exception terminate the program
      std::cerr << e.what() << " '" << argv[1] << "'\n" ;
      return 1 ;
    }

  return 0 ;
}
#endif
#endif