-
Notifications
You must be signed in to change notification settings - Fork 2
/
eseqclustersingle.cpp
114 lines (91 loc) · 3.49 KB
/
eseqclustersingle.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include "eseqclustersingle.h"
#include "cluster-common.h"
#include <eutils/efile.h>
eseqclustersingle::eseqclustersingle(){}
void eseqclustersingle::init(INDTYPE count,const estr& ofilename,const estr& seqsfile,const earray<ebasicarray<INDTYPE> >& dupslist) {
ofile.open(ofilename,"w");
ofile.write("# seqsfile: "+seqsfile+"\n");
ofile.write("# OTU_count Merge_distance Merged_OTU_id1 Merged_OTU_id2\n");
long i,j;
mergecount=0;
scount.reserve(count);
scluster.reserve(count);
smerge.reserve(count);
incluster.reserve(count);
for (i=0; i<count; ++i){
scount.add(1);
scluster.add(i);
smerge.add(-1);
incluster.add(list<INDTYPE>());
incluster[i].push_back(i);
}
for (i=0; i<dupslist.size(); ++i){
for (j=1; j<dupslist[i].size(); ++j){
++mergecount;
ofile.write(estr(scluster.size()-mergecount)+" 1.0 "+dupslist[i][0]+" "+dupslist[i][j]+"\n");
clusterData.mergearr.add(eseqdist(dupslist[i][0],dupslist[i][j],1.0));
}
}
cout << "# initializing cluster with: "<< count<< " seqs" << endl;
}
void eseqclustersingle::merge(INDTYPE x,INDTYPE y,float dist)
{
if (x==y) return;
ldieif(scount[x]==0 || scount[y]==0,"also should not happen");
clusterData.mergearr.add(eseqdist(x,y,dist));
smerge[x]=x;
smerge[y]=x;
scount[x]+=scount[y];
scount[y]=0;
list<INDTYPE>::iterator it;
for (it=incluster[y].begin(); it!=incluster[y].end(); ++it){
scluster[*it]=x;
incluster[x].push_back(*it);
}
++mergecount;
// cout << scluster.size()-mergecount << " " << dist << " " << x << " " << y << endl;
ofile.write(estr(scluster.size()-mergecount)+" "+dist+" "+x+" "+y+"\n");
}
void eseqclustersingle::add(const eseqdist& sdist){
// if (sdist.count==0) return;
ldieif(sdist.x<0 || sdist.y<0 || sdist.x>=scluster.size() || sdist.y>=scluster.size(),"out of bounds: sdist.x: "+estr(sdist.x)+" sdist.y: "+estr(sdist.y)+" scluster.size(): "+estr(scluster.size()));
INDTYPE x=scluster[sdist.x];
INDTYPE y=scluster[sdist.y];
ldieif(x<0 || y<0 || x>=scluster.size() || y>=scluster.size(),"out of bounds: sdist.x: "+estr(x)+" sdist.y: "+estr(y)+" scluster.size(): "+estr(scluster.size()));
INDTYPE tmp;
if (x>y) { tmp=x; x=y; y=tmp; }
merge(x,y,sdist.dist);
}
/*
void eseqclustersingle::add(int ind){
// if (dists[ind].count==0) return;
ldieif(dists[ind].x<0 || dists[ind].y<0 || dists[ind].x>=scluster.size() || dists[ind].y>=scluster.size(),"out of bounds: dists[ind].x: "+estr(dists[ind].x)+" dists[ind].y: "+estr(dists[ind].y)+" scluster.size(): "+estr(scluster.size()));
int x=scluster[dists[ind].x];
int y=scluster[dists[ind].y];
ldieif(x<0 || y<0 || x>=scluster.size() || y>=scluster.size(),"out of bounds: dists[ind].x: "+estr(x)+" dists[ind].y: "+estr(y)+" scluster.size(): "+estr(scluster.size()));
int tmp;
if (x>y) { tmp=x; x=y; y=tmp; tmp=dists[ind].x; dists[ind].x=dists[ind].y; dists[ind].y=tmp; }
merge(x,y,dists[ind].dist);
}
*/
void eseqclustersingle::save(const estr& filename,const estrarray& arr)
{
long i;
estr otustr;
estrhashof<ebasicarray<INDTYPE> > otus;
efile f(filename+".clstr");
for (i=0; i<scluster.size(); ++i)
f.write(estr(scluster[i])+" "+arr.keys(i)+"\n");
f.close();
list<INDTYPE>::iterator it;
INDTYPE otucount=0;
efile f2(filename);
for (i=0; i<incluster.size(); ++i){
if (scount[i]==0) continue;
f2.write(">OTU"+estr(otucount)+"\n");
for (it=incluster[i].begin(); it!=incluster[i].end(); ++it)
f2.write(arr.keys(*it)+"\n");
++otucount;
}
f2.close();
}