-
Notifications
You must be signed in to change notification settings - Fork 5
/
clustering.cpp
108 lines (98 loc) · 2.88 KB
/
clustering.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
//
// Created by eric on 19-3-30.
//
#include <algorithm>
#include <map>
#include "clustering.h"
#define getv(k) (mixed_paths[k].papi[I_PAPI_TOT_INS])
struct CenterType
{
int l, r;
};
vector<vector<DataType>> calc_classify_fake(vector<DataType> mixed_paths, const double alpha)
{
vector<vector<DataType>> ret = {mixed_paths};
return ret;
}
/**
*
* @param mixed_paths
* @param alpha variance radius
* @return
*/
vector<vector<DataType>> calc_classify(vector<DataType> mixed_paths, const double alpha)
{
vector<CenterType> centers;
vector<vector<DataType>> ret;
sort(mixed_paths.begin(), mixed_paths.end(), [](const DataType &l,const DataType &r) {
return l.papi[I_PAPI_TOT_INS] < r.papi[I_PAPI_TOT_INS];
});
int l = 0, r = 0;
for (int m = 0; m < mixed_paths.size(); ++m)
{
while (r < mixed_paths.size() && getv(r) < getv(m) * (1 + alpha)) ++r;
while (l< m && getv(l) < getv(m)* (1 - alpha)) ++l;
if (centers.empty()||(centers.back().r<l))
{
centers.push_back(CenterType{l, r});
} else if (centers.back().r - centers.back().l < r - l)
{
centers.pop_back();
centers.push_back(CenterType{l, r});
}
}
for (const auto &v:centers)
{
vector<DataType> group;
for (auto i = v.l; i < v.r; ++i)
group.push_back(mixed_paths[i]);
ret.emplace_back(vector<DataType>(group));
}
// push data between clusters
if (!centers.empty())
{
vector<DataType> group;
for (auto j = 0; j < centers[0].l; ++j)
group.push_back(mixed_paths[j]);
ret.emplace_back(vector<DataType>(group));
}
for (int i = 0; i < centers.size()-1; ++i)
{
vector<DataType> group;
for (auto j = centers[i].r; j < centers[i + 1].l; ++j)
group.push_back(mixed_paths[j]);
ret.emplace_back(vector<DataType>(group));
}
return ret;
}
unsigned long long hash_sequence(int n, void *array[])
{
unsigned long long result = 0;
for (int i = 0; i < n; ++i)
{
auto h = (long long) array[i]; // the "identity hash"
result ^= h + 0x9E3779B97F4A7C15 + (result << 6u) + (result >> 2u);
}
return (unsigned long long)result;
}
vector<vector<DataType>> comm_classify_fake(vector<DataType> &mixed_paths)
{
return vector<vector<DataType>>{mixed_paths};
}
vector<vector<DataType>> comm_classify(vector<DataType> &mixed_paths)
{
map<Comm_key, vector<DataType>> classified_paths;
for (const auto &data:mixed_paths)
{
const Comm_key key = data.to_comm_key();
if (classified_paths.count(key) == 0)
{
classified_paths[key] = vector<DataType>{};
}
classified_paths[key].push_back(data);
}
vector<vector<DataType>> ret;
for (const auto &kv:classified_paths)
ret.push_back(kv.second);
return ret;
}