Skip to content

Commit

Permalink
Merge pull request #143 from dynatrace-oss/ull-paper
Browse files Browse the repository at this point in the history
Ull paper
  • Loading branch information
oertl authored Aug 30, 2023
2 parents c943f2c + 072c323 commit 08f5f4f
Show file tree
Hide file tree
Showing 59 changed files with 2,847 additions and 2,788 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,9 @@ public UltraLogLog add(long hashValue, StateChangeObserver stateChangeObserver)
int idx = (int) (hashValue >>> q);
int nlz = Long.numberOfLeadingZeros(~(~hashValue << (-q))); // nlz in {0, 1, ..., 64-p}
byte oldState = state[idx];
long hashPrefix = registerToHashPrefix(oldState);
long hashPrefix = unpack(oldState);
hashPrefix |= 1L << (nlz + (~q)); // (nlz + (~q)) = (nlz + p - 1) in {p-1, ... 63}
byte newState = hashPrefixToRegister(hashPrefix);
byte newState = pack(hashPrefix);
state[idx] = newState;
if (stateChangeObserver != null && newState != oldState) {
int p = 64 - q;
Expand Down Expand Up @@ -359,7 +359,7 @@ public UltraLogLog add(UltraLogLog other) {
final int deltaP = otherP - p;
int j = 0;
for (int i = 0; i < state.length; ++i) {
long hashPrefix = registerToHashPrefix(state[i]) | registerToHashPrefix(otherData[j]);
long hashPrefix = unpack(state[i]) | unpack(otherData[j]);
j += 1;
for (long k = 1; k < 1L << deltaP; ++k) {
if (otherData[j] != 0) {
Expand All @@ -368,19 +368,19 @@ public UltraLogLog add(UltraLogLog other) {
j += 1;
}
if (hashPrefix != 0) {
state[i] = hashPrefixToRegister(hashPrefix);
state[i] = pack(hashPrefix);
}
}
return this;
}

// visible for testing
//
// Expands a packed register byte into the 64-bit hash-prefix bit pattern it encodes.
// The two low bits of the register are placed directly below an implicit leading 1-bit
// (the constant 4L); the remaining upper six bits of the register, minus 2, give the
// distance by which that 3-bit group is shifted left.
static long registerToHashPrefix(byte register) {
static long unpack(byte register) {
// The shift distance of a long is taken modulo 64, so the sign extension of a negative
// byte in (register >>> 2) does not change the result. For the register values 0 and 4
// the leading 1-bit is shifted past bit 63 and the result is 0.
return (4L | (register & 3)) << ((register >>> 2) - 2);
}

// visible for testing
//
// Packs a hash-prefix bit pattern into a single register byte. The upper six bits hold
// 63 - Long.numberOfLeadingZeros(hashPrefix) (the bit index of the most significant 1-bit),
// and the two low bits hold the two bits of hashPrefix that directly follow that 1-bit.
// NOTE(review): the callers visible here only pass non-zero values (add(UltraLogLog) guards
// with hashPrefix != 0); a zero argument would produce (byte) 252.
static byte hashPrefixToRegister(long hashPrefix) {
static byte pack(long hashPrefix) {
// +1 so that the left shift below also pushes the most significant 1-bit itself out
int nlz = Long.numberOfLeadingZeros(hashPrefix) + 1;
// (-nlz) << 2 supplies the upper six bits once the cast keeps only the low 8 bits;
// (hashPrefix << nlz) >>> 62 moves the two bits after the leading 1-bit into bits 0..1
return (byte) (((-nlz) << 2) | ((hashPrefix << nlz) >>> 62));
}
Expand All @@ -407,7 +407,8 @@ public double getDistinctCountEstimate(Estimator estimator) {
return estimator.estimate(this);
}

private static double getRegisterChangeProbability(byte reg, int p) {
// visible for testing
static double getRegisterChangeProbability(byte reg, int p) {
final int off = (p + 1) << 2;
int r = (reg & 0xFF);
int t = r - off;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ void doSimulation(
if (trueDistinctCount.compareTo(targetDistinctCount) < 0) {
while (transitionIndex < transitions.length
&& transitions[transitionIndex].distinctCount.compareTo(targetDistinctCount)
< 0) {
<= 0) {
sketch.add(transitions[transitionIndex].hash, martingaleEstimator);
transitionIndex += 1;
}
Expand Down
146 changes: 102 additions & 44 deletions src/test/java/com/dynatrace/hash4j/distinctcount/UltraLogLogTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,47 +77,43 @@ void testRelativeStandardErrorOfOptimalFGRAEstimatorAgainstConstants() {
}

@Test
void testPrefixConversion() {
assertThat(UltraLogLog.hashPrefixToRegister(0x4L)).isEqualTo((byte) 8);
assertThat(UltraLogLog.hashPrefixToRegister(0x5L)).isEqualTo((byte) 9);
assertThat(UltraLogLog.hashPrefixToRegister(0x6L)).isEqualTo((byte) 10);
assertThat(UltraLogLog.hashPrefixToRegister(0x7L)).isEqualTo((byte) 11);
assertThat(UltraLogLog.hashPrefixToRegister(0x8L)).isEqualTo((byte) 12);
assertThat(UltraLogLog.hashPrefixToRegister(0x9L)).isEqualTo((byte) 12);
assertThat(UltraLogLog.hashPrefixToRegister(0xAL)).isEqualTo((byte) 13);
assertThat(UltraLogLog.hashPrefixToRegister(0xBL)).isEqualTo((byte) 13);
assertThat(UltraLogLog.hashPrefixToRegister(12)).isEqualTo((byte) 14);
assertThat(UltraLogLog.hashPrefixToRegister(1L << (12 - 1))).isEqualTo((byte) 44);
assertThat(UltraLogLog.hashPrefixToRegister(1L << 12)).isEqualTo((byte) 48);
assertThat(UltraLogLog.hashPrefixToRegister((1L << (12 - 1)) | (1L << (12))))
.isEqualTo((byte) 50);
assertThat(UltraLogLog.hashPrefixToRegister(1L << (12 + 1))).isEqualTo((byte) 52);
assertThat(UltraLogLog.hashPrefixToRegister(0x8000000000000000L)).isEqualTo((byte) 252);
assertThat(UltraLogLog.hashPrefixToRegister(0xFFFFFFFFFFFFFFFFL)).isEqualTo((byte) 255);

assertThat(UltraLogLog.registerToHashPrefix((byte) 0)).isZero();
assertThat(UltraLogLog.registerToHashPrefix((byte) 4)).isZero();
assertThat(UltraLogLog.registerToHashPrefix((byte) 8)).isEqualTo(4);
assertThat(UltraLogLog.registerToHashPrefix((byte) 9)).isEqualTo(5);
assertThat(UltraLogLog.registerToHashPrefix((byte) 10)).isEqualTo(6);
assertThat(UltraLogLog.registerToHashPrefix((byte) 11)).isEqualTo(7);
assertThat(UltraLogLog.registerToHashPrefix((byte) 12)).isEqualTo(8);
assertThat(UltraLogLog.registerToHashPrefix((byte) 13)).isEqualTo(10);
assertThat(UltraLogLog.registerToHashPrefix((byte) 14)).isEqualTo(12);
assertThat(UltraLogLog.registerToHashPrefix((byte) 44)).isEqualTo(1L << (12 - 1));
assertThat(UltraLogLog.registerToHashPrefix((byte) 45))
.isEqualTo((1L << (12 - 1)) + (1L << (12 - 3)));
assertThat(UltraLogLog.registerToHashPrefix((byte) 46))
.isEqualTo((1L << (12 - 1)) + (1L << (12 - 2)));
assertThat(UltraLogLog.registerToHashPrefix((byte) 47))
void testRegisterPacking() {
assertThat(UltraLogLog.pack(0x4L)).isEqualTo((byte) 8);
assertThat(UltraLogLog.pack(0x5L)).isEqualTo((byte) 9);
assertThat(UltraLogLog.pack(0x6L)).isEqualTo((byte) 10);
assertThat(UltraLogLog.pack(0x7L)).isEqualTo((byte) 11);
assertThat(UltraLogLog.pack(0x8L)).isEqualTo((byte) 12);
assertThat(UltraLogLog.pack(0x9L)).isEqualTo((byte) 12);
assertThat(UltraLogLog.pack(0xAL)).isEqualTo((byte) 13);
assertThat(UltraLogLog.pack(0xBL)).isEqualTo((byte) 13);
assertThat(UltraLogLog.pack(12)).isEqualTo((byte) 14);
assertThat(UltraLogLog.pack(1L << (12 - 1))).isEqualTo((byte) 44);
assertThat(UltraLogLog.pack(1L << 12)).isEqualTo((byte) 48);
assertThat(UltraLogLog.pack((1L << (12 - 1)) | (1L << (12)))).isEqualTo((byte) 50);
assertThat(UltraLogLog.pack(1L << (12 + 1))).isEqualTo((byte) 52);
assertThat(UltraLogLog.pack(0x8000000000000000L)).isEqualTo((byte) 252);
assertThat(UltraLogLog.pack(0xFFFFFFFFFFFFFFFFL)).isEqualTo((byte) 255);

assertThat(UltraLogLog.unpack((byte) 0)).isZero();
assertThat(UltraLogLog.unpack((byte) 4)).isZero();
assertThat(UltraLogLog.unpack((byte) 8)).isEqualTo(4);
assertThat(UltraLogLog.unpack((byte) 9)).isEqualTo(5);
assertThat(UltraLogLog.unpack((byte) 10)).isEqualTo(6);
assertThat(UltraLogLog.unpack((byte) 11)).isEqualTo(7);
assertThat(UltraLogLog.unpack((byte) 12)).isEqualTo(8);
assertThat(UltraLogLog.unpack((byte) 13)).isEqualTo(10);
assertThat(UltraLogLog.unpack((byte) 14)).isEqualTo(12);
assertThat(UltraLogLog.unpack((byte) 44)).isEqualTo(1L << (12 - 1));
assertThat(UltraLogLog.unpack((byte) 45)).isEqualTo((1L << (12 - 1)) + (1L << (12 - 3)));
assertThat(UltraLogLog.unpack((byte) 46)).isEqualTo((1L << (12 - 1)) + (1L << (12 - 2)));
assertThat(UltraLogLog.unpack((byte) 47))
.isEqualTo((1L << (12 - 1)) + (1L << (12 - 2)) + (1L << (12 - 3)));
assertThat(UltraLogLog.registerToHashPrefix((byte) 255)).isEqualTo(0xE000000000000000L);
assertThat(UltraLogLog.unpack((byte) 255)).isEqualTo(0xE000000000000000L);

int smallestRegisterValue = (MIN_P << 2) - 4;
for (int i = smallestRegisterValue; i < 256; i += 1) {
byte b = (byte) i;
assertThat(UltraLogLog.hashPrefixToRegister(UltraLogLog.registerToHashPrefix(b)))
.isEqualTo(b);
assertThat(UltraLogLog.pack(UltraLogLog.unpack(b))).isEqualTo(b);
}
}

Expand All @@ -132,13 +128,13 @@ void testSmallestRegisterValues() {
long hashPrefix6 = 6L << (p - 1);
long hashPrefix7 = 7L << (p - 1);

byte register1 = UltraLogLog.hashPrefixToRegister(hashPrefix1);
byte register2 = UltraLogLog.hashPrefixToRegister(hashPrefix2);
byte register3 = UltraLogLog.hashPrefixToRegister(hashPrefix3);
byte register4 = UltraLogLog.hashPrefixToRegister(hashPrefix4);
byte register5 = UltraLogLog.hashPrefixToRegister(hashPrefix5);
byte register6 = UltraLogLog.hashPrefixToRegister(hashPrefix6);
byte register7 = UltraLogLog.hashPrefixToRegister(hashPrefix7);
byte register1 = UltraLogLog.pack(hashPrefix1);
byte register2 = UltraLogLog.pack(hashPrefix2);
byte register3 = UltraLogLog.pack(hashPrefix3);
byte register4 = UltraLogLog.pack(hashPrefix4);
byte register5 = UltraLogLog.pack(hashPrefix5);
byte register6 = UltraLogLog.pack(hashPrefix6);
byte register7 = UltraLogLog.pack(hashPrefix7);

assertThat(register1).isEqualTo((byte) ((p << 2) - 4));
assertThat(register2).isEqualTo((byte) (p << 2));
Expand All @@ -150,7 +146,7 @@ void testSmallestRegisterValues() {
}

long hashPrefixLargest = 0xFFFFFFFFFFFFFFFFL;
byte registerLargest = UltraLogLog.hashPrefixToRegister(hashPrefixLargest);
byte registerLargest = UltraLogLog.pack(hashPrefixLargest);
assertThat(registerLargest).isEqualTo((byte) 255);
}

Expand Down Expand Up @@ -820,4 +816,66 @@ void testOptimalFGRAEstimatorEstimationFactors() {
.mapToDouble(p -> calculateEstimationFactor(p))
.toArray());
}

/**
 * Converts a register value given in the reference definition of the paper into the value
 * used by this implementation for precision {@code p}.
 *
 * <p>Zero stays zero; every other value is moved up by {@code 4 * (p - 2)} and narrowed
 * back to a byte.
 */
private static byte mapRegisterFromReferenceDefinition(byte r, int p) {
  final int shift = 4 * (p - 2);
  return (r == 0) ? (byte) 0 : (byte) (r + shift);
}

/**
 * Converts a register value of this implementation, for precision {@code p}, into the
 * corresponding value of the reference definition given in the paper.
 *
 * <p>Zero stays zero; every other value is moved down by {@code 4 * (p - 2)} and narrowed
 * back to a byte.
 */
private static byte mapRegisterToReferenceDefinition(byte r, int p) {
  final int shift = 4 * (p - 2);
  return (r == 0) ? (byte) 0 : (byte) (r - shift);
}

/**
 * Compares getRegisterChangeProbability with closed-form expected values for every precision
 * from MIN_P to MAX_P. Register values are written in the reference numbering and translated
 * with mapRegisterFromReferenceDefinition before each call.
 */
@Test
void testRegisterStateChangeProbability() {
  // expected numerators for the reference registers 4*u+0 .. 4*u+3 with 3 <= u < w
  final double[] midNumerators = {7., 3., 5., 1.};
  // expected numerators for the reference registers 4*w+0 .. 4*w+2
  final double[] topNumerators = {3., 1., 2.};

  for (int p = MIN_P; p <= MAX_P; ++p) {
    final int m = 1 << p;
    final int w = 65 - p;

    // reference registers 0, 4, 8 and 10
    byte reg = mapRegisterFromReferenceDefinition((byte) 0, p);
    assertThat(getRegisterChangeProbability(reg, p)).isEqualTo(1. / m);

    reg = mapRegisterFromReferenceDefinition((byte) 4, p);
    assertThat(getRegisterChangeProbability(reg, p)).isEqualTo(1. / (2. * m));

    reg = mapRegisterFromReferenceDefinition((byte) 8, p);
    assertThat(getRegisterChangeProbability(reg, p)).isEqualTo(3. / (4. * m));

    reg = mapRegisterFromReferenceDefinition((byte) 10, p);
    assertThat(getRegisterChangeProbability(reg, p)).isEqualTo(1. / (4. * m));

    // reference registers 4*u+k for 3 <= u < w and k = 0..3
    for (int u = 3; u < w; ++u) {
      final double scale = Math.pow(2., u) * m;
      for (int k = 0; k < midNumerators.length; ++k) {
        reg = mapRegisterFromReferenceDefinition((byte) (4 * u + k), p);
        assertThat(getRegisterChangeProbability(reg, p)).isEqualTo(midNumerators[k] / scale);
      }
    }

    // reference registers 4*w+k for k = 0..2
    final double topScale = Math.pow(2., w - 1) * m;
    for (int k = 0; k < topNumerators.length; ++k) {
      reg = mapRegisterFromReferenceDefinition((byte) (4 * w + k), p);
      assertThat(getRegisterChangeProbability(reg, p)).isEqualTo(topNumerators[k] / topScale);
    }

    // reference register 4*w+3 is expected to have change probability zero
    reg = mapRegisterFromReferenceDefinition((byte) (4 * w + 3), p);
    assertThat(getRegisterChangeProbability(reg, p)).isZero();
  }
}
}
Loading

0 comments on commit 08f5f4f

Please sign in to comment.