Skip to content

Commit

Permalink
Merge pull request #124 from dynatrace-oss/polymurhash
Browse files Browse the repository at this point in the history
upgraded reference for PolymurHash from 1.0 to 2.0
  • Loading branch information
oertl authored Jun 23, 2023
2 parents 9ea6cc0 + 42abdf9 commit efe058f
Show file tree
Hide file tree
Showing 11 changed files with 2,124 additions and 13 deletions.
4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@
[submodule "reference-implementations/komihash_5_1/komihash"]
path = reference-implementations/komihash_5_1/komihash
url = https://github.com/avaneev/komihash.git
[submodule "reference-implementations/polymur-hash_1_0/polymur-hash"]
path = reference-implementations/polymur-hash_1_0/polymur-hash
[submodule "reference-implementations/polymur-hash_2_0/polymur-hash"]
path = reference-implementations/polymur-hash_2_0/polymur-hash
url = https://github.com/orlp/polymur-hash.git
8 changes: 8 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ spotless {
'src/main/java/com/dynatrace/hash4j/file/Imohash1_0_2.java',\
'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java',\
'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java',\
'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java',\
'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java',\
'src/main/java/com/dynatrace/hash4j/random/SplitMix64V1.java',\
'src/main/java/com/dynatrace/hash4j/random/RandomExponentialUtil.java'
Expand All @@ -115,6 +116,13 @@ spotless {
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('MIT_KOMIHASH')
target 'src/main/java/com/dynatrace/hash4j/hashing/Komihash4_3.java', 'src/main/java/com/dynatrace/hash4j/hashing/Komihash5_0.java', 'src/main/java/com/dynatrace/hash4j/hashing/AbstractKomihash.java'
}
// Spotless format dedicated to the PolymurHash Java port, which needs its own
// license header (two license texts instead of one).
format 'javaPolymurHash', JavaExtension, {
importOrder()
removeUnusedImports()
googleJavaFormat('1.17.0')
// The header is the APACHE_2_0_DYNATRACE text followed by the ZLIB_POLYMURHASH
// text, joined with '\n\n'. readJavaLicense is defined outside this excerpt;
// presumably it loads the matching file from licenses/ (confirm).
licenseHeader readJavaLicense('APACHE_2_0_DYNATRACE') + '\n\n' + readJavaLicense('ZLIB_POLYMURHASH')
// This format targets exactly one source file.
target 'src/main/java/com/dynatrace/hash4j/hashing/PolymurHash2_0.java'
}
format 'javaSplitMix64', JavaExtension, {
importOrder()
removeUnusedImports()
Expand Down
22 changes: 22 additions & 0 deletions licenses/ZLIB_POLYMURHASH.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
This file includes a Java port of the PolymurHash algorithm originally published
at https://github.com/orlp/polymur-hash under the following license:

Copyright (c) 2023 Orson Peters

This software is provided 'as-is', without any express or implied warranty. In
no event will the authors be held liable for any damages arising from the use of
this software.

Permission is granted to anyone to use this software for any purpose, including
commercial applications, and to alter it and redistribute it freely, subject to
the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim
that you wrote the original software. If you use this software in a product,
an acknowledgment in the product documentation would be appreciated but is
not required.

2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.

3. This notice may not be removed or altered from any source distribution.
2 changes: 1 addition & 1 deletion reference-implementations/build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
g++ \
calculate_checksums.cpp \
polymur-hash_1_0/polymur-hash_1_0_checksum_config.cpp \
polymur-hash_2_0/polymur-hash_2_0_checksum_config.cpp \
wyhash_final_3/wyhash_final_3_checksum_config.cpp \
wyhash_final_4/wyhash_final_4_checksum_config.cpp \
komihash_4_3/komihash_4_3_checksum_config.cpp \
Expand Down
4 changes: 2 additions & 2 deletions reference-implementations/calculate_checksums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include "komihash_4_7/komihash_4_7_checksum_config.hpp"
#include "komihash_5_0/komihash_5_0_checksum_config.hpp"
#include "komihash_5_1/komihash_5_1_checksum_config.hpp"
#include "polymur-hash_1_0/polymur-hash_1_0_checksum_config.hpp"
#include "polymur-hash_2_0/polymur-hash_2_0_checksum_config.hpp"
#include "wyhash_final_3/wyhash_final_3_checksum_config.hpp"
#include "wyhash_final_4/wyhash_final_4_checksum_config.hpp"
#include "murmur3_128/murmur3_128_checksum_config.hpp"
Expand Down Expand Up @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) {
computeAndPrintChecksum<WyhashFinal4ChecksumConfig>();
computeAndPrintChecksum<Murmur3_128_ChecksumConfig>();
computeAndPrintChecksum<Murmur3_32_ChecksumConfig>();
computeAndPrintChecksum<PolymurHash_1_0_ChecksumConfig>();
computeAndPrintChecksum<PolymurHash_2_0_ChecksumConfig>();

return 0;
}
1 change: 0 additions & 1 deletion reference-implementations/polymur-hash_1_0/polymur-hash
Submodule polymur-hash deleted from ffe06d
2,010 changes: 2,010 additions & 0 deletions reference-implementations/polymur-hash_2_0/out.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions reference-implementations/polymur-hash_2_0/polymur-hash
Submodule polymur-hash added at c6cc68
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "polymur-hash_1_0_checksum_config.hpp"
#include "polymur-hash_2_0_checksum_config.hpp"
#include "polymur-hash/polymur-hash.h"
#include <cstring>

void PolymurHash_1_0_ChecksumConfig::calculateHash(const uint8_t *seedBytes,
void PolymurHash_2_0_ChecksumConfig::calculateHash(const uint8_t *seedBytes,
uint8_t *hashBytes, const uint8_t *dataBytes, uint64_t size) const {

uint64_t seed0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef POLYMURHASH_1_0_CHECKSUM_CONFIG_HPP
#define POLYMURHASH_1_0_CHECKSUM_CONFIG_HPP
#ifndef POLYMURHASH_2_0_CHECKSUM_CONFIG_HPP
#define POLYMURHASH_2_0_CHECKSUM_CONFIG_HPP

#include <string>

class PolymurHash_1_0_ChecksumConfig {
class PolymurHash_2_0_ChecksumConfig {

public:

Expand All @@ -31,12 +31,12 @@ class PolymurHash_1_0_ChecksumConfig {
}

std::string getName() const {
return "PolymurHash 1.0";
return "PolymurHash 2.0";
}

void calculateHash(const uint8_t *seedBytes, uint8_t *hashBytes,
const uint8_t *dataBytes, uint64_t size) const;

};

#endif // POLYMURHASH_1_0_CHECKSUM_CONFIG_HPP
#endif // POLYMURHASH_2_0_CHECKSUM_CONFIG_HPP
71 changes: 71 additions & 0 deletions reference-implementations/polymur-hash_2_0/reference_data.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright 2022-2023 Dynatrace LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "polymur-hash/polymur-hash.h"

#include <cstdint>
#include <iomanip>
#include <iostream>
#include <random>
#include <vector>

using namespace std;

/*
 * Prints PolymurHash 2.0 reference data to standard output.
 *
 * For every input size from 0 to maxSize (inclusive), numExamplesPerSize
 * random examples are generated from a Mersenne Twister seeded with 0, so the
 * program draws the same random sequence on every run for a given standard
 * library. Each example consists of random input bytes, a tweak and two seeds.
 * The input is hashed twice: once with parameters initialized from seed0 only
 * (polymur_init_params_from_seed) and once with parameters initialized from
 * seed0 and seed1 (polymur_init_params).
 *
 * One line is written per example, in the form
 *
 *   builder.add(0x<hash0>L, 0x<hash1>L, 0x<tweak>L, 0x<seed0>L, 0x<seed1>L, "<data as hex>");
 *
 * where all 64-bit values are zero-padded to 16 hex digits and every data byte
 * is written as 2 hex digits.
 *
 * The command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void) argc;
    (void) argv;

    std::mt19937_64 rng(0);

    const uint64_t maxSize = 200;
    const uint64_t numExamplesPerSize = 10;

    // std::uniform_int_distribution is only specified for short, int, long,
    // long long and their unsigned counterparts; instantiating it with a
    // character type such as uint8_t is undefined behavior. Draw from a wider
    // type and narrow the result, which always lies in [0, 255].
    std::uniform_int_distribution<unsigned int> dist(0, 255);

    // hex and setfill are sticky, so setting them once is sufficient;
    // only setw must be repeated because it is reset after each output.
    std::cout << std::hex << std::setfill('0');

    // Writes a 64-bit value as a Java long literal, e.g. 0x0123456789abcdefL.
    const auto printHex64 = [](uint64_t value) {
        std::cout << "0x" << std::setw(16) << value << 'L';
    };

    for (uint64_t size = 0; size <= maxSize; ++size) {
        std::vector<uint8_t> data(size);
        for (uint64_t i = 0; i < numExamplesPerSize; ++i) {
            // The order of the random draws (data bytes, tweak, seed0, seed1)
            // determines the generated examples and must not be changed.
            for (uint64_t k = 0; k < size; ++k) {
                data[k] = static_cast<uint8_t>(dist(rng));
            }
            const uint64_t tweak = rng();
            const uint64_t seed0 = rng();
            const uint64_t seed1 = rng();

            PolymurHashParams params0;
            PolymurHashParams params1;

            polymur_init_params_from_seed(&params0, seed0);
            polymur_init_params(&params1, seed0, seed1);

            // data.data() is valid for an empty vector, whereas &data[0]
            // would index out of bounds when size == 0.
            const uint64_t hash0 = polymur_hash(data.data(), size, &params0, tweak);
            const uint64_t hash1 = polymur_hash(data.data(), size, &params1, tweak);

            std::cout << "builder.add(";
            printHex64(hash0);
            std::cout << ", ";
            printHex64(hash1);
            std::cout << ", ";
            printHex64(tweak);
            std::cout << ", ";
            printHex64(seed0);
            std::cout << ", ";
            printHex64(seed1);
            std::cout << ", \"";
            for (const uint8_t byte : data) {
                // Widen before printing so the byte is formatted as a number
                // and not as a character.
                std::cout << std::setw(2) << static_cast<uint64_t>(byte);
            }
            // '\n' instead of std::endl avoids flushing after every line;
            // the stream is flushed at program exit.
            std::cout << "\");" << '\n';
        }
    }
}

0 comments on commit efe058f

Please sign in to comment.