Skip to content

Commit

Permalink
Merge pull request #25 from mailgun/thrawn/develop
Browse files Browse the repository at this point in the history
Consistent hash improvements
  • Loading branch information
thrawn01 authored Jan 13, 2021
2 parents 65c4ea5 + ff66388 commit 6251eaf
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 26 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [2.2.1] - 2021-01-13
### Changes
* Now uses the much faster fnv1
* Now md5 hashes the keys to help distribute hosts more evenly in some
cases.

## [2.2.0] - 2019-07-09
### Added
* Added `SetLogger()` to pass in a logrus entry for logging peer errors
Expand Down
11 changes: 7 additions & 4 deletions consistenthash/consistenthash.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ limitations under the License.
package consistenthash

import (
"hash/crc32"
"crypto/md5"
"fmt"
"sort"
"strconv"

"github.com/segmentio/fasthash/fnv1"
)

type Hash func(data []byte) uint32
type Hash func(data []byte) uint64

type Map struct {
hash Hash
Expand All @@ -39,7 +42,7 @@ func New(replicas int, fn Hash) *Map {
hashMap: make(map[int]string),
}
if m.hash == nil {
m.hash = crc32.ChecksumIEEE
m.hash = fnv1.HashBytes64
}
return m
}
Expand All @@ -53,7 +56,7 @@ func (m *Map) IsEmpty() bool {
func (m *Map) Add(keys ...string) {
for _, key := range keys {
for i := 0; i < m.replicas; i++ {
hash := int(m.hash([]byte(strconv.Itoa(i) + key)))
hash := int(m.hash([]byte(fmt.Sprintf("%x", md5.Sum([]byte(strconv.Itoa(i)+key))))))
m.keys = append(m.keys, hash)
m.hashMap[hash] = key
}
Expand Down
77 changes: 55 additions & 22 deletions consistenthash/consistenthash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,51 +18,45 @@ package consistenthash

import (
"fmt"
"strconv"
"math/rand"
"net"
"testing"
"time"

"github.com/segmentio/fasthash/fnv1"
)

func TestHashing(t *testing.T) {

// Override the hash function to return easier to reason about values. Assumes
// the keys can be converted to an integer.
hash := New(3, func(key []byte) uint32 {
i, err := strconv.Atoi(string(key))
if err != nil {
panic(err)
}
return uint32(i)
})
hash := New(512, nil)

// Given the above hash function, this will give replicas with "hashes":
// 2, 4, 6, 12, 14, 16, 22, 24, 26
hash.Add("6", "4", "2")

testCases := map[string]string{
"2": "2",
"11": "2",
"23": "4",
"27": "2",
"12,000": "4",
"11": "6",
"500,000": "4",
"1,000,000": "2",
}

for k, v := range testCases {
if hash.Get(k) != v {
t.Errorf("Asking for %s, should have yielded %s", k, v)
if got := hash.Get(k); got != v {
t.Errorf("Asking for %s, should have yielded %s; got %s instead", k, v, got)
}
}

// Adds 8, 18, 28
hash.Add("8")

// 27 should now map to 8.
testCases["27"] = "8"
testCases["11"] = "8"
testCases["1,000,000"] = "8"

for k, v := range testCases {
if hash.Get(k) != v {
t.Errorf("Asking for %s, should have yielded %s", k, v)
if got := hash.Get(k); got != v {
t.Errorf("Asking for %s, should have yielded %s; got %s instead", k, v, got)
}
}

}

func TestConsistency(t *testing.T) {
Expand All @@ -77,13 +71,52 @@ func TestConsistency(t *testing.T) {
}

hash2.Add("Becky", "Ben", "Bobby")
hash1.Add("Becky", "Ben", "Bobby")

if hash1.Get("Ben") != hash2.Get("Ben") ||
hash1.Get("Bob") != hash2.Get("Bob") ||
hash1.Get("Bonny") != hash2.Get("Bonny") {
t.Errorf("Direct matches should always return the same entry")
}
}

// TestDistribution logs how evenly randomly generated IPv4 address keys are
// spread across a fixed set of hosts for each hash function under test.
//
// The test is informational: it reports the share of keys each host received
// via t.Logf and makes no assertions about the distribution.
func TestDistribution(t *testing.T) {
	hosts := []string{"a.svc.local", "b.svc.local", "c.svc.local"}
	const cases = 10000

	// Use a private generator instead of seeding the package-level one, so this
	// test does not change the random state observed by other tests.
	rng := rand.New(rand.NewSource(time.Now().Unix()))

	// Build the lookup keys: random addresses of the form 192.x.y.z.
	keys := make([]string, cases)
	for i := range keys {
		r := rng.Int31()
		keys[i] = net.IPv4(192, byte(r>>16), byte(r>>8), byte(r)).String()
	}

	hashFuncs := map[string]Hash{
		"fasthash/fnv1": fnv1.HashBytes64,
	}

	for name, hashFunc := range hashFuncs {
		t.Run(name, func(t *testing.T) {
			ring := New(512, hashFunc)
			hits := make(map[string]int, len(hosts))

			// Register every host up front so hosts that receive no keys are
			// still reported with a zero count.
			for _, host := range hosts {
				ring.Add(host)
				hits[host] = 0
			}

			for _, key := range keys {
				hits[ring.Get(key)]++
			}

			for host, count := range hits {
				// Scale the fraction to an actual percentage to match the label.
				t.Logf("host: %s, percent: %f", host, 100*float64(count)/cases)
			}
		})
	}
}

func BenchmarkGet8(b *testing.B) { benchmarkGet(b, 8) }
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module github.com/mailgun/groupcache/v2

require (
github.com/golang/protobuf v1.3.1
github.com/segmentio/fasthash v1.0.3
github.com/sirupsen/logrus v1.6.0
)

Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
github.com/segmentio/fasthash v1.0.3 h1:EI9+KE1EwvMLBWwjpRDc+fEM+prwxDYbslddQGtrmhM=
github.com/segmentio/fasthash v1.0.3/go.mod h1:waKX8l2N8yckOgmSsXJi7x1ZfdKZ4x7KRMzBtS3oedY=

0 comments on commit 6251eaf

Please sign in to comment.