From b6717c6a27cb7d18a2358619b13e603b79325f6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6ll?= Date: Mon, 9 Sep 2024 18:29:59 +0000 Subject: [PATCH] feat: add hash struct --- hash/errors.go | 22 +++ hash/hash.go | 449 ++++++++++++++++++++++++++++++++++++++++++++++++ hash/include.go | 22 +++ 3 files changed, 493 insertions(+) create mode 100644 hash/errors.go create mode 100644 hash/hash.go create mode 100644 hash/include.go diff --git a/hash/errors.go b/hash/errors.go new file mode 100644 index 0000000..fff60f3 --- /dev/null +++ b/hash/errors.go @@ -0,0 +1,22 @@ +package hash + +import ( + "fmt" +) + +// ErrNotStringer is returned when there's an error with hash:"string" +type ErrNotStringer struct { + Field string +} + +// Error implements error for ErrNotStringer +func (ens *ErrNotStringer) Error() string { + return fmt.Sprintf("hash: %s has hash:\"string\" set, but does not implement fmt.Stringer", ens.Field) +} + +// ErrFormat is returned when an invalid format is given to the Hash function. +type ErrFormat struct{} + +func (*ErrFormat) Error() string { + return "format must be one of the defined Format values in the hash library" +} diff --git a/hash/hash.go b/hash/hash.go new file mode 100644 index 0000000..ba8f2b5 --- /dev/null +++ b/hash/hash.go @@ -0,0 +1,449 @@ +package hash + +import ( + "encoding/binary" + "fmt" + "hash" + "hash/fnv" + "reflect" + "time" +) + +// HashOptions are options that are available for hashing. +type HashOptions struct { + // Hasher is the hash function to use. If this isn't set, it will + // default to FNV. + Hasher hash.Hash64 + + // TagName is the struct tag to look at when hashing the structure. + // By default this is "hash". + TagName string + + // ZeroNil is flag determining if nil pointer should be treated equal + // to a zero value of pointed type. By default this is false. + ZeroNil bool + + // IgnoreZeroValue is determining if zero value fields should be + // ignored for hash calculation. + IgnoreZeroValue bool + + // SlicesAsSets assumes that a `set` tag is always present for slices. + // Default is false (in which case the tag is used instead) + SlicesAsSets bool + + // UseStringer will attempt to use fmt.Stringer always. If the struct + // doesn't implement fmt.Stringer, it'll fall back to trying usual tricks. + // If this is true, and the "string" tag is also set, the tag takes + // precedence (meaning that if the type doesn't implement fmt.Stringer, we + // panic) + UseStringer bool +} + +// Hash returns the hash value of an arbitrary value. +// +// If opts is nil, then default options will be used. See HashOptions +// for the default values. The same *HashOptions value cannot be used +// concurrently. None of the values within a *HashOptions struct are +// safe to read/write while hashing is being done. +// +// The "format" is required and must be one of the format values defined +// by this library. You should probably just use "FormatV2". This allows +// generated hashes uses alternate logic to maintain compatibility with +// older versions. +// +// Notes on the value: +// +// - Unexported fields on structs are ignored and do not affect the +// hash value. +// +// - Adding an exported field to a struct with the zero value will change +// the hash value. +// +// For structs, the hashing can be controlled using tags. For example: +// +// struct { +// Name string +// UUID string `hash:"ignore"` +// } +// +// The available tag values are: +// +// - "ignore" or "-" - The field will be ignored and not affect the hash code. +// +// - "set" - The field will be treated as a set, where ordering doesn't +// affect the hash code. This only works for slices. +// +// - "string" - The field will be hashed as a string, only works when the +// field implements fmt.Stringer +func Hash(v interface{}, opts *HashOptions) (uint64, error) { + // Create default options + if opts == nil { + opts = &HashOptions{} + } + if opts.Hasher == nil { + opts.Hasher = fnv.New64() + } + if opts.TagName == "" { + opts.TagName = "hash" + } + + // Reset the hash + opts.Hasher.Reset() + + // Create our walker and walk the structure + w := &walker{ + h: opts.Hasher, + tag: opts.TagName, + zeronil: opts.ZeroNil, + ignorezerovalue: opts.IgnoreZeroValue, + sets: opts.SlicesAsSets, + stringer: opts.UseStringer, + } + return w.visit(reflect.ValueOf(v), nil) +} + +type walker struct { + h hash.Hash64 + tag string + zeronil bool + ignorezerovalue bool + sets bool + stringer bool +} + +type visitOpts struct { + // Flags are a bitmask of flags to affect behavior of this visit + Flags visitFlag + + // Information about the struct containing this field + Struct interface{} + StructField string +} + +var timeType = reflect.TypeOf(time.Time{}) + +func (w *walker) visit(v reflect.Value, opts *visitOpts) (uint64, error) { + t := reflect.TypeOf(0) + + // Loop since these can be wrapped in multiple layers of pointers + // and interfaces. + for { + // If we have an interface, dereference it. We have to do this up + // here because it might be a nil in there and the check below must + // catch that. + if v.Kind() == reflect.Interface { + v = v.Elem() + continue + } + + if v.Kind() == reflect.Ptr { + if w.zeronil { + t = v.Type().Elem() + } + v = reflect.Indirect(v) + continue + } + + break + } + + // If it is nil, treat it like a zero. + if !v.IsValid() { + v = reflect.Zero(t) + } + + // Binary writing can use raw ints, we have to convert to + // a sized-int, we'll choose the largest... + switch v.Kind() { + case reflect.Int: + v = reflect.ValueOf(int64(v.Int())) + case reflect.Uint: + v = reflect.ValueOf(uint64(v.Uint())) + case reflect.Bool: + var tmp int8 + if v.Bool() { + tmp = 1 + } + v = reflect.ValueOf(tmp) + } + + k := v.Kind() + + // We can shortcut numeric values by directly binary writing them + if k >= reflect.Int && k <= reflect.Complex64 { + // A direct hash calculation + w.h.Reset() + err := binary.Write(w.h, binary.LittleEndian, v.Interface()) + return w.h.Sum64(), err + } + + switch v.Type() { + case timeType: + w.h.Reset() + b, err := v.Interface().(time.Time).MarshalBinary() + if err != nil { + return 0, err + } + + err = binary.Write(w.h, binary.LittleEndian, b) + return w.h.Sum64(), err + } + + switch k { + case reflect.Array: + var h uint64 + l := v.Len() + for i := 0; i < l; i++ { + current, err := w.visit(v.Index(i), nil) + if err != nil { + return 0, err + } + + h = hashUpdateOrdered(w.h, h, current) + } + + return h, nil + + case reflect.Map: + var includeMap IncludableMap + if opts != nil && opts.Struct != nil { + if v, ok := opts.Struct.(IncludableMap); ok { + includeMap = v + } + } + + // Build the hash for the map. We do this by XOR-ing all the key + // and value hashes. This makes it deterministic despite ordering. + var h uint64 + for _, k := range v.MapKeys() { + v := v.MapIndex(k) + if includeMap != nil { + incl, err := includeMap.HashIncludeMap( + opts.StructField, k.Interface(), v.Interface()) + if err != nil { + return 0, err + } + if !incl { + continue + } + } + + kh, err := w.visit(k, nil) + if err != nil { + return 0, err + } + vh, err := w.visit(v, nil) + if err != nil { + return 0, err + } + + fieldHash := hashUpdateOrdered(w.h, kh, vh) + h = hashUpdateUnordered(h, fieldHash) + } + + // Important: read the docs for hashFinishUnordered + h = hashFinishUnordered(w.h, h) + + return h, nil + + case reflect.Struct: + parent := v.Interface() + var include Includable + if impl, ok := parent.(Includable); ok { + include = impl + } + + if impl, ok := parent.(Hashable); ok { + return impl.Hash() + } + + // If we can address this value, check if the pointer value + // implements our interfaces and use that if so. + if v.CanAddr() { + vptr := v.Addr() + parentptr := vptr.Interface() + if impl, ok := parentptr.(Includable); ok { + include = impl + } + + if impl, ok := parentptr.(Hashable); ok { + return impl.Hash() + } + } + + t := v.Type() + h, err := w.visit(reflect.ValueOf(t.Name()), nil) + if err != nil { + return 0, err + } + + l := v.NumField() + for i := 0; i < l; i++ { + if innerV := v.Field(i); v.CanSet() || t.Field(i).Name != "_" { + var f visitFlag + fieldType := t.Field(i) + if fieldType.PkgPath != "" { + // Unexported + continue + } + + tag := fieldType.Tag.Get(w.tag) + if tag == "ignore" || tag == "-" { + // Ignore this field + continue + } + + if w.ignorezerovalue { + if innerV.IsZero() { + continue + } + } + + // if string is set, use the string value + if tag == "string" || w.stringer { + if impl, ok := innerV.Interface().(fmt.Stringer); ok { + innerV = reflect.ValueOf(impl.String()) + } else if tag == "string" { + // We only show this error if the tag explicitly + // requests a stringer. + return 0, &ErrNotStringer{ + Field: v.Type().Field(i).Name, + } + } + } + + // Check if we implement includable and check it + if include != nil { + incl, err := include.HashInclude(fieldType.Name, innerV) + if err != nil { + return 0, err + } + if !incl { + continue + } + } + + switch tag { + case "set": + f |= visitFlagSet + } + + kh, err := w.visit(reflect.ValueOf(fieldType.Name), nil) + if err != nil { + return 0, err + } + + vh, err := w.visit(innerV, &visitOpts{ + Flags: f, + Struct: parent, + StructField: fieldType.Name, + }) + if err != nil { + return 0, err + } + + fieldHash := hashUpdateOrdered(w.h, kh, vh) + h = hashUpdateUnordered(h, fieldHash) + } + // Important: read the docs for hashFinishUnordered + h = hashFinishUnordered(w.h, h) + } + + return h, nil + + case reflect.Slice: + // We have two behaviors here. If it isn't a set, then we just + // visit all the elements. If it is a set, then we do a deterministic + // hash code. + var h uint64 + var set bool + if opts != nil { + set = (opts.Flags & visitFlagSet) != 0 + } + l := v.Len() + for i := 0; i < l; i++ { + current, err := w.visit(v.Index(i), nil) + if err != nil { + return 0, err + } + + if set || w.sets { + h = hashUpdateUnordered(h, current) + } else { + h = hashUpdateOrdered(w.h, h, current) + } + } + + if set { + // Important: read the docs for hashFinishUnordered + h = hashFinishUnordered(w.h, h) + } + + return h, nil + + case reflect.String: + // Directly hash + w.h.Reset() + _, err := w.h.Write([]byte(v.String())) + return w.h.Sum64(), err + + default: + return 0, fmt.Errorf("unknown kind to hash: %s", k) + } +} + +func hashUpdateOrdered(h hash.Hash64, a, b uint64) uint64 { + // For ordered updates, use a real hash function + h.Reset() + + // We just panic if the binary writes fail because we are writing + // an int64 which should never be fail-able. + e1 := binary.Write(h, binary.LittleEndian, a) + e2 := binary.Write(h, binary.LittleEndian, b) + if e1 != nil { + panic(e1) + } + if e2 != nil { + panic(e2) + } + + return h.Sum64() +} + +func hashUpdateUnordered(a, b uint64) uint64 { + return a ^ b +} + +// After mixing a group of unique hashes with hashUpdateUnordered, it's always +// necessary to call hashFinishUnordered. Why? Because hashUpdateUnordered +// is a simple XOR, and calling hashUpdateUnordered on hashes produced by +// hashUpdateUnordered can effectively cancel out a previous change to the hash +// result if the same hash value appears later on. For example, consider: +// +// hashUpdateUnordered(hashUpdateUnordered("A", "B"), hashUpdateUnordered("A", "C")) = +// H("A") ^ H("B")) ^ (H("A") ^ H("C")) = +// (H("A") ^ H("A")) ^ (H("B") ^ H(C)) = +// H(B) ^ H(C) = +// hashUpdateUnordered(hashUpdateUnordered("Z", "B"), hashUpdateUnordered("Z", "C")) +// +// hashFinishUnordered "hardens" the result, so that encountering partially +// overlapping input data later on in a different context won't cancel out. +func hashFinishUnordered(h hash.Hash64, a uint64) uint64 { + h.Reset() + + // We just panic if the writes fail + e1 := binary.Write(h, binary.LittleEndian, a) + if e1 != nil { + panic(e1) + } + + return h.Sum64() +} + +// visitFlag is used as a bitmask for affecting visit behavior +type visitFlag uint + +const ( + visitFlagInvalid visitFlag = iota + visitFlagSet = iota << 1 +) diff --git a/hash/include.go b/hash/include.go new file mode 100644 index 0000000..f9a6179 --- /dev/null +++ b/hash/include.go @@ -0,0 +1,22 @@ +package hash + +// Includable is an interface that can optionally be implemented by +// a struct. It will be called for each field in the struct to check whether +// it should be included in the hash. +type Includable interface { + HashInclude(field string, v interface{}) (bool, error) +} + +// IncludableMap is an interface that can optionally be implemented by +// a struct. It will be called when a map-type field is found to ask the +// struct if the map item should be included in the hash. +type IncludableMap interface { + HashIncludeMap(field string, k, v interface{}) (bool, error) +} + +// Hashable is an interface that can optionally be implemented by a struct +// to override the hash value. This value will override the hash value for +// the entire struct. Entries in the struct will not be hashed. +type Hashable interface { + Hash() (uint64, error) +}