From 42b1887d9cda8dd408cfe42646d044dfc75c6626 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 11 Feb 2020 14:46:02 +0100 Subject: [PATCH 1/7] file, testutil: Add reference file hasher --- file/hasher/common_test.go | 269 ++++++++++++++++++++++++++++++++++ file/hasher/hasher.go | 2 +- file/hasher/hasher_test.go | 2 +- file/hasher/param.go | 59 ++++++++ file/hasher/reference.go | 117 +++++++++++++++ file/hasher/reference_test.go | 140 ++++++++++++++++++ file/hasher/util.go | 58 ++++++++ file/hasher/util_test.go | 90 ++++++++++++ testutil/data.go | 15 ++ 9 files changed, 750 insertions(+), 2 deletions(-) create mode 100644 file/hasher/common_test.go create mode 100644 file/hasher/param.go create mode 100644 file/hasher/reference.go create mode 100644 file/hasher/reference_test.go create mode 100644 file/hasher/util.go create mode 100644 file/hasher/util_test.go create mode 100644 testutil/data.go diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go new file mode 100644 index 0000000000..07b5db9682 --- /dev/null +++ b/file/hasher/common_test.go @@ -0,0 +1,269 @@ +package hasher + +import ( + "bytes" + "context" + "encoding/binary" + "hash" + "sync" + "testing" + + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +const ( + sectionSize = 32 + branches = 128 + chunkSize = 4096 + zeroHex = "0000000000000000000000000000000000000000000000000000000000000000" +) + +var ( + dataLengths = []int{31, // 0 + 32, // 1 + 33, // 2 + 63, // 3 + 64, // 4 + 65, // 5 + chunkSize, // 6 + chunkSize + 31, // 7 + chunkSize + 32, // 8 + chunkSize + 63, // 9 + chunkSize + 64, // 10 + chunkSize * 2, // 11 + chunkSize*2 + 32, // 12 + chunkSize * 128, // 13 + chunkSize*128 + 31, // 14 + chunkSize*128 + 32, // 15 + chunkSize*128 + 64, // 16 + chunkSize * 129, // 17 + chunkSize * 130, // 18 + chunkSize * 128 * 128, // 19 + chunkSize*128*128 + 32, // 20 + } + expected = []string{ + "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0 + "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1 + "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2 + "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3 + "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4 + "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5 + "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6 + "91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7 + "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8 + "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9 + "ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10 + "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11 + "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12 + "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13 + "e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14 + "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15 + "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16 + "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17 + "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18 + "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 
19 + "ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20 + } + + start = 0 + end = len(dataLengths) +) + +func init() { + testutil.Init() +} + +var ( + dummyHashFunc = func(_ context.Context) file.SectionWriter { + return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) + } + + // placeholder for cases where a hasher is not necessary + noHashFunc = func(_ context.Context) file.SectionWriter { + return nil + } + + logErrFunc = func(err error) { + log.Error("SectionWriter pipeline error", "err", err) + } +) + +// simple param.SectionWriter hasher that keeps the data written to it +// for later inspection +// TODO: see if this can be replaced with the fake hasher from storage module +type dummySectionWriter struct { + sectionSize int + digestSize int + branches int + data []byte + digest []byte + size int + span []byte + summed bool + index int + writer hash.Hash + mu sync.Mutex + wg sync.WaitGroup +} + +func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int) *dummySectionWriter { + return &dummySectionWriter{ + sectionSize: sectionSize, + digestSize: digestSize, + branches: branches, + data: make([]byte, cp), + writer: sha3.NewLegacyKeccak256(), + digest: make([]byte, digestSize), + } +} + +func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { +} + +func (d *dummySectionWriter) SetWriter(_ file.SectionWriterFunc) file.SectionWriter { + log.Error("dummySectionWriter does not support SectionWriter chaining") + return d +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SeekSection(offset int) { + d.index = offset * d.SectionSize() +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SetLength(length int) { + d.size = length +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SetSpan(length int) { + d.span = make([]byte, 8) + binary.LittleEndian.PutUint64(d.span, uint64(length)) +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Write(data []byte) (int, error) { + d.mu.Lock() + copy(d.data[d.index:], data) + d.size += len(data) + log.Trace("dummywriter write", "index", d.index, "size", d.size, "threshold", d.sectionSize*d.branches) + if d.isFull() { + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } + return len(data), nil +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Sum(_ []byte) []byte { + log.Trace("dummy Sumcall", "size", d.size) + d.mu.Lock() + if !d.summed { + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } + return d.digest +} + +func (d *dummySectionWriter) sum() { + d.mu.Lock() + defer d.mu.Unlock() + d.writer.Write(d.span) + log.Trace("dummy sum writing span", "span", d.span) + for i := 0; i < d.size; i += d.writer.Size() { + sectionData := d.data[i : i+d.writer.Size()] + log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size) + d.writer.Write(sectionData) + } + copy(d.digest, d.writer.Sum(nil)) + log.Trace("dummy sum result", "ref", hexutil.Encode(d.digest)) +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Reset() { + d.mu.Lock() + defer d.mu.Unlock() + d.data = make([]byte, len(d.data)) + d.digest = make([]byte, d.digestSize) + d.size = 0 + d.summed = false + d.span = nil + d.writer.Reset() +} + +// implements param.SectionWriter +func (d *dummySectionWriter) BlockSize() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) 
SectionSize() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Size() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Branches() int { + return d.branches +} + +func (d *dummySectionWriter) isFull() bool { + return d.size == d.sectionSize*d.branches +} + +// TestDummySectionWriter +func TestDummySectionWriter(t *testing.T) { + + w := newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) + w.Reset() + + _, data := testutil.SerialData(sectionSize*2, 255, 0) + + w.SeekSection(branches) + w.Write(data[:sectionSize]) + w.SeekSection(branches + 1) + w.Write(data[sectionSize:]) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) + } + + correctDigestHex := "0x52eefd0c37895a8845d4a6cf6c6b56980e448376e55eb45717663ab7b3fc8d53" + w.SetLength(chunkSize * 2) + w.SetSpan(chunkSize * 2) + digest := w.Sum(nil) + digestHex := hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) + } + + w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) + w.Reset() + w.SeekSection(branches / 2) + w.Write(data) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) + } + + correctDigestHex += zeroHex + w.SetLength(chunkSize * 2) + w.SetSpan(chunkSize * 2) + digest = w.Sum(nil) + digestHex = hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) + } +} diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index 9478fb79b8..5cebba192f 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -14,7 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the Swarm library. If not, see . -package file +package hasher import ( "context" diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index babb981ef3..91ca296d81 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -14,7 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the Swarm library. If not, see . 
-package file +package hasher import ( "bytes" diff --git a/file/hasher/param.go b/file/hasher/param.go new file mode 100644 index 0000000000..409f180393 --- /dev/null +++ b/file/hasher/param.go @@ -0,0 +1,59 @@ +package hasher + +import ( + "context" + "sync" + + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" +) + +// defines the boundaries of the hashing job and also contains the hash factory functino of the job +// setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start) +type treeParams struct { + SectionSize int + Branches int + ChunkSize int + Spans []int + Debug bool + hashFunc file.SectionWriterFunc + writerPool sync.Pool + ctx context.Context +} + +func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams { + + h := hashFunc(context.Background()) + p := &treeParams{ + SectionSize: h.SectionSize(), + Branches: h.Branches(), + ChunkSize: h.SectionSize() * h.Branches(), + hashFunc: hashFunc, + } + h.Reset() + log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize) + p.writerPool.New = func() interface{} { + hf := p.hashFunc(p.ctx) + //log.Trace("param new hasher", "h", hf) + return hf + } + p.Spans = generateSpanSizes(p.Branches, 9) + return p +} + +func (p *treeParams) SetContext(ctx context.Context) { + p.ctx = ctx +} + +func (p *treeParams) GetContext() context.Context { + return p.ctx +} + +func (p *treeParams) PutWriter(w file.SectionWriter) { + w.Reset() + p.writerPool.Put(w) +} + +func (p *treeParams) GetWriter() file.SectionWriter { + return p.writerPool.Get().(file.SectionWriter) +} diff --git a/file/hasher/reference.go b/file/hasher/reference.go new file mode 100644 index 0000000000..ad67ae0ca1 --- /dev/null +++ b/file/hasher/reference.go @@ -0,0 +1,117 @@ +package hasher + +import ( + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" +) + +// ReferenceHasher is the source-of-truth implementation of the swarm file hashing algorithm +type ReferenceHasher struct { + params *treeParams + cursors []int // section write position, indexed per level + length int // number of bytes written to the data level of the hasher + buffer []byte // keeps data and hashes, indexed by cursors + counts []int // number of sums performed, indexed per level + hasher file.SectionWriter // underlying hasher +} + +// NewReferenceHasher constructs and returns a new ReferenceHasher +func NewReferenceHasher(params *treeParams) *ReferenceHasher { + // TODO: remove when bmt interface is amended + h := params.GetWriter() + return &ReferenceHasher{ + params: params, + cursors: make([]int, 9), + counts: make([]int, 9), + buffer: make([]byte, params.ChunkSize*9), + hasher: h, + } +} + +// Hash computes and returns the root hash of arbitrary data +func (r *ReferenceHasher) Hash(data []byte) []byte { + l := r.params.ChunkSize + for i := 0; i < len(data); i += r.params.ChunkSize { + if len(data)-i < r.params.ChunkSize { + l = len(data) - i + } + r.update(0, data[i:i+l]) + } + for i := 0; i < 9; i++ { + log.Trace("cursor", "lvl", i, "pos", r.cursors[i]) + } + return r.digest() +} + +// write to the data buffer on the specified level +// calls sum if chunk boundary is reached and recursively calls this function for the next level with the acquired bmt hash +// adjusts cursors accordingly +func (r *ReferenceHasher) update(lvl int, data []byte) { + if lvl == 0 { + r.length += len(data) + } + copy(r.buffer[r.cursors[lvl]:r.cursors[lvl]+len(data)], data) + 
r.cursors[lvl] += len(data) + if r.cursors[lvl]-r.cursors[lvl+1] == r.params.ChunkSize { + ref := r.sum(lvl) + r.update(lvl+1, ref) + r.cursors[lvl] = r.cursors[lvl+1] + } +} + +// calculates and returns the bmt sum of the last written data on the level +func (r *ReferenceHasher) sum(lvl int) []byte { + r.counts[lvl]++ + spanSize := r.params.Spans[lvl] * r.params.ChunkSize + span := (r.length-1)%spanSize + 1 + + toSumSize := r.cursors[lvl] - r.cursors[lvl+1] + + r.hasher.Reset() + r.hasher.SetSpan(span) + r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize]) + ref := r.hasher.Sum(nil) + return ref +} + +// called after all data has been written +// sums the final chunks of each level +// skips intermediate levels that end on span boundary +func (r *ReferenceHasher) digest() []byte { + + // if we did not end on a chunk boundary, the last chunk hasn't been hashed + // we need to do this first + if r.length%r.params.ChunkSize != 0 { + ref := r.sum(0) + copy(r.buffer[r.cursors[1]:], ref) + r.cursors[1] += len(ref) + r.cursors[0] = r.cursors[1] + } + + // calculate the total number of levels needed to represent the data (including the data level) + targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches) + + // sum every intermediate level and write to the level above it + for i := 1; i < targetLevel; i++ { + + // if the tree is balanced or if there is a single reference outside a balanced tree on this level + // don't hash it again but pass it on to the next level + if r.counts[i] > 0 { + // TODO: simplify if possible + if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 { + log.Trace("skip") + r.cursors[i+1] = r.cursors[i] + r.cursors[i] = r.cursors[i-1] + continue + } + } + + ref := r.sum(i) + copy(r.buffer[r.cursors[i+1]:], ref) + r.cursors[i+1] += len(ref) + r.cursors[i] = r.cursors[i+1] + } + + // the first section of the buffer will hold the root hash + return r.buffer[:r.params.SectionSize] +} diff --git a/file/hasher/reference_test.go b/file/hasher/reference_test.go new file mode 100644 index 0000000000..d4deef5c0b --- /dev/null +++ b/file/hasher/reference_test.go @@ -0,0 +1,140 @@ +package hasher + +import ( + "context" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +// TestManualDanglingChunk is a test script explicitly hashing and writing every individual level in the dangling chunk edge case +// we use a balanced tree with data size of chunkSize*branches, and a single chunk of data +// this case is chosen because it produces the wrong result in the pyramid hasher at the time of writing (master commit hash 4928d989ebd0854d993c10c194e61a5a5455e4f9) +func TestManualDanglingChunk(t *testing.T) { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + h := bmt.New(pool) + + // to execute the job we need buffers with the following capacities: + // level 0: chunkSize*branches+chunkSize + // level 1: chunkSize + // level 2: sectionSize * 2 + var levels [][]byte + levels = append(levels, nil) + levels = append(levels, make([]byte, chunkSize)) + levels = append(levels, make([]byte, sectionSize*2)) + + // hash the balanced tree portion of the data level and write to level 1 + _, levels[0] = testutil.SerialData(chunkSize*branches+chunkSize, 255, 0) + for i := 0; i < chunkSize*branches; i += 
chunkSize { + h.Reset() + h.SetSpan(chunkSize) + h.Write(levels[0][i : i+chunkSize]) + copy(levels[1][i/branches:], h.Sum(nil)) + } + refHex := hexutil.Encode(levels[1][:sectionSize]) + correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" + if refHex != correctRefHex { + t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) + } + + // write the dangling chunk + // hash it and write the reference on the second section of level 2 + h.Reset() + h.SetSpan(chunkSize) + h.Write(levels[0][chunkSize*branches:]) + copy(levels[2][sectionSize:], h.Sum(nil)) + refHex = hexutil.Encode(levels[2][sectionSize:]) + correctRefHex = "0x81b31d9a7f6c377523e8769db021091df23edd9fd7bd6bcdf11a22f518db6006" + if refHex != correctRefHex { + t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) + } + + // hash the chunk on level 1 and write into the first section of level 2 + h.Reset() + h.SetSpan(chunkSize * branches) + h.Write(levels[1]) + copy(levels[2], h.Sum(nil)) + refHex = hexutil.Encode(levels[2][:sectionSize]) + correctRefHex = "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" + if refHex != correctRefHex { + t.Fatalf("manual dangling balanced tree; expected %s, got %s", correctRefHex, refHex) + } + + // hash the two sections on level 2 to obtain the root hash + h.Reset() + h.SetSpan(chunkSize*branches + chunkSize) + h.Write(levels[2]) + ref := h.Sum(nil) + refHex = hexutil.Encode(ref) + correctRefHex = "0xb8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199" + if refHex != correctRefHex { + t.Fatalf("manual dangling root; expected %s, got %s", correctRefHex, refHex) + } +} + +// TestReferenceFileHasherVector executes the file hasher algorithms on serial input data of periods of 0-254 +// of lengths defined in common_test.go +// +// the "expected" array in common_test.go is generated by this implementation, and test failure due to +// result mismatch is nothing else than an indication that something has changed in the reference filehasher +// or the underlying hashing algorithm +func TestReferenceHasherVector(t *testing.T) { + + hashFunc := func(_ context.Context) file.SectionWriter { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + return bmt.New(pool) + } + params := newTreeParams(hashFunc) + var mismatch int + for i := start; i < end; i++ { + dataLength := dataLengths[i] + log.Info("start", "i", i, "len", dataLength) + rh := NewReferenceHasher(params) + _, data := testutil.SerialData(dataLength, 255, 0) + refHash := rh.Hash(data) + eq := true + if expected[i] != fmt.Sprintf("%x", refHash) { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) + } + if mismatch > 0 { + t.Fatalf("mismatches: %d/%d", mismatch, end-start) + } +} + +// BenchmarkReferenceHasher establishes a baseline for a fully synchronous file hashing operation +// it will be vastly inefficient +func BenchmarkReferenceHasher(b *testing.B) { + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkReferenceHasher) + } +} + +func benchmarkReferenceHasher(b *testing.B) { + benchParams := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(benchParams[1], 10, 64) + if err != nil { + b.Fatal(err) + } + hashFunc := func(_ context.Context) file.SectionWriter { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + return bmt.New(pool) + } + 
params := newTreeParams(hashFunc) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, data := testutil.SerialData(int(dataLength), 255, 0) + fh := NewReferenceHasher(params) + fh.Hash(data) + } +} diff --git a/file/hasher/util.go b/file/hasher/util.go new file mode 100644 index 0000000000..8dd8b4a27f --- /dev/null +++ b/file/hasher/util.go @@ -0,0 +1,58 @@ +package hasher + +import ( + "math" +) + +// TODO: level 0 should be SectionSize() not Branches() +// generates a dictionary of maximum span lengths per level represented by one SectionSize() of data +func generateSpanSizes(branches int, levels int) []int { + spans := make([]int, levels) + span := 1 + for i := 0; i < 9; i++ { + spans[i] = span + span *= branches + } + return spans +} + +// calculates the section index of the given byte size +func dataSizeToSectionIndex(length int, sectionSize int) int { + return (length - 1) / sectionSize +} + +// calculates the section count of the given byte size +func dataSizeToSectionCount(length int, sectionSize int) int { + return dataSizeToSectionIndex(length, sectionSize) + 1 +} + +// calculates the corresponding level section for a data section +func dataSectionToLevelSection(p *treeParams, lvl int, sections int) int { + span := p.Spans[lvl] + return sections / span +} + +// calculates the lower data section boundary of a level for which a data section is contained +// the higher level use is to determine whether the final data section written falls within +// a certain level's span +func dataSectionToLevelBoundary(p *treeParams, lvl int, section int) int { + span := p.Spans[lvl+1] + spans := section / span + spanBytes := spans * span + //log.Trace("levelboundary", "spans", spans, "section", section, "span", span) + return spanBytes +} + +// TODO: use params instead of sectionSize, branches +// calculate the last level index which a particular data section count will result in. 
+// the returned level will be the level of the root hash +func getLevelsFromLength(l int, sectionSize int, branches int) int { + if l == 0 { + return 0 + } else if l <= sectionSize*branches { + return 1 + } + c := (l - 1) / (sectionSize) + + return int(math.Log(float64(c))/math.Log(float64(branches)) + 1) +} diff --git a/file/hasher/util_test.go b/file/hasher/util_test.go new file mode 100644 index 0000000000..f364a453b1 --- /dev/null +++ b/file/hasher/util_test.go @@ -0,0 +1,90 @@ +package hasher + +import "testing" + +// TestLevelsFromLength verifies getLevelsFromLength +func TestLevelsFromLength(t *testing.T) { + + sizes := []int{sectionSize, chunkSize, chunkSize + sectionSize, chunkSize * branches, chunkSize*branches + 1} + expects := []int{1, 1, 2, 2, 3} + + for i, size := range sizes { + lvl := getLevelsFromLength(size, sectionSize, branches) + if expects[i] != lvl { + t.Fatalf("size %d, expected %d, got %d", size, expects[i], lvl) + } + } +} + +// TestDataSizeToSection verifies testDataSizeToSectionIndex +func TestDataSizeToSectionIndex(t *testing.T) { + + sizes := []int{chunkSize - 1, chunkSize, chunkSize + 1} + expects := []int{branches - 1, branches - 1, branches} + + for j, size := range sizes { + r := dataSizeToSectionIndex(size, sectionSize) + expect := expects[j] + if expect != r { + t.Fatalf("size %d section %d: expected %d, got %d", size, sectionSize, expect, r) + } + } + +} + +// TestsDataSectionToLevelSection verifies dataSectionToLevelSection +func TestDataSectionToLevelSection(t *testing.T) { + + params := newTreeParams(dummyHashFunc) + sections := []int{0, branches - 1, branches, branches + 1, branches * 2, branches*2 + 1, branches * branches} + levels := []int{1, 2} + expects := []int{ + 0, 0, 1, 1, 2, 2, 128, + 0, 0, 0, 0, 0, 0, 1, + } + + for i, lvl := range levels { + for j, section := range sections { + r := dataSectionToLevelSection(params, lvl, section) + k := i*len(sections) + j + expect := expects[k] + if expect != r { + t.Fatalf("levelsection size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + } + } +} + +// TestDataSectionToLevelBoundary verifies dataSectionToLevelBoundary +func TestDataSectionToLevelBoundary(t *testing.T) { + params := newTreeParams(dummyHashFunc) + size := chunkSize*branches + chunkSize*2 + section := dataSizeToSectionIndex(size, sectionSize) + lvl := 1 + expect := branches * branches + + r := dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + + size = chunkSize*branches*branches + chunkSize*2 + section = dataSizeToSectionIndex(size, sectionSize) + lvl = 1 + expect = branches * branches * branches + + r = dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + + size = chunkSize*branches + chunkSize*2 + section = dataSizeToSectionIndex(size, sectionSize) + lvl = 2 + expect = 0 + + r = dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } +} diff --git a/testutil/data.go b/testutil/data.go new file mode 100644 index 0000000000..f3bea59e91 --- /dev/null +++ b/testutil/data.go @@ -0,0 +1,15 @@ +package testutil + +import ( + "bytes" + "io" +) + +func SerialData(l int, mod int, offset int) (r io.Reader, slice []byte) { + slice = make([]byte, l) + for i := 0; i < len(slice); i++ { + slice[i] = 
byte((i + offset) % mod) + } + r = io.LimitReader(bytes.NewReader(slice), int64(l)) + return +} From 457b5693973c405afc5081854e980c13606fe2f3 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 11 Feb 2020 14:51:02 +0100 Subject: [PATCH 2/7] file: Remove premature code --- file/hasher/common_test.go | 202 ------------------------------------- file/hasher/util.go | 27 ----- file/hasher/util_test.go | 73 -------------- 3 files changed, 302 deletions(-) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index 07b5db9682..feff56526f 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -1,18 +1,7 @@ package hasher import ( - "bytes" - "context" - "encoding/binary" - "hash" - "sync" - "testing" - - "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethersphere/swarm/file" - "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/testutil" - "golang.org/x/crypto/sha3" ) const ( @@ -76,194 +65,3 @@ var ( func init() { testutil.Init() } - -var ( - dummyHashFunc = func(_ context.Context) file.SectionWriter { - return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) - } - - // placeholder for cases where a hasher is not necessary - noHashFunc = func(_ context.Context) file.SectionWriter { - return nil - } - - logErrFunc = func(err error) { - log.Error("SectionWriter pipeline error", "err", err) - } -) - -// simple param.SectionWriter hasher that keeps the data written to it -// for later inspection -// TODO: see if this can be replaced with the fake hasher from storage module -type dummySectionWriter struct { - sectionSize int - digestSize int - branches int - data []byte - digest []byte - size int - span []byte - summed bool - index int - writer hash.Hash - mu sync.Mutex - wg sync.WaitGroup -} - -func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int) *dummySectionWriter { - return &dummySectionWriter{ - sectionSize: sectionSize, - digestSize: digestSize, - branches: branches, - data: make([]byte, cp), - writer: sha3.NewLegacyKeccak256(), - digest: make([]byte, digestSize), - } -} - -func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { -} - -func (d *dummySectionWriter) SetWriter(_ file.SectionWriterFunc) file.SectionWriter { - log.Error("dummySectionWriter does not support SectionWriter chaining") - return d -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SeekSection(offset int) { - d.index = offset * d.SectionSize() -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SetLength(length int) { - d.size = length -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SetSpan(length int) { - d.span = make([]byte, 8) - binary.LittleEndian.PutUint64(d.span, uint64(length)) -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Write(data []byte) (int, error) { - d.mu.Lock() - copy(d.data[d.index:], data) - d.size += len(data) - log.Trace("dummywriter write", "index", d.index, "size", d.size, "threshold", d.sectionSize*d.branches) - if d.isFull() { - d.summed = true - d.mu.Unlock() - d.sum() - } else { - d.mu.Unlock() - } - return len(data), nil -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Sum(_ []byte) []byte { - log.Trace("dummy Sumcall", "size", d.size) - d.mu.Lock() - if !d.summed { - d.summed = true - d.mu.Unlock() - d.sum() - } else { - d.mu.Unlock() - } - return d.digest -} - -func (d *dummySectionWriter) sum() { - d.mu.Lock() - defer d.mu.Unlock() - d.writer.Write(d.span) - 
log.Trace("dummy sum writing span", "span", d.span) - for i := 0; i < d.size; i += d.writer.Size() { - sectionData := d.data[i : i+d.writer.Size()] - log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size) - d.writer.Write(sectionData) - } - copy(d.digest, d.writer.Sum(nil)) - log.Trace("dummy sum result", "ref", hexutil.Encode(d.digest)) -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Reset() { - d.mu.Lock() - defer d.mu.Unlock() - d.data = make([]byte, len(d.data)) - d.digest = make([]byte, d.digestSize) - d.size = 0 - d.summed = false - d.span = nil - d.writer.Reset() -} - -// implements param.SectionWriter -func (d *dummySectionWriter) BlockSize() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SectionSize() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Size() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Branches() int { - return d.branches -} - -func (d *dummySectionWriter) isFull() bool { - return d.size == d.sectionSize*d.branches -} - -// TestDummySectionWriter -func TestDummySectionWriter(t *testing.T) { - - w := newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) - w.Reset() - - _, data := testutil.SerialData(sectionSize*2, 255, 0) - - w.SeekSection(branches) - w.Write(data[:sectionSize]) - w.SeekSection(branches + 1) - w.Write(data[sectionSize:]) - if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { - t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) - } - - correctDigestHex := "0x52eefd0c37895a8845d4a6cf6c6b56980e448376e55eb45717663ab7b3fc8d53" - w.SetLength(chunkSize * 2) - w.SetSpan(chunkSize * 2) - digest := w.Sum(nil) - digestHex := hexutil.Encode(digest) - if digestHex != correctDigestHex { - t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) - } - - w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) - w.Reset() - w.SeekSection(branches / 2) - w.Write(data) - if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { - t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) - } - - correctDigestHex += zeroHex - w.SetLength(chunkSize * 2) - w.SetSpan(chunkSize * 2) - digest = w.Sum(nil) - digestHex = hexutil.Encode(digest) - if digestHex != correctDigestHex { - t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) - } -} diff --git a/file/hasher/util.go b/file/hasher/util.go index 8dd8b4a27f..141fd1d114 100644 --- a/file/hasher/util.go +++ b/file/hasher/util.go @@ -16,33 +16,6 @@ func generateSpanSizes(branches int, levels int) []int { return spans } -// calculates the section index of the given byte size -func dataSizeToSectionIndex(length int, sectionSize int) int { - return (length - 1) / sectionSize -} - -// calculates the section count of the given byte size -func dataSizeToSectionCount(length int, sectionSize int) int { - return dataSizeToSectionIndex(length, sectionSize) + 1 -} - -// calculates the corresponding level section for a data section -func dataSectionToLevelSection(p *treeParams, lvl int, sections int) int { - span := p.Spans[lvl] - return sections / span -} - -// calculates the lower data section boundary of a level for which a data section is contained -// the higher level 
use is to determine whether the final data section written falls within -// a certain level's span -func dataSectionToLevelBoundary(p *treeParams, lvl int, section int) int { - span := p.Spans[lvl+1] - spans := section / span - spanBytes := spans * span - //log.Trace("levelboundary", "spans", spans, "section", section, "span", span) - return spanBytes -} - // TODO: use params instead of sectionSize, branches // calculate the last level index which a particular data section count will result in. // the returned level will be the level of the root hash diff --git a/file/hasher/util_test.go b/file/hasher/util_test.go index f364a453b1..51640e4ad5 100644 --- a/file/hasher/util_test.go +++ b/file/hasher/util_test.go @@ -15,76 +15,3 @@ func TestLevelsFromLength(t *testing.T) { } } } - -// TestDataSizeToSection verifies testDataSizeToSectionIndex -func TestDataSizeToSectionIndex(t *testing.T) { - - sizes := []int{chunkSize - 1, chunkSize, chunkSize + 1} - expects := []int{branches - 1, branches - 1, branches} - - for j, size := range sizes { - r := dataSizeToSectionIndex(size, sectionSize) - expect := expects[j] - if expect != r { - t.Fatalf("size %d section %d: expected %d, got %d", size, sectionSize, expect, r) - } - } - -} - -// TestsDataSectionToLevelSection verifies dataSectionToLevelSection -func TestDataSectionToLevelSection(t *testing.T) { - - params := newTreeParams(dummyHashFunc) - sections := []int{0, branches - 1, branches, branches + 1, branches * 2, branches*2 + 1, branches * branches} - levels := []int{1, 2} - expects := []int{ - 0, 0, 1, 1, 2, 2, 128, - 0, 0, 0, 0, 0, 0, 1, - } - - for i, lvl := range levels { - for j, section := range sections { - r := dataSectionToLevelSection(params, lvl, section) - k := i*len(sections) + j - expect := expects[k] - if expect != r { - t.Fatalf("levelsection size %d level %d: expected %d, got %d", section, lvl, expect, r) - } - } - } -} - -// TestDataSectionToLevelBoundary verifies dataSectionToLevelBoundary -func TestDataSectionToLevelBoundary(t *testing.T) { - params := newTreeParams(dummyHashFunc) - size := chunkSize*branches + chunkSize*2 - section := dataSizeToSectionIndex(size, sectionSize) - lvl := 1 - expect := branches * branches - - r := dataSectionToLevelBoundary(params, lvl, section) - if expect != r { - t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) - } - - size = chunkSize*branches*branches + chunkSize*2 - section = dataSizeToSectionIndex(size, sectionSize) - lvl = 1 - expect = branches * branches * branches - - r = dataSectionToLevelBoundary(params, lvl, section) - if expect != r { - t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) - } - - size = chunkSize*branches + chunkSize*2 - section = dataSizeToSectionIndex(size, sectionSize) - lvl = 2 - expect = 0 - - r = dataSectionToLevelBoundary(params, lvl, section) - if expect != r { - t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) - } -} From 65a444ed24205cb6d05b229005665b44600a04d2 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 11 Feb 2020 15:22:18 +0100 Subject: [PATCH 3/7] file: Remove unused zeroHex and unused logs --- file/hasher/common_test.go | 1 - file/hasher/param.go | 3 --- 2 files changed, 4 deletions(-) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index feff56526f..bad3556420 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -8,7 +8,6 @@ const ( sectionSize = 32 branches = 128 chunkSize = 4096 - 
zeroHex = "0000000000000000000000000000000000000000000000000000000000000000" ) var ( diff --git a/file/hasher/param.go b/file/hasher/param.go index 409f180393..1ad25d823f 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -5,7 +5,6 @@ import ( "sync" "github.com/ethersphere/swarm/file" - "github.com/ethersphere/swarm/log" ) // defines the boundaries of the hashing job and also contains the hash factory functino of the job @@ -31,10 +30,8 @@ func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams { hashFunc: hashFunc, } h.Reset() - log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize) p.writerPool.New = func() interface{} { hf := p.hashFunc(p.ctx) - //log.Trace("param new hasher", "h", hf) return hf } p.Spans = generateSpanSizes(p.Branches, 9) From 93bdad95f68df8c4df10be69630bb452726837d5 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 12 Feb 2020 16:23:00 +0100 Subject: [PATCH 4/7] file: Add comments --- file/hasher/reference.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/file/hasher/reference.go b/file/hasher/reference.go index ad67ae0ca1..0d1831d7ef 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -16,6 +16,8 @@ type ReferenceHasher struct { } // NewReferenceHasher constructs and returns a new ReferenceHasher +// This implementation is limited to a tree of 9 levels, where level 0 is the data level +// With 32 section size and 128 branches this means a capacity of 4096 bytes * (128^(9-1)) func NewReferenceHasher(params *treeParams) *ReferenceHasher { // TODO: remove when bmt interface is amended h := params.GetWriter() From d603c6deb6dc6486a3172a4ffe0ea02822d4b4fa Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 20 Feb 2020 10:03:27 +0100 Subject: [PATCH 5/7] file: Elaborate comments, remove redundant loglines, var rename --- file/hasher/param.go | 2 +- file/hasher/reference.go | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/file/hasher/param.go b/file/hasher/param.go index 1ad25d823f..6de12f1065 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -7,7 +7,7 @@ import ( "github.com/ethersphere/swarm/file" ) -// defines the boundaries of the hashing job and also contains the hash factory functino of the job +// defines the boundaries of the hashing job and also contains the hash factory function of the job // setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start) type treeParams struct { SectionSize int diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 0d1831d7ef..638aa70b1e 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -2,7 +2,6 @@ package hasher import ( "github.com/ethersphere/swarm/file" - "github.com/ethersphere/swarm/log" ) // ReferenceHasher is the source-of-truth implementation of the swarm file hashing algorithm @@ -17,7 +16,8 @@ type ReferenceHasher struct { // NewReferenceHasher constructs and returns a new ReferenceHasher // This implementation is limited to a tree of 9 levels, where level 0 is the data level -// With 32 section size and 128 branches this means a capacity of 4096 bytes * (128^(9-1)) +// With 32 section size and 128 branches (i.e. 
unencrypted, non erasure-coded content) this means +// a capacity of 4096 bytes * (128^(9-1)) ~ 295.148 * (10^18) bytes func NewReferenceHasher(params *treeParams) *ReferenceHasher { // TODO: remove when bmt interface is amended h := params.GetWriter() @@ -39,9 +39,6 @@ func (r *ReferenceHasher) Hash(data []byte) []byte { } r.update(0, data[i:i+l]) } - for i := 0; i < 9; i++ { - log.Trace("cursor", "lvl", i, "pos", r.cursors[i]) - } return r.digest() } @@ -67,11 +64,11 @@ func (r *ReferenceHasher) sum(lvl int) []byte { spanSize := r.params.Spans[lvl] * r.params.ChunkSize span := (r.length-1)%spanSize + 1 - toSumSize := r.cursors[lvl] - r.cursors[lvl+1] + sizeToSum := r.cursors[lvl] - r.cursors[lvl+1] r.hasher.Reset() r.hasher.SetSpan(span) - r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize]) + r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+sizeToSum]) ref := r.hasher.Sum(nil) return ref } @@ -101,7 +98,6 @@ func (r *ReferenceHasher) digest() []byte { if r.counts[i] > 0 { // TODO: simplify if possible if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 { - log.Trace("skip") r.cursors[i+1] = r.cursors[i] r.cursors[i] = r.cursors[i-1] continue From 028aa1e3ded6bfa1adc1c3605407aff65b67eb53 Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 24 Feb 2020 06:07:21 +0100 Subject: [PATCH 6/7] file: Split up digest function, add explanations --- file/hasher/reference.go | 41 ++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 638aa70b1e..4dd6e20534 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -78,14 +78,46 @@ func (r *ReferenceHasher) sum(lvl int) []byte { // skips intermediate levels that end on span boundary func (r *ReferenceHasher) digest() []byte { - // if we did not end on a chunk boundary, the last chunk hasn't been hashed - // we need to do this first + // if we didn't end on a chunk boundary we need to hash remaining chunks first + r.hashUnfinished() + + // if the already hashed parts tree is balanced + r.moveDanglingChunk() + + // the first section of the buffer will hold the root hash + return r.buffer[:r.params.SectionSize] +} + +// hashes the remaining unhashed chunks at the end of each level +func (r *ReferenceHasher) hashUnfinished() { if r.length%r.params.ChunkSize != 0 { ref := r.sum(0) copy(r.buffer[r.cursors[1]:], ref) r.cursors[1] += len(ref) r.cursors[0] = r.cursors[1] } +} + +// in case of a balanced tree this method concatenates the reference to the single reference +// at the highest level of the tree. 
+// +// Let F be full chunks (disregarding branching factor) and S be single references +// in the following scenario: +// +// S +// F F +// F F F +// F F F F S +// +// The result will be: +// +// SS +// F F +// F F F +// F F F F +// +// After which the SS will be hashed to obtain the final root hash +func (r *ReferenceHasher) moveDanglingChunk() { // calculate the total number of levels needed to represent the data (including the data level) targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches) @@ -93,7 +125,7 @@ func (r *ReferenceHasher) digest() []byte { // sum every intermediate level and write to the level above it for i := 1; i < targetLevel; i++ { - // if the tree is balanced or if there is a single reference outside a balanced tree on this level + // and if there is a single reference outside a balanced tree on this level // don't hash it again but pass it on to the next level if r.counts[i] > 0 { // TODO: simplify if possible @@ -109,7 +141,4 @@ func (r *ReferenceHasher) digest() []byte { r.cursors[i+1] += len(ref) r.cursors[i] = r.cursors[i+1] } - - // the first section of the buffer will hold the root hash - return r.buffer[:r.params.SectionSize] } From fe7ddee7badeb1ef3ea48dea2b043e11f25fcf7d Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 24 Feb 2020 06:20:17 +0100 Subject: [PATCH 7/7] file: Purify digest method --- file/hasher/reference.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 4dd6e20534..0ceb570ee8 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -39,6 +39,13 @@ func (r *ReferenceHasher) Hash(data []byte) []byte { } r.update(0, data[i:i+l]) } + + // if we didn't end on a chunk boundary we need to hash remaining chunks first + r.hashUnfinished() + + // if the already hashed parts tree is balanced + r.moveDanglingChunk() + return r.digest() } @@ -78,12 +85,6 @@ func (r *ReferenceHasher) sum(lvl int) []byte { // skips intermediate levels that end on span boundary func (r *ReferenceHasher) digest() []byte { - // if we didn't end on a chunk boundary we need to hash remaining chunks first - r.hashUnfinished() - - // if the already hashed parts tree is balanced - r.moveDanglingChunk() - // the first section of the buffer will hold the root hash return r.buffer[:r.params.SectionSize] }
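
A minimal in-package sketch of the API this series arrives at, mirroring TestReferenceHasherVector in reference_test.go: build tree parameters from a BMT hasher factory, then hash a byte slice with the ReferenceHasher. The data length and expected root hash are taken from entry 15 of the dataLengths/expected vectors in common_test.go; because treeParams and newTreeParams are unexported, a snippet like this would have to live alongside the existing tests in package hasher, and the test name used here is only illustrative.

package hasher

import (
	"context"
	"fmt"
	"testing"

	"github.com/ethersphere/swarm/bmt"
	"github.com/ethersphere/swarm/file"
	"github.com/ethersphere/swarm/testutil"
	"golang.org/x/crypto/sha3"
)

// TestReferenceHasherSingleVector (illustrative name) hashes one serial-data
// input end to end, the same way TestReferenceHasherVector does for all lengths.
func TestReferenceHasherSingleVector(t *testing.T) {
	// factory producing BMT hashers bound to Keccak256, as in reference_test.go
	hashFunc := func(_ context.Context) file.SectionWriter {
		pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
		return bmt.New(pool)
	}
	params := newTreeParams(hashFunc)

	// one full level of chunks plus a single dangling section of data
	_, data := testutil.SerialData(chunkSize*branches+32, 255, 0)

	rh := NewReferenceHasher(params)
	ref := rh.Hash(data)

	// expected[15] in common_test.go, corresponding to dataLengths[15] = chunkSize*128 + 32
	want := "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df"
	if got := fmt.Sprintf("%x", ref); got != want {
		t.Fatalf("expected %s, got %s", want, got)
	}
}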