From 42b1887d9cda8dd408cfe42646d044dfc75c6626 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 11 Feb 2020 14:46:02 +0100 Subject: [PATCH 1/7] file, testutil: Add reference file hasher --- file/hasher/common_test.go | 269 ++++++++++++++++++++++++++++++++++ file/hasher/hasher.go | 2 +- file/hasher/hasher_test.go | 2 +- file/hasher/param.go | 59 ++++++++ file/hasher/reference.go | 117 +++++++++++++++ file/hasher/reference_test.go | 140 ++++++++++++++++++ file/hasher/util.go | 58 ++++++++ file/hasher/util_test.go | 90 ++++++++++++ testutil/data.go | 15 ++ 9 files changed, 750 insertions(+), 2 deletions(-) create mode 100644 file/hasher/common_test.go create mode 100644 file/hasher/param.go create mode 100644 file/hasher/reference.go create mode 100644 file/hasher/reference_test.go create mode 100644 file/hasher/util.go create mode 100644 file/hasher/util_test.go create mode 100644 testutil/data.go diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go new file mode 100644 index 0000000000..07b5db9682 --- /dev/null +++ b/file/hasher/common_test.go @@ -0,0 +1,269 @@ +package hasher + +import ( + "bytes" + "context" + "encoding/binary" + "hash" + "sync" + "testing" + + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +const ( + sectionSize = 32 + branches = 128 + chunkSize = 4096 + zeroHex = "0000000000000000000000000000000000000000000000000000000000000000" +) + +var ( + dataLengths = []int{31, // 0 + 32, // 1 + 33, // 2 + 63, // 3 + 64, // 4 + 65, // 5 + chunkSize, // 6 + chunkSize + 31, // 7 + chunkSize + 32, // 8 + chunkSize + 63, // 9 + chunkSize + 64, // 10 + chunkSize * 2, // 11 + chunkSize*2 + 32, // 12 + chunkSize * 128, // 13 + chunkSize*128 + 31, // 14 + chunkSize*128 + 32, // 15 + chunkSize*128 + 64, // 16 + chunkSize * 129, // 17 + chunkSize * 130, // 18 + chunkSize * 128 * 128, // 19 + chunkSize*128*128 + 32, // 20 + } + expected = []string{ + "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0 + "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1 + "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2 + "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3 + "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4 + "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5 + "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6 + "91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7 + "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8 + "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9 + "ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10 + "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11 + "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12 + "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13 + "e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14 + "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15 + "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16 + "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17 + "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18 + "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 
19 + "ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20 + } + + start = 0 + end = len(dataLengths) +) + +func init() { + testutil.Init() +} + +var ( + dummyHashFunc = func(_ context.Context) file.SectionWriter { + return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) + } + + // placeholder for cases where a hasher is not necessary + noHashFunc = func(_ context.Context) file.SectionWriter { + return nil + } + + logErrFunc = func(err error) { + log.Error("SectionWriter pipeline error", "err", err) + } +) + +// simple param.SectionWriter hasher that keeps the data written to it +// for later inspection +// TODO: see if this can be replaced with the fake hasher from storage module +type dummySectionWriter struct { + sectionSize int + digestSize int + branches int + data []byte + digest []byte + size int + span []byte + summed bool + index int + writer hash.Hash + mu sync.Mutex + wg sync.WaitGroup +} + +func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int) *dummySectionWriter { + return &dummySectionWriter{ + sectionSize: sectionSize, + digestSize: digestSize, + branches: branches, + data: make([]byte, cp), + writer: sha3.NewLegacyKeccak256(), + digest: make([]byte, digestSize), + } +} + +func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { +} + +func (d *dummySectionWriter) SetWriter(_ file.SectionWriterFunc) file.SectionWriter { + log.Error("dummySectionWriter does not support SectionWriter chaining") + return d +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SeekSection(offset int) { + d.index = offset * d.SectionSize() +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SetLength(length int) { + d.size = length +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SetSpan(length int) { + d.span = make([]byte, 8) + binary.LittleEndian.PutUint64(d.span, uint64(length)) +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Write(data []byte) (int, error) { + d.mu.Lock() + copy(d.data[d.index:], data) + d.size += len(data) + log.Trace("dummywriter write", "index", d.index, "size", d.size, "threshold", d.sectionSize*d.branches) + if d.isFull() { + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } + return len(data), nil +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Sum(_ []byte) []byte { + log.Trace("dummy Sumcall", "size", d.size) + d.mu.Lock() + if !d.summed { + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } + return d.digest +} + +func (d *dummySectionWriter) sum() { + d.mu.Lock() + defer d.mu.Unlock() + d.writer.Write(d.span) + log.Trace("dummy sum writing span", "span", d.span) + for i := 0; i < d.size; i += d.writer.Size() { + sectionData := d.data[i : i+d.writer.Size()] + log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size) + d.writer.Write(sectionData) + } + copy(d.digest, d.writer.Sum(nil)) + log.Trace("dummy sum result", "ref", hexutil.Encode(d.digest)) +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Reset() { + d.mu.Lock() + defer d.mu.Unlock() + d.data = make([]byte, len(d.data)) + d.digest = make([]byte, d.digestSize) + d.size = 0 + d.summed = false + d.span = nil + d.writer.Reset() +} + +// implements param.SectionWriter +func (d *dummySectionWriter) BlockSize() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) 
SectionSize() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Size() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Branches() int { + return d.branches +} + +func (d *dummySectionWriter) isFull() bool { + return d.size == d.sectionSize*d.branches +} + +// TestDummySectionWriter +func TestDummySectionWriter(t *testing.T) { + + w := newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) + w.Reset() + + _, data := testutil.SerialData(sectionSize*2, 255, 0) + + w.SeekSection(branches) + w.Write(data[:sectionSize]) + w.SeekSection(branches + 1) + w.Write(data[sectionSize:]) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) + } + + correctDigestHex := "0x52eefd0c37895a8845d4a6cf6c6b56980e448376e55eb45717663ab7b3fc8d53" + w.SetLength(chunkSize * 2) + w.SetSpan(chunkSize * 2) + digest := w.Sum(nil) + digestHex := hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) + } + + w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) + w.Reset() + w.SeekSection(branches / 2) + w.Write(data) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) + } + + correctDigestHex += zeroHex + w.SetLength(chunkSize * 2) + w.SetSpan(chunkSize * 2) + digest = w.Sum(nil) + digestHex = hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) + } +} diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index 9478fb79b8..5cebba192f 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -14,7 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the Swarm library. If not, see . -package file +package hasher import ( "context" diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index babb981ef3..91ca296d81 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -14,7 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the Swarm library. If not, see . 
-package file +package hasher import ( "bytes" diff --git a/file/hasher/param.go b/file/hasher/param.go new file mode 100644 index 0000000000..409f180393 --- /dev/null +++ b/file/hasher/param.go @@ -0,0 +1,59 @@ +package hasher + +import ( + "context" + "sync" + + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" +) + +// defines the boundaries of the hashing job and also contains the hash factory functino of the job +// setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start) +type treeParams struct { + SectionSize int + Branches int + ChunkSize int + Spans []int + Debug bool + hashFunc file.SectionWriterFunc + writerPool sync.Pool + ctx context.Context +} + +func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams { + + h := hashFunc(context.Background()) + p := &treeParams{ + SectionSize: h.SectionSize(), + Branches: h.Branches(), + ChunkSize: h.SectionSize() * h.Branches(), + hashFunc: hashFunc, + } + h.Reset() + log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize) + p.writerPool.New = func() interface{} { + hf := p.hashFunc(p.ctx) + //log.Trace("param new hasher", "h", hf) + return hf + } + p.Spans = generateSpanSizes(p.Branches, 9) + return p +} + +func (p *treeParams) SetContext(ctx context.Context) { + p.ctx = ctx +} + +func (p *treeParams) GetContext() context.Context { + return p.ctx +} + +func (p *treeParams) PutWriter(w file.SectionWriter) { + w.Reset() + p.writerPool.Put(w) +} + +func (p *treeParams) GetWriter() file.SectionWriter { + return p.writerPool.Get().(file.SectionWriter) +} diff --git a/file/hasher/reference.go b/file/hasher/reference.go new file mode 100644 index 0000000000..ad67ae0ca1 --- /dev/null +++ b/file/hasher/reference.go @@ -0,0 +1,117 @@ +package hasher + +import ( + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" +) + +// ReferenceHasher is the source-of-truth implementation of the swarm file hashing algorithm +type ReferenceHasher struct { + params *treeParams + cursors []int // section write position, indexed per level + length int // number of bytes written to the data level of the hasher + buffer []byte // keeps data and hashes, indexed by cursors + counts []int // number of sums performed, indexed per level + hasher file.SectionWriter // underlying hasher +} + +// NewReferenceHasher constructs and returns a new ReferenceHasher +func NewReferenceHasher(params *treeParams) *ReferenceHasher { + // TODO: remove when bmt interface is amended + h := params.GetWriter() + return &ReferenceHasher{ + params: params, + cursors: make([]int, 9), + counts: make([]int, 9), + buffer: make([]byte, params.ChunkSize*9), + hasher: h, + } +} + +// Hash computes and returns the root hash of arbitrary data +func (r *ReferenceHasher) Hash(data []byte) []byte { + l := r.params.ChunkSize + for i := 0; i < len(data); i += r.params.ChunkSize { + if len(data)-i < r.params.ChunkSize { + l = len(data) - i + } + r.update(0, data[i:i+l]) + } + for i := 0; i < 9; i++ { + log.Trace("cursor", "lvl", i, "pos", r.cursors[i]) + } + return r.digest() +} + +// write to the data buffer on the specified level +// calls sum if chunk boundary is reached and recursively calls this function for the next level with the acquired bmt hash +// adjusts cursors accordingly +func (r *ReferenceHasher) update(lvl int, data []byte) { + if lvl == 0 { + r.length += len(data) + } + copy(r.buffer[r.cursors[lvl]:r.cursors[lvl]+len(data)], data) + 
r.cursors[lvl] += len(data) + if r.cursors[lvl]-r.cursors[lvl+1] == r.params.ChunkSize { + ref := r.sum(lvl) + r.update(lvl+1, ref) + r.cursors[lvl] = r.cursors[lvl+1] + } +} + +// calculates and returns the bmt sum of the last written data on the level +func (r *ReferenceHasher) sum(lvl int) []byte { + r.counts[lvl]++ + spanSize := r.params.Spans[lvl] * r.params.ChunkSize + span := (r.length-1)%spanSize + 1 + + toSumSize := r.cursors[lvl] - r.cursors[lvl+1] + + r.hasher.Reset() + r.hasher.SetSpan(span) + r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize]) + ref := r.hasher.Sum(nil) + return ref +} + +// called after all data has been written +// sums the final chunks of each level +// skips intermediate levels that end on span boundary +func (r *ReferenceHasher) digest() []byte { + + // if we did not end on a chunk boundary, the last chunk hasn't been hashed + // we need to do this first + if r.length%r.params.ChunkSize != 0 { + ref := r.sum(0) + copy(r.buffer[r.cursors[1]:], ref) + r.cursors[1] += len(ref) + r.cursors[0] = r.cursors[1] + } + + // calculate the total number of levels needed to represent the data (including the data level) + targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches) + + // sum every intermediate level and write to the level above it + for i := 1; i < targetLevel; i++ { + + // if the tree is balanced or if there is a single reference outside a balanced tree on this level + // don't hash it again but pass it on to the next level + if r.counts[i] > 0 { + // TODO: simplify if possible + if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 { + log.Trace("skip") + r.cursors[i+1] = r.cursors[i] + r.cursors[i] = r.cursors[i-1] + continue + } + } + + ref := r.sum(i) + copy(r.buffer[r.cursors[i+1]:], ref) + r.cursors[i+1] += len(ref) + r.cursors[i] = r.cursors[i+1] + } + + // the first section of the buffer will hold the root hash + return r.buffer[:r.params.SectionSize] +} diff --git a/file/hasher/reference_test.go b/file/hasher/reference_test.go new file mode 100644 index 0000000000..d4deef5c0b --- /dev/null +++ b/file/hasher/reference_test.go @@ -0,0 +1,140 @@ +package hasher + +import ( + "context" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file" + "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +// TestManualDanglingChunk is a test script explicitly hashing and writing every individual level in the dangling chunk edge case +// we use a balanced tree with data size of chunkSize*branches, and a single chunk of data +// this case is chosen because it produces the wrong result in the pyramid hasher at the time of writing (master commit hash 4928d989ebd0854d993c10c194e61a5a5455e4f9) +func TestManualDanglingChunk(t *testing.T) { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + h := bmt.New(pool) + + // to execute the job we need buffers with the following capacities: + // level 0: chunkSize*branches+chunkSize + // level 1: chunkSize + // level 2: sectionSize * 2 + var levels [][]byte + levels = append(levels, nil) + levels = append(levels, make([]byte, chunkSize)) + levels = append(levels, make([]byte, sectionSize*2)) + + // hash the balanced tree portion of the data level and write to level 1 + _, levels[0] = testutil.SerialData(chunkSize*branches+chunkSize, 255, 0) + for i := 0; i < chunkSize*branches; i += 
chunkSize { + h.Reset() + h.SetSpan(chunkSize) + h.Write(levels[0][i : i+chunkSize]) + copy(levels[1][i/branches:], h.Sum(nil)) + } + refHex := hexutil.Encode(levels[1][:sectionSize]) + correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" + if refHex != correctRefHex { + t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) + } + + // write the dangling chunk + // hash it and write the reference on the second section of level 2 + h.Reset() + h.SetSpan(chunkSize) + h.Write(levels[0][chunkSize*branches:]) + copy(levels[2][sectionSize:], h.Sum(nil)) + refHex = hexutil.Encode(levels[2][sectionSize:]) + correctRefHex = "0x81b31d9a7f6c377523e8769db021091df23edd9fd7bd6bcdf11a22f518db6006" + if refHex != correctRefHex { + t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) + } + + // hash the chunk on level 1 and write into the first section of level 2 + h.Reset() + h.SetSpan(chunkSize * branches) + h.Write(levels[1]) + copy(levels[2], h.Sum(nil)) + refHex = hexutil.Encode(levels[2][:sectionSize]) + correctRefHex = "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" + if refHex != correctRefHex { + t.Fatalf("manual dangling balanced tree; expected %s, got %s", correctRefHex, refHex) + } + + // hash the two sections on level 2 to obtain the root hash + h.Reset() + h.SetSpan(chunkSize*branches + chunkSize) + h.Write(levels[2]) + ref := h.Sum(nil) + refHex = hexutil.Encode(ref) + correctRefHex = "0xb8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199" + if refHex != correctRefHex { + t.Fatalf("manual dangling root; expected %s, got %s", correctRefHex, refHex) + } +} + +// TestReferenceFileHasherVector executes the file hasher algorithms on serial input data of periods of 0-254 +// of lengths defined in common_test.go +// +// the "expected" array in common_test.go is generated by this implementation, and test failure due to +// result mismatch is nothing else than an indication that something has changed in the reference filehasher +// or the underlying hashing algorithm +func TestReferenceHasherVector(t *testing.T) { + + hashFunc := func(_ context.Context) file.SectionWriter { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + return bmt.New(pool) + } + params := newTreeParams(hashFunc) + var mismatch int + for i := start; i < end; i++ { + dataLength := dataLengths[i] + log.Info("start", "i", i, "len", dataLength) + rh := NewReferenceHasher(params) + _, data := testutil.SerialData(dataLength, 255, 0) + refHash := rh.Hash(data) + eq := true + if expected[i] != fmt.Sprintf("%x", refHash) { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) + } + if mismatch > 0 { + t.Fatalf("mismatches: %d/%d", mismatch, end-start) + } +} + +// BenchmarkReferenceHasher establishes a baseline for a fully synchronous file hashing operation +// it will be vastly inefficient +func BenchmarkReferenceHasher(b *testing.B) { + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkReferenceHasher) + } +} + +func benchmarkReferenceHasher(b *testing.B) { + benchParams := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(benchParams[1], 10, 64) + if err != nil { + b.Fatal(err) + } + hashFunc := func(_ context.Context) file.SectionWriter { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + return bmt.New(pool) + } + 
params := newTreeParams(hashFunc) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, data := testutil.SerialData(int(dataLength), 255, 0) + fh := NewReferenceHasher(params) + fh.Hash(data) + } +} diff --git a/file/hasher/util.go b/file/hasher/util.go new file mode 100644 index 0000000000..8dd8b4a27f --- /dev/null +++ b/file/hasher/util.go @@ -0,0 +1,58 @@ +package hasher + +import ( + "math" +) + +// TODO: level 0 should be SectionSize() not Branches() +// generates a dictionary of maximum span lengths per level represented by one SectionSize() of data +func generateSpanSizes(branches int, levels int) []int { + spans := make([]int, levels) + span := 1 + for i := 0; i < 9; i++ { + spans[i] = span + span *= branches + } + return spans +} + +// calculates the section index of the given byte size +func dataSizeToSectionIndex(length int, sectionSize int) int { + return (length - 1) / sectionSize +} + +// calculates the section count of the given byte size +func dataSizeToSectionCount(length int, sectionSize int) int { + return dataSizeToSectionIndex(length, sectionSize) + 1 +} + +// calculates the corresponding level section for a data section +func dataSectionToLevelSection(p *treeParams, lvl int, sections int) int { + span := p.Spans[lvl] + return sections / span +} + +// calculates the lower data section boundary of a level for which a data section is contained +// the higher level use is to determine whether the final data section written falls within +// a certain level's span +func dataSectionToLevelBoundary(p *treeParams, lvl int, section int) int { + span := p.Spans[lvl+1] + spans := section / span + spanBytes := spans * span + //log.Trace("levelboundary", "spans", spans, "section", section, "span", span) + return spanBytes +} + +// TODO: use params instead of sectionSize, branches +// calculate the last level index which a particular data section count will result in. 
+// the returned level will be the level of the root hash +func getLevelsFromLength(l int, sectionSize int, branches int) int { + if l == 0 { + return 0 + } else if l <= sectionSize*branches { + return 1 + } + c := (l - 1) / (sectionSize) + + return int(math.Log(float64(c))/math.Log(float64(branches)) + 1) +} diff --git a/file/hasher/util_test.go b/file/hasher/util_test.go new file mode 100644 index 0000000000..f364a453b1 --- /dev/null +++ b/file/hasher/util_test.go @@ -0,0 +1,90 @@ +package hasher + +import "testing" + +// TestLevelsFromLength verifies getLevelsFromLength +func TestLevelsFromLength(t *testing.T) { + + sizes := []int{sectionSize, chunkSize, chunkSize + sectionSize, chunkSize * branches, chunkSize*branches + 1} + expects := []int{1, 1, 2, 2, 3} + + for i, size := range sizes { + lvl := getLevelsFromLength(size, sectionSize, branches) + if expects[i] != lvl { + t.Fatalf("size %d, expected %d, got %d", size, expects[i], lvl) + } + } +} + +// TestDataSizeToSection verifies testDataSizeToSectionIndex +func TestDataSizeToSectionIndex(t *testing.T) { + + sizes := []int{chunkSize - 1, chunkSize, chunkSize + 1} + expects := []int{branches - 1, branches - 1, branches} + + for j, size := range sizes { + r := dataSizeToSectionIndex(size, sectionSize) + expect := expects[j] + if expect != r { + t.Fatalf("size %d section %d: expected %d, got %d", size, sectionSize, expect, r) + } + } + +} + +// TestsDataSectionToLevelSection verifies dataSectionToLevelSection +func TestDataSectionToLevelSection(t *testing.T) { + + params := newTreeParams(dummyHashFunc) + sections := []int{0, branches - 1, branches, branches + 1, branches * 2, branches*2 + 1, branches * branches} + levels := []int{1, 2} + expects := []int{ + 0, 0, 1, 1, 2, 2, 128, + 0, 0, 0, 0, 0, 0, 1, + } + + for i, lvl := range levels { + for j, section := range sections { + r := dataSectionToLevelSection(params, lvl, section) + k := i*len(sections) + j + expect := expects[k] + if expect != r { + t.Fatalf("levelsection size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + } + } +} + +// TestDataSectionToLevelBoundary verifies dataSectionToLevelBoundary +func TestDataSectionToLevelBoundary(t *testing.T) { + params := newTreeParams(dummyHashFunc) + size := chunkSize*branches + chunkSize*2 + section := dataSizeToSectionIndex(size, sectionSize) + lvl := 1 + expect := branches * branches + + r := dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + + size = chunkSize*branches*branches + chunkSize*2 + section = dataSizeToSectionIndex(size, sectionSize) + lvl = 1 + expect = branches * branches * branches + + r = dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + + size = chunkSize*branches + chunkSize*2 + section = dataSizeToSectionIndex(size, sectionSize) + lvl = 2 + expect = 0 + + r = dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } +} diff --git a/testutil/data.go b/testutil/data.go new file mode 100644 index 0000000000..f3bea59e91 --- /dev/null +++ b/testutil/data.go @@ -0,0 +1,15 @@ +package testutil + +import ( + "bytes" + "io" +) + +func SerialData(l int, mod int, offset int) (r io.Reader, slice []byte) { + slice = make([]byte, l) + for i := 0; i < len(slice); i++ { + slice[i] = 
byte((i + offset) % mod) + } + r = io.LimitReader(bytes.NewReader(slice), int64(l)) + return +} From 457b5693973c405afc5081854e980c13606fe2f3 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 11 Feb 2020 14:51:02 +0100 Subject: [PATCH 2/7] file: Remove premature code --- file/hasher/common_test.go | 202 ------------------------------------- file/hasher/util.go | 27 ----- file/hasher/util_test.go | 73 -------------- 3 files changed, 302 deletions(-) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index 07b5db9682..feff56526f 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -1,18 +1,7 @@ package hasher import ( - "bytes" - "context" - "encoding/binary" - "hash" - "sync" - "testing" - - "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethersphere/swarm/file" - "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/testutil" - "golang.org/x/crypto/sha3" ) const ( @@ -76,194 +65,3 @@ var ( func init() { testutil.Init() } - -var ( - dummyHashFunc = func(_ context.Context) file.SectionWriter { - return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) - } - - // placeholder for cases where a hasher is not necessary - noHashFunc = func(_ context.Context) file.SectionWriter { - return nil - } - - logErrFunc = func(err error) { - log.Error("SectionWriter pipeline error", "err", err) - } -) - -// simple param.SectionWriter hasher that keeps the data written to it -// for later inspection -// TODO: see if this can be replaced with the fake hasher from storage module -type dummySectionWriter struct { - sectionSize int - digestSize int - branches int - data []byte - digest []byte - size int - span []byte - summed bool - index int - writer hash.Hash - mu sync.Mutex - wg sync.WaitGroup -} - -func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int) *dummySectionWriter { - return &dummySectionWriter{ - sectionSize: sectionSize, - digestSize: digestSize, - branches: branches, - data: make([]byte, cp), - writer: sha3.NewLegacyKeccak256(), - digest: make([]byte, digestSize), - } -} - -func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { -} - -func (d *dummySectionWriter) SetWriter(_ file.SectionWriterFunc) file.SectionWriter { - log.Error("dummySectionWriter does not support SectionWriter chaining") - return d -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SeekSection(offset int) { - d.index = offset * d.SectionSize() -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SetLength(length int) { - d.size = length -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SetSpan(length int) { - d.span = make([]byte, 8) - binary.LittleEndian.PutUint64(d.span, uint64(length)) -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Write(data []byte) (int, error) { - d.mu.Lock() - copy(d.data[d.index:], data) - d.size += len(data) - log.Trace("dummywriter write", "index", d.index, "size", d.size, "threshold", d.sectionSize*d.branches) - if d.isFull() { - d.summed = true - d.mu.Unlock() - d.sum() - } else { - d.mu.Unlock() - } - return len(data), nil -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Sum(_ []byte) []byte { - log.Trace("dummy Sumcall", "size", d.size) - d.mu.Lock() - if !d.summed { - d.summed = true - d.mu.Unlock() - d.sum() - } else { - d.mu.Unlock() - } - return d.digest -} - -func (d *dummySectionWriter) sum() { - d.mu.Lock() - defer d.mu.Unlock() - d.writer.Write(d.span) - 
log.Trace("dummy sum writing span", "span", d.span) - for i := 0; i < d.size; i += d.writer.Size() { - sectionData := d.data[i : i+d.writer.Size()] - log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size) - d.writer.Write(sectionData) - } - copy(d.digest, d.writer.Sum(nil)) - log.Trace("dummy sum result", "ref", hexutil.Encode(d.digest)) -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Reset() { - d.mu.Lock() - defer d.mu.Unlock() - d.data = make([]byte, len(d.data)) - d.digest = make([]byte, d.digestSize) - d.size = 0 - d.summed = false - d.span = nil - d.writer.Reset() -} - -// implements param.SectionWriter -func (d *dummySectionWriter) BlockSize() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SectionSize() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Size() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Branches() int { - return d.branches -} - -func (d *dummySectionWriter) isFull() bool { - return d.size == d.sectionSize*d.branches -} - -// TestDummySectionWriter -func TestDummySectionWriter(t *testing.T) { - - w := newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) - w.Reset() - - _, data := testutil.SerialData(sectionSize*2, 255, 0) - - w.SeekSection(branches) - w.Write(data[:sectionSize]) - w.SeekSection(branches + 1) - w.Write(data[sectionSize:]) - if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { - t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) - } - - correctDigestHex := "0x52eefd0c37895a8845d4a6cf6c6b56980e448376e55eb45717663ab7b3fc8d53" - w.SetLength(chunkSize * 2) - w.SetSpan(chunkSize * 2) - digest := w.Sum(nil) - digestHex := hexutil.Encode(digest) - if digestHex != correctDigestHex { - t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) - } - - w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) - w.Reset() - w.SeekSection(branches / 2) - w.Write(data) - if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { - t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) - } - - correctDigestHex += zeroHex - w.SetLength(chunkSize * 2) - w.SetSpan(chunkSize * 2) - digest = w.Sum(nil) - digestHex = hexutil.Encode(digest) - if digestHex != correctDigestHex { - t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) - } -} diff --git a/file/hasher/util.go b/file/hasher/util.go index 8dd8b4a27f..141fd1d114 100644 --- a/file/hasher/util.go +++ b/file/hasher/util.go @@ -16,33 +16,6 @@ func generateSpanSizes(branches int, levels int) []int { return spans } -// calculates the section index of the given byte size -func dataSizeToSectionIndex(length int, sectionSize int) int { - return (length - 1) / sectionSize -} - -// calculates the section count of the given byte size -func dataSizeToSectionCount(length int, sectionSize int) int { - return dataSizeToSectionIndex(length, sectionSize) + 1 -} - -// calculates the corresponding level section for a data section -func dataSectionToLevelSection(p *treeParams, lvl int, sections int) int { - span := p.Spans[lvl] - return sections / span -} - -// calculates the lower data section boundary of a level for which a data section is contained -// the higher level 
use is to determine whether the final data section written falls within -// a certain level's span -func dataSectionToLevelBoundary(p *treeParams, lvl int, section int) int { - span := p.Spans[lvl+1] - spans := section / span - spanBytes := spans * span - //log.Trace("levelboundary", "spans", spans, "section", section, "span", span) - return spanBytes -} - // TODO: use params instead of sectionSize, branches // calculate the last level index which a particular data section count will result in. // the returned level will be the level of the root hash diff --git a/file/hasher/util_test.go b/file/hasher/util_test.go index f364a453b1..51640e4ad5 100644 --- a/file/hasher/util_test.go +++ b/file/hasher/util_test.go @@ -15,76 +15,3 @@ func TestLevelsFromLength(t *testing.T) { } } } - -// TestDataSizeToSection verifies testDataSizeToSectionIndex -func TestDataSizeToSectionIndex(t *testing.T) { - - sizes := []int{chunkSize - 1, chunkSize, chunkSize + 1} - expects := []int{branches - 1, branches - 1, branches} - - for j, size := range sizes { - r := dataSizeToSectionIndex(size, sectionSize) - expect := expects[j] - if expect != r { - t.Fatalf("size %d section %d: expected %d, got %d", size, sectionSize, expect, r) - } - } - -} - -// TestsDataSectionToLevelSection verifies dataSectionToLevelSection -func TestDataSectionToLevelSection(t *testing.T) { - - params := newTreeParams(dummyHashFunc) - sections := []int{0, branches - 1, branches, branches + 1, branches * 2, branches*2 + 1, branches * branches} - levels := []int{1, 2} - expects := []int{ - 0, 0, 1, 1, 2, 2, 128, - 0, 0, 0, 0, 0, 0, 1, - } - - for i, lvl := range levels { - for j, section := range sections { - r := dataSectionToLevelSection(params, lvl, section) - k := i*len(sections) + j - expect := expects[k] - if expect != r { - t.Fatalf("levelsection size %d level %d: expected %d, got %d", section, lvl, expect, r) - } - } - } -} - -// TestDataSectionToLevelBoundary verifies dataSectionToLevelBoundary -func TestDataSectionToLevelBoundary(t *testing.T) { - params := newTreeParams(dummyHashFunc) - size := chunkSize*branches + chunkSize*2 - section := dataSizeToSectionIndex(size, sectionSize) - lvl := 1 - expect := branches * branches - - r := dataSectionToLevelBoundary(params, lvl, section) - if expect != r { - t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) - } - - size = chunkSize*branches*branches + chunkSize*2 - section = dataSizeToSectionIndex(size, sectionSize) - lvl = 1 - expect = branches * branches * branches - - r = dataSectionToLevelBoundary(params, lvl, section) - if expect != r { - t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) - } - - size = chunkSize*branches + chunkSize*2 - section = dataSizeToSectionIndex(size, sectionSize) - lvl = 2 - expect = 0 - - r = dataSectionToLevelBoundary(params, lvl, section) - if expect != r { - t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) - } -} From 65a444ed24205cb6d05b229005665b44600a04d2 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 11 Feb 2020 15:22:18 +0100 Subject: [PATCH 3/7] file: Remove unused zeroHex and unused logs --- file/hasher/common_test.go | 1 - file/hasher/param.go | 3 --- 2 files changed, 4 deletions(-) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index feff56526f..bad3556420 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -8,7 +8,6 @@ const ( sectionSize = 32 branches = 128 chunkSize = 4096 - 
zeroHex = "0000000000000000000000000000000000000000000000000000000000000000" ) var ( diff --git a/file/hasher/param.go b/file/hasher/param.go index 409f180393..1ad25d823f 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -5,7 +5,6 @@ import ( "sync" "github.com/ethersphere/swarm/file" - "github.com/ethersphere/swarm/log" ) // defines the boundaries of the hashing job and also contains the hash factory functino of the job @@ -31,10 +30,8 @@ func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams { hashFunc: hashFunc, } h.Reset() - log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize) p.writerPool.New = func() interface{} { hf := p.hashFunc(p.ctx) - //log.Trace("param new hasher", "h", hf) return hf } p.Spans = generateSpanSizes(p.Branches, 9) From 93bdad95f68df8c4df10be69630bb452726837d5 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 12 Feb 2020 16:23:00 +0100 Subject: [PATCH 4/7] file: Add comments --- file/hasher/reference.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/file/hasher/reference.go b/file/hasher/reference.go index ad67ae0ca1..0d1831d7ef 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -16,6 +16,8 @@ type ReferenceHasher struct { } // NewReferenceHasher constructs and returns a new ReferenceHasher +// This implementation is limited to a tree of 9 levels, where level 0 is the data level +// With 32 section size and 128 branches this means a capacity of 4096 bytes * (128^(9-1)) func NewReferenceHasher(params *treeParams) *ReferenceHasher { // TODO: remove when bmt interface is amended h := params.GetWriter() From d603c6deb6dc6486a3172a4ffe0ea02822d4b4fa Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 20 Feb 2020 10:03:27 +0100 Subject: [PATCH 5/7] file: Elaborate comments, remove redundant loglines, var rename --- file/hasher/param.go | 2 +- file/hasher/reference.go | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/file/hasher/param.go b/file/hasher/param.go index 1ad25d823f..6de12f1065 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -7,7 +7,7 @@ import ( "github.com/ethersphere/swarm/file" ) -// defines the boundaries of the hashing job and also contains the hash factory functino of the job +// defines the boundaries of the hashing job and also contains the hash factory function of the job // setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start) type treeParams struct { SectionSize int diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 0d1831d7ef..638aa70b1e 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -2,7 +2,6 @@ package hasher import ( "github.com/ethersphere/swarm/file" - "github.com/ethersphere/swarm/log" ) // ReferenceHasher is the source-of-truth implementation of the swarm file hashing algorithm @@ -17,7 +16,8 @@ type ReferenceHasher struct { // NewReferenceHasher constructs and returns a new ReferenceHasher // This implementation is limited to a tree of 9 levels, where level 0 is the data level -// With 32 section size and 128 branches this means a capacity of 4096 bytes * (128^(9-1)) +// With 32 section size and 128 branches (i.e. 
unencrypted, non erasure-coded content) this means +// a capacity of 4096 bytes * (128^(9-1)) ~ 295.148 * (10^18) bytes func NewReferenceHasher(params *treeParams) *ReferenceHasher { // TODO: remove when bmt interface is amended h := params.GetWriter() @@ -39,9 +39,6 @@ func (r *ReferenceHasher) Hash(data []byte) []byte { } r.update(0, data[i:i+l]) } - for i := 0; i < 9; i++ { - log.Trace("cursor", "lvl", i, "pos", r.cursors[i]) - } return r.digest() } @@ -67,11 +64,11 @@ func (r *ReferenceHasher) sum(lvl int) []byte { spanSize := r.params.Spans[lvl] * r.params.ChunkSize span := (r.length-1)%spanSize + 1 - toSumSize := r.cursors[lvl] - r.cursors[lvl+1] + sizeToSum := r.cursors[lvl] - r.cursors[lvl+1] r.hasher.Reset() r.hasher.SetSpan(span) - r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize]) + r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+sizeToSum]) ref := r.hasher.Sum(nil) return ref } @@ -101,7 +98,6 @@ func (r *ReferenceHasher) digest() []byte { if r.counts[i] > 0 { // TODO: simplify if possible if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 { - log.Trace("skip") r.cursors[i+1] = r.cursors[i] r.cursors[i] = r.cursors[i-1] continue From 028aa1e3ded6bfa1adc1c3605407aff65b67eb53 Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 24 Feb 2020 06:07:21 +0100 Subject: [PATCH 6/7] file: Split up digest function, add explanations --- file/hasher/reference.go | 41 ++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 638aa70b1e..4dd6e20534 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -78,14 +78,46 @@ func (r *ReferenceHasher) sum(lvl int) []byte { // skips intermediate levels that end on span boundary func (r *ReferenceHasher) digest() []byte { - // if we did not end on a chunk boundary, the last chunk hasn't been hashed - // we need to do this first + // if we didn't end on a chunk boundary we need to hash remaining chunks first + r.hashUnfinished() + + // if the already hashed parts tree is balanced + r.moveDanglingChunk() + + // the first section of the buffer will hold the root hash + return r.buffer[:r.params.SectionSize] +} + +// hashes the remaining unhashed chunks at the end of each level +func (r *ReferenceHasher) hashUnfinished() { if r.length%r.params.ChunkSize != 0 { ref := r.sum(0) copy(r.buffer[r.cursors[1]:], ref) r.cursors[1] += len(ref) r.cursors[0] = r.cursors[1] } +} + +// in case of a balanced tree this method concatenates the reference to the single reference +// at the highest level of the tree. 
+// +// Let F be full chunks (disregarding branching factor) and S be single references +// in the following scenario: +// +// S +// F F +// F F F +// F F F F S +// +// The result will be: +// +// SS +// F F +// F F F +// F F F F +// +// After which the SS will be hashed to obtain the final root hash +func (r *ReferenceHasher) moveDanglingChunk() { // calculate the total number of levels needed to represent the data (including the data level) targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches) @@ -93,7 +125,7 @@ func (r *ReferenceHasher) digest() []byte { // sum every intermediate level and write to the level above it for i := 1; i < targetLevel; i++ { - // if the tree is balanced or if there is a single reference outside a balanced tree on this level + // and if there is a single reference outside a balanced tree on this level // don't hash it again but pass it on to the next level if r.counts[i] > 0 { // TODO: simplify if possible @@ -109,7 +141,4 @@ func (r *ReferenceHasher) digest() []byte { r.cursors[i+1] += len(ref) r.cursors[i] = r.cursors[i+1] } - - // the first section of the buffer will hold the root hash - return r.buffer[:r.params.SectionSize] } From fe7ddee7badeb1ef3ea48dea2b043e11f25fcf7d Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 24 Feb 2020 06:20:17 +0100 Subject: [PATCH 7/7] file: Purify digest method --- file/hasher/reference.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 4dd6e20534..0ceb570ee8 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -39,6 +39,13 @@ func (r *ReferenceHasher) Hash(data []byte) []byte { } r.update(0, data[i:i+l]) } + + // if we didn't end on a chunk boundary we need to hash remaining chunks first + r.hashUnfinished() + + // if the already hashed parts tree is balanced + r.moveDanglingChunk() + return r.digest() } @@ -78,12 +85,6 @@ func (r *ReferenceHasher) sum(lvl int) []byte { // skips intermediate levels that end on span boundary func (r *ReferenceHasher) digest() []byte { - // if we didn't end on a chunk boundary we need to hash remaining chunks first - r.hashUnfinished() - - // if the already hashed parts tree is balanced - r.moveDanglingChunk() - // the first section of the buffer will hold the root hash return r.buffer[:r.params.SectionSize] }
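
A minimal in-package sketch of the API this series arrives at, mirroring TestReferenceHasherVector in reference_test.go: build tree parameters from a BMT hasher factory, then hash a byte slice with the ReferenceHasher. The data length and expected root hash are taken from entry 15 of the dataLengths/expected vectors in common_test.go; because treeParams and newTreeParams are unexported, a snippet like this would have to live alongside the existing tests in package hasher, and the test name used here is only illustrative.

package hasher

import (
	"context"
	"fmt"
	"testing"

	"github.com/ethersphere/swarm/bmt"
	"github.com/ethersphere/swarm/file"
	"github.com/ethersphere/swarm/testutil"
	"golang.org/x/crypto/sha3"
)

// TestReferenceHasherSingleVector (illustrative name) hashes one serial-data
// input end to end, the same way TestReferenceHasherVector does for all lengths.
func TestReferenceHasherSingleVector(t *testing.T) {
	// factory producing BMT hashers bound to Keccak256, as in reference_test.go
	hashFunc := func(_ context.Context) file.SectionWriter {
		pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
		return bmt.New(pool)
	}
	params := newTreeParams(hashFunc)

	// one full level of chunks plus a single dangling section of data
	_, data := testutil.SerialData(chunkSize*branches+32, 255, 0)

	rh := NewReferenceHasher(params)
	ref := rh.Hash(data)

	// expected[15] in common_test.go, corresponding to dataLengths[15] = chunkSize*128 + 32
	want := "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df"
	if got := fmt.Sprintf("%x", ref); got != want {
		t.Fatalf("expected %s, got %s", want, got)
	}
}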