From a8f3fc42b2900a8c8f3f9de16aa0c7e93dfa577d Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 16:12:58 +0100 Subject: [PATCH 01/67] storage: Elaborate comment on Encryption.Reset From 22839fba32405646813947c7a2ec52becde3cfbd Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 16:26:49 +0100 Subject: [PATCH 02/67] storage: Add Reset() to interface From df3f3b742fbd7ed9cd0e9e4d9a2c16e28b773376 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 27 Nov 2019 12:24:48 +0100 Subject: [PATCH 03/67] file: Add reference filehasher pre-cleanup state --- file/common_test.go | 64 ++++++++++++++++++++ file/hasher_r.go | 137 ++++++++++++++++++++++++++++++++++++++++++ file/hasher_r_test.go | 58 ++++++++++++++++++ file/tree.go | 28 +++++++++ file/util.go | 57 ++++++++++++++++++ file/util_test.go | 91 ++++++++++++++++++++++++++++ testutil/data.go | 15 +++++ 7 files changed, 450 insertions(+) create mode 100644 file/common_test.go create mode 100644 file/hasher_r.go create mode 100644 file/hasher_r_test.go create mode 100644 file/tree.go create mode 100644 file/util.go create mode 100644 file/util_test.go create mode 100644 testutil/data.go diff --git a/file/common_test.go b/file/common_test.go new file mode 100644 index 0000000000..4cccc645fe --- /dev/null +++ b/file/common_test.go @@ -0,0 +1,64 @@ +package file + +import ( + "github.com/ethersphere/swarm/testutil" +) + +const ( + sectionSize = 32 + branches = 128 + chunkSize = 4096 +) + +var ( + dataLengths = []int{31, // 0 + 32, // 1 + 33, // 2 + 63, // 3 + 64, // 4 + 65, // 5 + chunkSize, // 6 + chunkSize + 31, // 7 + chunkSize + 32, // 8 + chunkSize + 63, // 9 + chunkSize + 64, // 10 + chunkSize * 2, // 11 + chunkSize*2 + 32, // 12 + chunkSize * 128, // 13 + chunkSize*128 + 31, // 14 + chunkSize*128 + 32, // 15 + chunkSize*128 + 64, // 16 + chunkSize * 129, // 17 + chunkSize * 130, // 18 + chunkSize*128*128 - 32, // 19 + } + expected = []string{ + "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0 + "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1 + "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2 + "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3 + "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4 + "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5 + "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6 + "91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7 + "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8 + "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9 + "ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10 + "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11 + "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12 + "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13 + "e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14 + "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15 + "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16 + "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17 + "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18 + "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19, reference file hasher returns false on this match, claims 
3606dd2e51a2a71fd12bc5cacfd4d4191073e0bf3dd2a60528c2515f15920006 + } + + start = 0 + end = len(dataLengths) +) + +func init() { + testutil.Init() +} diff --git a/file/hasher_r.go b/file/hasher_r.go new file mode 100644 index 0000000000..53ac7d89d2 --- /dev/null +++ b/file/hasher_r.go @@ -0,0 +1,137 @@ +package file + +import ( + "encoding/binary" + "io" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/log" +) + +type ReferenceFileHasher struct { + hasher *bmt.Hasher + branches int + segmentSize int + buffer []byte + cursors []int + chunkSize int + totalBytes int + writeByteCount int + writeCount int +} + +func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { + return &ReferenceFileHasher{ + hasher: hasher, + branches: branches, + segmentSize: hasher.Size(), + chunkSize: branches * hasher.Size(), + } +} + +// reads segmentwise from input data and writes +// TODO: Write directly to f.buffer instead of input +// TODO: See if level 0 data can be written directly to hasher without complicating code +func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { + f.totalBytes = l + // TODO: old implementation of function skewed the level by 1, realign code to new, correct results + levelCount := getLevelsFromLength(l, f.segmentSize, f.branches) + 1 + log.Trace("level count", "l", levelCount, "b", f.branches, "c", l, "s", f.segmentSize) + bufLen := f.segmentSize + for i := 1; i < levelCount; i++ { + bufLen *= f.branches + } + f.cursors = make([]int, levelCount) + f.buffer = make([]byte, bufLen) + var res bool + for !res { + input := make([]byte, f.segmentSize) + c, err := r.Read(input) + log.Trace("read", "c", c, "wbc", f.writeByteCount) + if err != nil { + if err == io.EOF { + log.Debug("haveeof") + res = true + } else { + panic(err) + } + } else if c < f.segmentSize { + input = input[:c] + } + f.writeByteCount += c + if f.writeByteCount == f.totalBytes { + res = true + } + f.write(input, 0, res) + } + return common.BytesToHash(f.buffer[f.cursors[levelCount-1] : f.cursors[levelCount-1]+f.segmentSize]) +} + +// TODO: check if length 0 +// performs recursive hashing on complete batches or data end +func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { + log.Debug("write", "l", level, "len", len(b), "b", hexutil.Encode(b), "end", end, "wbc", f.writeByteCount) + + // copy data from buffer to current position of corresponding level in buffer + copy(f.buffer[f.cursors[level]*f.segmentSize:], b) + for i, l := range f.cursors { + log.Trace("cursor", "#", i, "pos", l) + } + + // if we are at the tree root the result will be in the first segmentSize bytes of the buffer. 
Return + if level == len(f.cursors)-1 { + return true + } + + // if the offset is the same one level up, then we have a dangling chunk and we merely pass it down the tree + if end && level > 0 && f.cursors[level] == f.cursors[level+1] { + res := f.write(b, level+1, end) + return res + } + + // we've written to the buffer of this level, so we increment the cursor + f.cursors[level]++ + + // perform recursive writes down the tree if end of output or on batch boundary + var res bool + if f.cursors[level]-f.cursors[level+1] == f.branches || end { + + // calculate what the potential span under this chunk will be + span := f.chunkSize + for i := 0; i < level; i++ { + span *= f.branches + } + + // calculate the data in this chunk (the data to be hashed) + var dataUnderSpan int + if end { + dataUnderSpan = (f.totalBytes-1)%span + 1 + } else { + dataUnderSpan = span + } + + // calculate the actual data under this span + var hashDataSize int + if level == 0 { + hashDataSize = dataUnderSpan + } else { + hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize + } + + // hash the chunk and write it to the current cursor position on the next level + meta := make([]byte, 8) + binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) + f.hasher.ResetWithLength(meta) + writeHashOffset := f.cursors[level+1] * f.segmentSize + f.hasher.Write(f.buffer[writeHashOffset : writeHashOffset+hashDataSize]) + hashResult := f.hasher.Sum(nil) + log.Debug("summed", "b", hexutil.Encode(hashResult), "l", f.cursors[level], "l+1", f.cursors[level+1], "spanlength", dataUnderSpan, "span", span, "meta", meta, "from", writeHashOffset, "to", writeHashOffset+hashDataSize, "data", f.buffer[writeHashOffset:writeHashOffset+hashDataSize]) + res = f.write(hashResult, level+1, end) + + // recycle buffer space from the threshold of just written hash + f.cursors[level] = f.cursors[level+1] + } + return res +} diff --git a/file/hasher_r_test.go b/file/hasher_r_test.go new file mode 100644 index 0000000000..336539c75c --- /dev/null +++ b/file/hasher_r_test.go @@ -0,0 +1,58 @@ +package file + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +func TestReferenceFileHasher(t *testing.T) { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize) + h := bmt.New(pool) + var mismatch int + for i := start; i < end; i++ { + dataLength := dataLengths[i] + log.Info("start", "i", i, "len", dataLength) + fh := NewReferenceFileHasher(h, 128) + _, data := testutil.SerialData(dataLength, 255, 0) + refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() + eq := true + if expected[i] != fmt.Sprintf("%x", refHash) { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) + } + if mismatch > 0 { + t.Fatalf("mismatches: %d/%d", mismatch, end-start) + } +} + +func BenchmarkReferenceHasher(b *testing.B) { + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkReferenceFileHasher) + } +} + +func benchmarkReferenceFileHasher(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(params[1], 10, 64) + if err != nil { + b.Fatal(err) + } + _, data := testutil.SerialData(int(dataLength), 255, 0) + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize) + b.ResetTimer() + for i := 0; i < b.N; i++ { + h 
:= bmt.New(pool)
+		fh := NewReferenceFileHasher(h, 128)
+		fh.Hash(bytes.NewReader(data), len(data)).Bytes()
+	}
+}
diff --git a/file/tree.go b/file/tree.go
new file mode 100644
index 0000000000..7ce5c7fa6c
--- /dev/null
+++ b/file/tree.go
@@ -0,0 +1,28 @@
+package file
+
+import "github.com/ethersphere/swarm/bmt"
+
+// defines the boundaries of the hashing job and also contains the hash factory function of the job
+// setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start)
+type treeParams struct {
+	SectionSize int
+	Branches    int
+	Spans       []int
+	Debug       bool
+	hashFunc    func() bmt.SectionWriter
+}
+
+func newTreeParams(section int, branches int, hashFunc func() bmt.SectionWriter) *treeParams {
+
+	p := &treeParams{
+		SectionSize: section,
+		Branches:    branches,
+		hashFunc:    hashFunc,
+	}
+	span := 1
+	for i := 0; i < 9; i++ {
+		p.Spans = append(p.Spans, span)
+		span *= p.Branches
+	}
+	return p
+}
diff --git a/file/util.go b/file/util.go
new file mode 100644
index 0000000000..1efcb5e012
--- /dev/null
+++ b/file/util.go
@@ -0,0 +1,57 @@
+package file
+
+import (
+	"encoding/binary"
+	"math"
+
+	"github.com/ethersphere/swarm/log"
+)
+
+// creates a binary span size representation
+// to pass to bmt.SectionWriter
+// TODO: move to bmt.SectionWriter, which is the object for which this is actually relevant
+func lengthToSpan(length int) []byte {
+	spanBytes := make([]byte, 8)
+	binary.LittleEndian.PutUint64(spanBytes, uint64(length))
+	return spanBytes
+}
+
+// calculates the section index of the given byte size
+func dataSizeToSectionIndex(length int, sectionSize int) int {
+	return (length - 1) / sectionSize
+}
+
+// calculates the section count of the given byte size
+func dataSizeToSectionCount(length int, sectionSize int) int {
+	return dataSizeToSectionIndex(length, sectionSize) + 1
+}
+
+// calculates the corresponding level section for a data section
+func dataSectionToLevelSection(p *treeParams, lvl int, sections int) int {
+	span := p.Spans[lvl]
+	return sections / span
+}
+
+// calculates the lower data section boundary, on the given level, of the span that contains a data section
+// its main use at higher levels is to determine whether the final data section written falls within
+// a certain level's span
+func dataSectionToLevelBoundary(p *treeParams, lvl int, section int) int {
+	span := p.Spans[lvl+1]
+	spans := section / span
+	spanBytes := spans * span
+	log.Trace("levelboundary", "spans", spans, "section", section, "span", span)
+	return spanBytes
+}
+
+// calculate how many levels a particular section count will result in. 
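+// for example (editor's illustration; the values mirror the cases in
+// TestLevelsFromLength in util_test.go, with sectionSize 32 and branches 128):
+//   getLevelsFromLength(32, 32, 128)     == 1 (a single section)
+//   getLevelsFromLength(4096, 32, 128)   == 1 (one full chunk)
+//   getLevelsFromLength(4128, 32, 128)   == 2 (one chunk plus one section)
+//   getLevelsFromLength(524288, 32, 128) == 2 (a balanced tree of 128 chunks)
+//   getLevelsFromLength(524289, 32, 128) == 3 (one byte more adds a level)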
+// the returned level will be the level of the root hash +func getLevelsFromLength(l int, sectionSize int, branches int) int { + if l == 0 { + return 0 + } else if l <= sectionSize*branches { + return 1 + } + c := (l - 1) / (sectionSize) + + return int(math.Log(float64(c))/math.Log(float64(branches)) + 1) +} diff --git a/file/util_test.go b/file/util_test.go new file mode 100644 index 0000000000..d9ace7bac3 --- /dev/null +++ b/file/util_test.go @@ -0,0 +1,91 @@ +package file + +import "testing" + +// TestLevelsFromLength verifies getLevelsFromLength +func TestLevelsFromLength(t *testing.T) { + + sizes := []int{sectionSize, chunkSize, chunkSize + sectionSize, chunkSize * branches, chunkSize*branches + 1} + expects := []int{1, 1, 2, 2, 3} + + for i, size := range sizes { + lvl := getLevelsFromLength(size, sectionSize, branches) + if expects[i] != lvl { + t.Fatalf("size %d, expected %d, got %d", size, expects[i], lvl) + } + } +} + +// TestDataSizeToSection verifies testDataSizeToSectionIndex +func TestDataSizeToSectionIndex(t *testing.T) { + + sizes := []int{chunkSize - 1, chunkSize, chunkSize + 1} + expects := []int{branches - 1, branches - 1, branches} + + for j, size := range sizes { + r := dataSizeToSectionIndex(size, sectionSize) + expect := expects[j] + if expect != r { + t.Fatalf("size %d section %d: expected %d, got %d", size, sectionSize, expect, r) + } + } + +} + +// TestsDataSectionToLevelSection verifies dataSectionToLevelSection +func TestDataSectionToLevelSection(t *testing.T) { + + params := newTreeParams(sectionSize, branches, nil) + sections := []int{0, branches - 1, branches, branches + 1, branches * 2, branches*2 + 1, branches * branches} + levels := []int{1, 2} + expects := []int{ + 0, 0, 1, 1, 2, 2, 128, + 0, 0, 0, 0, 0, 0, 1, + } + + for i, lvl := range levels { + for j, section := range sections { + r := dataSectionToLevelSection(params, lvl, section) + k := i*len(sections) + j + expect := expects[k] + if expect != r { + t.Fatalf("levelsection size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + } + } + +} + +// TestDataSectionToLevelBoundary verifies dataSectionToLevelBoundary +func TestDataSectionToLevelBoundary(t *testing.T) { + params := newTreeParams(sectionSize, branches, nil) + size := chunkSize*branches + chunkSize*2 + section := dataSizeToSectionIndex(size, sectionSize) + lvl := 1 + expect := branches * branches + + r := dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + + size = chunkSize*branches*branches + chunkSize*2 + section = dataSizeToSectionIndex(size, sectionSize) + lvl = 1 + expect = branches * branches * branches + + r = dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } + + size = chunkSize*branches + chunkSize*2 + section = dataSizeToSectionIndex(size, sectionSize) + lvl = 2 + expect = 0 + + r = dataSectionToLevelBoundary(params, lvl, section) + if expect != r { + t.Fatalf("levelboundary size %d level %d: expected %d, got %d", section, lvl, expect, r) + } +} diff --git a/testutil/data.go b/testutil/data.go new file mode 100644 index 0000000000..f3bea59e91 --- /dev/null +++ b/testutil/data.go @@ -0,0 +1,15 @@ +package testutil + +import ( + "bytes" + "io" +) + +func SerialData(l int, mod int, offset int) (r io.Reader, slice []byte) { + slice = make([]byte, l) + for i := 0; i < len(slice); i++ { + slice[i] = 
byte((i + offset) % mod) + } + r = io.LimitReader(bytes.NewReader(slice), int64(l)) + return +} From 472b6fd9580f49e151c68d0e39204433c792537f Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 00:25:57 +0100 Subject: [PATCH 04/67] file: Reference hasher commenting and partial cleanup --- file/hasher_r.go | 144 +++++++++++++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 55 deletions(-) diff --git a/file/hasher_r.go b/file/hasher_r.go index 53ac7d89d2..a10d3cf4f3 100644 --- a/file/hasher_r.go +++ b/file/hasher_r.go @@ -1,7 +1,6 @@ package file import ( - "encoding/binary" "io" "github.com/ethereum/go-ethereum/common" @@ -11,123 +10,158 @@ import ( ) type ReferenceFileHasher struct { - hasher *bmt.Hasher - branches int - segmentSize int - buffer []byte - cursors []int - chunkSize int - totalBytes int - writeByteCount int - writeCount int + hasher *bmt.Hasher // synchronous hasher + branches int // branching factor + sectionSize int // write section size, equals digest length of hasher + chunkSize int // cached chunk size, equals branches * sectionSize + spans []int // potential spans per level + buffer []byte // keeps intermediate chunks during hashing + cursors []int // write cursors in sectionSize units for each tree level + totalBytes int // total data bytes to be written + totalLevel int // total number of levels in tree. (level 0 is the data level) + writeByteCount int // amount of bytes currently written + writeCount int // amount of sections currently written } func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { - return &ReferenceFileHasher{ + f := &ReferenceFileHasher{ hasher: hasher, branches: branches, - segmentSize: hasher.Size(), + sectionSize: hasher.Size(), chunkSize: branches * hasher.Size(), } + return f } -// reads segmentwise from input data and writes -// TODO: Write directly to f.buffer instead of input -// TODO: See if level 0 data can be written directly to hasher without complicating code +// Hash makes l reads of up to sectionSize bytes from r func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { + f.totalBytes = l // TODO: old implementation of function skewed the level by 1, realign code to new, correct results - levelCount := getLevelsFromLength(l, f.segmentSize, f.branches) + 1 - log.Trace("level count", "l", levelCount, "b", f.branches, "c", l, "s", f.segmentSize) - bufLen := f.segmentSize - for i := 1; i < levelCount; i++ { + f.totalLevel = getLevelsFromLength(l, f.sectionSize, f.branches) + 1 + log.Trace("Starting reference file hasher", "levels", f.totalLevel, "length", f.totalBytes, "b", f.branches, "s", f.sectionSize) + + // prepare a buffer for intermediate the chunks + bufLen := f.sectionSize + for i := 1; i < f.totalLevel; i++ { bufLen *= f.branches } - f.cursors = make([]int, levelCount) f.buffer = make([]byte, bufLen) + f.cursors = make([]int, f.totalLevel) + + // calculate what the potential span under this chunk will be + span := f.sectionSize + for i := 0; i < f.totalLevel; i++ { + f.spans = append(f.spans, span) + span *= f.branches + } + var res bool for !res { - input := make([]byte, f.segmentSize) + + // read a data section into input copy buffer + input := make([]byte, f.sectionSize) c, err := r.Read(input) - log.Trace("read", "c", c, "wbc", f.writeByteCount) + log.Trace("read", "bytes", c, "total read", f.writeByteCount) if err != nil { if err == io.EOF { - log.Debug("haveeof") - res = true + panic("EOF") } else { panic(err) } - } else if c < f.segmentSize { - input 
= input[:c] } - f.writeByteCount += c - if f.writeByteCount == f.totalBytes { + + // read only up to the announced length, since we dimensioned buffer and level count accordingly + readSize := f.sectionSize + remainingBytes := f.totalBytes - f.writeByteCount + if remainingBytes <= f.sectionSize { + readSize = remainingBytes + input = input[:remainingBytes] res = true } + f.writeByteCount += readSize f.write(input, 0, res) } - return common.BytesToHash(f.buffer[f.cursors[levelCount-1] : f.cursors[levelCount-1]+f.segmentSize]) + // TODO: logically this should merely be f.buffer[0:f.sectionSize] + //return common.BytesToHash(f.buffer[f.cursors[f.totalLevel-1] : f.cursors[f.totalLevel-1]+f.sectionSize]) + if f.cursors[f.totalLevel-1] != 0 { + panic("totallevel cursor misaligned") + } + return common.BytesToHash(f.buffer[0:f.sectionSize]) } -// TODO: check if length 0 // performs recursive hashing on complete batches or data end func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { - log.Debug("write", "l", level, "len", len(b), "b", hexutil.Encode(b), "end", end, "wbc", f.writeByteCount) - // copy data from buffer to current position of corresponding level in buffer - copy(f.buffer[f.cursors[level]*f.segmentSize:], b) + log.Trace("write", "level", level, "bytes", len(b), "total written", f.writeByteCount, "end", end, "data", hexutil.Encode(b)) + + // copy data from input copy buffer to current position of corresponding level in intermediate chunk buffer + copy(f.buffer[f.cursors[level]*f.sectionSize:], b) for i, l := range f.cursors { - log.Trace("cursor", "#", i, "pos", l) + log.Trace("cursor", "level", i, "position", l) } - // if we are at the tree root the result will be in the first segmentSize bytes of the buffer. Return - if level == len(f.cursors)-1 { + // if we are at the tree root the result will be in the first sectionSize bytes of the buffer. 
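+	// (editor's example: hashing exactly one chunk of 4096 bytes yields a level
+	// count of 2; the chunk digest is written to level 1 at cursor 0, and that
+	// recursive write terminates here, since level 1 == len(f.cursors)-1)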
+ // the true bool return will bubble up to the data write frame in the call stack and terminate the loop + //if level == len(f.cursors)-1 { + if level == f.totalLevel-1 { return true } - // if the offset is the same one level up, then we have a dangling chunk and we merely pass it down the tree + // if we are at the end of the write, AND + // if the offset of a chunk reference is the same one level up, THEN + // we have a "dangling chunk" and we merely pass it to the next level if end && level > 0 && f.cursors[level] == f.cursors[level+1] { res := f.write(b, level+1, end) return res } - // we've written to the buffer of this level, so we increment the cursor + // we've written to the buffer a particular level + // so we increment the cursor of that level f.cursors[level]++ - // perform recursive writes down the tree if end of output or on batch boundary + // hash the intermediate chunk buffer data for this level if: + // - the difference of cursors between this level and the one above equals the branch factor (equals one full chunk of data) + // - end is set + // the resulting digest will be written to the corresponding section of the level above var res bool if f.cursors[level]-f.cursors[level+1] == f.branches || end { - // calculate what the potential span under this chunk will be - span := f.chunkSize - for i := 0; i < level; i++ { - span *= f.branches - } - - // calculate the data in this chunk (the data to be hashed) + // calculate the actual data under this span + // if we're at end, the span is given by the period of the potential span + // if not, it will be the full span (since we then must have full chunk writes in the levels below) var dataUnderSpan int + span := f.spans[level] * branches if end { dataUnderSpan = (f.totalBytes-1)%span + 1 } else { dataUnderSpan = span } - // calculate the actual data under this span + // calculate the data in this chunk (the data to be hashed) + // on level 0 it is merely the actual spanned data + // on levels above the + // TODO: can this be replaced by dataSectionToLevelSection var hashDataSize int if level == 0 { hashDataSize = dataUnderSpan } else { - hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize + hashSectionCount := (dataUnderSpan-1)/(span/f.branches) + 1 + hashDataSize = hashSectionCount * f.sectionSize } - // hash the chunk and write it to the current cursor position on the next level - meta := make([]byte, 8) - binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) - f.hasher.ResetWithLength(meta) - writeHashOffset := f.cursors[level+1] * f.segmentSize - f.hasher.Write(f.buffer[writeHashOffset : writeHashOffset+hashDataSize]) + // prepare the hasher, + // write data since previous hash operation from the current level cursor position + // and sum + spanBytes := lengthToSpan(dataUnderSpan) + f.hasher.ResetWithLength(spanBytes) + hasherWriteOffset := f.cursors[level+1] * f.sectionSize + f.hasher.Write(f.buffer[hasherWriteOffset : hasherWriteOffset+hashDataSize]) hashResult := f.hasher.Sum(nil) - log.Debug("summed", "b", hexutil.Encode(hashResult), "l", f.cursors[level], "l+1", f.cursors[level+1], "spanlength", dataUnderSpan, "span", span, "meta", meta, "from", writeHashOffset, "to", writeHashOffset+hashDataSize, "data", f.buffer[writeHashOffset:writeHashOffset+hashDataSize]) + log.Debug("summed", "level", level, "cursor", f.cursors[level], "parent cursor", f.cursors[level+1], "span", spanBytes, "digest", hexutil.Encode(hashResult)) + + // write the digest to the current cursor position of the next level + 
// note the f.write() call will move the next level's cursor according to the write and possible hash operation res = f.write(hashResult, level+1, end) // recycle buffer space from the threshold of just written hash From 01d5af23f9143ffe32248cd201c3bc1c832ac808 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 01:10:18 +0100 Subject: [PATCH 05/67] file: Replaced hash data size calc with util.go function, add comments --- file/common_test.go | 2 +- file/hasher_r.go | 66 ++++++++++++++++++++----------------------- file/hasher_r_test.go | 12 ++++++-- file/util.go | 5 +++- 4 files changed, 45 insertions(+), 40 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 4cccc645fe..d3d66b6b51 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -56,7 +56,7 @@ var ( } start = 0 - end = len(dataLengths) + end = len(dataLengths) - 1 ) func init() { diff --git a/file/hasher_r.go b/file/hasher_r.go index a10d3cf4f3..eec2582702 100644 --- a/file/hasher_r.go +++ b/file/hasher_r.go @@ -3,18 +3,18 @@ package file import ( "io" - "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" ) +// ReferenceFileHasher is a non-performant source of truth implementation for the file hashing algorithm used in Swarm +// the aim of its design is that is should be easy to understand +// TODO: bmt.Hasher should instead be passed as hash.Hash and ResetWithLength() should be abolished type ReferenceFileHasher struct { + params *treeParams hasher *bmt.Hasher // synchronous hasher - branches int // branching factor - sectionSize int // write section size, equals digest length of hasher chunkSize int // cached chunk size, equals branches * sectionSize - spans []int // potential spans per level buffer []byte // keeps intermediate chunks during hashing cursors []int // write cursors in sectionSize units for each tree level totalBytes int // total data bytes to be written @@ -23,44 +23,39 @@ type ReferenceFileHasher struct { writeCount int // amount of sections currently written } +// NewReferenceFileHasher creates a new file hasher with the supplied branch factor +// the section count will be the Size() of the hasher func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { f := &ReferenceFileHasher{ - hasher: hasher, - branches: branches, - sectionSize: hasher.Size(), - chunkSize: branches * hasher.Size(), + params: newTreeParams(hasher.Size(), branches, nil), + hasher: hasher, + chunkSize: branches * hasher.Size(), } return f } -// Hash makes l reads of up to sectionSize bytes from r -func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { +// Hash executes l reads of up to sectionSize bytes from r +// and performs the filehashing algorithm on the data +// it returns the root hash +func (f *ReferenceFileHasher) Hash(r io.Reader, l int) []byte { f.totalBytes = l - // TODO: old implementation of function skewed the level by 1, realign code to new, correct results - f.totalLevel = getLevelsFromLength(l, f.sectionSize, f.branches) + 1 - log.Trace("Starting reference file hasher", "levels", f.totalLevel, "length", f.totalBytes, "b", f.branches, "s", f.sectionSize) + f.totalLevel = getLevelsFromLength(l, f.params.SectionSize, f.params.Branches) + 1 + log.Trace("Starting reference file hasher", "levels", f.totalLevel, "length", f.totalBytes, "b", f.params.Branches, "s", f.params.SectionSize) // prepare a buffer for intermediate the chunks - bufLen := 
f.sectionSize + bufLen := f.params.SectionSize for i := 1; i < f.totalLevel; i++ { - bufLen *= f.branches + bufLen *= f.params.Branches } f.buffer = make([]byte, bufLen) f.cursors = make([]int, f.totalLevel) - // calculate what the potential span under this chunk will be - span := f.sectionSize - for i := 0; i < f.totalLevel; i++ { - f.spans = append(f.spans, span) - span *= f.branches - } - var res bool for !res { // read a data section into input copy buffer - input := make([]byte, f.sectionSize) + input := make([]byte, f.params.SectionSize) c, err := r.Read(input) log.Trace("read", "bytes", c, "total read", f.writeByteCount) if err != nil { @@ -72,9 +67,9 @@ func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { } // read only up to the announced length, since we dimensioned buffer and level count accordingly - readSize := f.sectionSize + readSize := f.params.SectionSize remainingBytes := f.totalBytes - f.writeByteCount - if remainingBytes <= f.sectionSize { + if remainingBytes <= f.params.SectionSize { readSize = remainingBytes input = input[:remainingBytes] res = true @@ -82,12 +77,10 @@ func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { f.writeByteCount += readSize f.write(input, 0, res) } - // TODO: logically this should merely be f.buffer[0:f.sectionSize] - //return common.BytesToHash(f.buffer[f.cursors[f.totalLevel-1] : f.cursors[f.totalLevel-1]+f.sectionSize]) if f.cursors[f.totalLevel-1] != 0 { panic("totallevel cursor misaligned") } - return common.BytesToHash(f.buffer[0:f.sectionSize]) + return f.buffer[0:f.params.SectionSize] } // performs recursive hashing on complete batches or data end @@ -96,7 +89,7 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { log.Trace("write", "level", level, "bytes", len(b), "total written", f.writeByteCount, "end", end, "data", hexutil.Encode(b)) // copy data from input copy buffer to current position of corresponding level in intermediate chunk buffer - copy(f.buffer[f.cursors[level]*f.sectionSize:], b) + copy(f.buffer[f.cursors[level]*f.params.SectionSize:], b) for i, l := range f.cursors { log.Trace("cursor", "level", i, "position", l) } @@ -125,13 +118,13 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { // - end is set // the resulting digest will be written to the corresponding section of the level above var res bool - if f.cursors[level]-f.cursors[level+1] == f.branches || end { + if f.cursors[level]-f.cursors[level+1] == f.params.Branches || end { // calculate the actual data under this span // if we're at end, the span is given by the period of the potential span // if not, it will be the full span (since we then must have full chunk writes in the levels below) var dataUnderSpan int - span := f.spans[level] * branches + span := f.params.Spans[level] * chunkSize if end { dataUnderSpan = (f.totalBytes-1)%span + 1 } else { @@ -140,14 +133,15 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { // calculate the data in this chunk (the data to be hashed) // on level 0 it is merely the actual spanned data - // on levels above the - // TODO: can this be replaced by dataSectionToLevelSection + // on levels above data level, we get number of sections the data equals, and divide by the level span var hashDataSize int if level == 0 { hashDataSize = dataUnderSpan } else { - hashSectionCount := (dataUnderSpan-1)/(span/f.branches) + 1 - hashDataSize = hashSectionCount * f.sectionSize + dataSectionCount := dataSizeToSectionCount(dataUnderSpan, 
f.params.SectionSize) + // TODO: this is the same as dataSectionToLevelSection, but without wrap to 0 on end boundary. Inspect whether the function should be amended, and necessary changes made to Hasher + levelSectionCount := (dataSectionCount-1)/f.params.Spans[level] + 1 + hashDataSize = levelSectionCount * f.params.SectionSize } // prepare the hasher, @@ -155,7 +149,7 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { // and sum spanBytes := lengthToSpan(dataUnderSpan) f.hasher.ResetWithLength(spanBytes) - hasherWriteOffset := f.cursors[level+1] * f.sectionSize + hasherWriteOffset := f.cursors[level+1] * f.params.SectionSize f.hasher.Write(f.buffer[hasherWriteOffset : hasherWriteOffset+hashDataSize]) hashResult := f.hasher.Sum(nil) log.Debug("summed", "level", level, "cursor", f.cursors[level], "parent cursor", f.cursors[level+1], "span", spanBytes, "digest", hexutil.Encode(hashResult)) diff --git a/file/hasher_r_test.go b/file/hasher_r_test.go index 336539c75c..6654e4ef24 100644 --- a/file/hasher_r_test.go +++ b/file/hasher_r_test.go @@ -13,6 +13,12 @@ import ( "golang.org/x/crypto/sha3" ) +// TestReferenceFileHasher executes the file hasher algorithms on serial input data of periods of 0-254 +// of lengths defined in common_test.go +// +// the "expected" array in common_test.go is generated by this implementation, and test failure due to +// result mismatch is nothing else than an indication that something has changed in the reference filehasher +// or the underlying hashing algorithm func TestReferenceFileHasher(t *testing.T) { pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize) h := bmt.New(pool) @@ -22,7 +28,7 @@ func TestReferenceFileHasher(t *testing.T) { log.Info("start", "i", i, "len", dataLength) fh := NewReferenceFileHasher(h, 128) _, data := testutil.SerialData(dataLength, 255, 0) - refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() + refHash := fh.Hash(bytes.NewReader(data), len(data)) eq := true if expected[i] != fmt.Sprintf("%x", refHash) { mismatch++ @@ -35,6 +41,8 @@ func TestReferenceFileHasher(t *testing.T) { } } +// BenchmarkReferenceHasher establishes a baseline for a fully synchronous file hashing operation +// it will be vastly inefficient func BenchmarkReferenceHasher(b *testing.B) { for i := start; i < end; i++ { b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkReferenceFileHasher) @@ -53,6 +61,6 @@ func benchmarkReferenceFileHasher(b *testing.B) { for i := 0; i < b.N; i++ { h := bmt.New(pool) fh := NewReferenceFileHasher(h, 128) - fh.Hash(bytes.NewReader(data), len(data)).Bytes() + fh.Hash(bytes.NewReader(data), len(data)) } } diff --git a/file/util.go b/file/util.go index 1efcb5e012..cd75f09bd7 100644 --- a/file/util.go +++ b/file/util.go @@ -16,11 +16,13 @@ func lengthToSpan(length int) []byte { return spanBytes } +// TODO: use params instead of sectionSize // calculates the section index of the given byte size func dataSizeToSectionIndex(length int, sectionSize int) int { return (length - 1) / sectionSize } +// TODO: use params instead of sectionSize // calculates the section count of the given byte size func dataSizeToSectionCount(length int, sectionSize int) int { return dataSizeToSectionIndex(length, sectionSize) + 1 @@ -43,7 +45,8 @@ func dataSectionToLevelBoundary(p *treeParams, lvl int, section int) int { return spanBytes } -// calculate how many levels a particular section count will result in. 
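+// (editor's note: for data larger than one chunk this evaluates
+// floor(log(c)/log(branches)) + 1, with c the zero-based index of the
+// last data section; the result increases by one whenever c crosses a
+// power of the branch factor)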
+// TODO: use params instead of sectionSize, branches +// calculate the last level index which a particular data section count will result in. // the returned level will be the level of the root hash func getLevelsFromLength(l int, sectionSize int, branches int) int { if l == 0 { From 872e93ebaa2145c8ba9c2e63d6d06404bd328bf4 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 09:55:16 +0100 Subject: [PATCH 06/67] file: Add test for dangling chunk, correct last test case param --- file/common_test.go | 10 ++++++---- file/hasher_r_test.go | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index d3d66b6b51..4da69e8d94 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -30,7 +30,8 @@ var ( chunkSize*128 + 64, // 16 chunkSize * 129, // 17 chunkSize * 130, // 18 - chunkSize*128*128 - 32, // 19 + chunkSize * 128 * 128, // 19 + chunkSize*128*128 + 32, // 20 } expected = []string{ "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0 @@ -52,11 +53,12 @@ var ( "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16 "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17 "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18 - "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19, reference file hasher returns false on this match, claims 3606dd2e51a2a71fd12bc5cacfd4d4191073e0bf3dd2a60528c2515f15920006 + "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19 + "1d1bae3a0f2d3ef6b58df4fd6c55c2d3752339b6b474eaab52c579fafe336bfa", // 20 } - start = 0 - end = len(dataLengths) - 1 + start = 20 + end = 21 //len(dataLengths) - 1 ) func init() { diff --git a/file/hasher_r_test.go b/file/hasher_r_test.go index 6654e4ef24..04200f6ed7 100644 --- a/file/hasher_r_test.go +++ b/file/hasher_r_test.go @@ -7,12 +7,37 @@ import ( "strings" "testing" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) +// TestReferenceFileHasherDanglingChunk explicitly tests the edge case where a single chunk hash after a balanced tree +// should skip to the level with a single reference +func TestReferenceFileHasherDanglingChunk(t *testing.T) { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + h := bmt.New(pool) + r, data := testutil.SerialData(chunkSize*branches*branches+sectionSize, 255, 0) + fh := NewReferenceFileHasher(h, branches) + leftHash := fh.Hash(r, chunkSize*branches*branches) + + h = bmt.New(pool) + fh = NewReferenceFileHasher(h, branches) + rightHash := fh.Hash(bytes.NewBuffer(data[chunkSize*branches*branches:]), sectionSize) + log.Info("left", "h", hexutil.Encode(leftHash)) + log.Info("right", "h", hexutil.Encode(rightHash)) + + h = bmt.New(pool) + span := lengthToSpan(chunkSize * branches * branches * sectionSize) + h.ResetWithLength(span) + h.Write(leftHash) + h.Write(rightHash) + topHash := h.Sum(nil) + log.Info("top", "h", hexutil.Encode(topHash)) +} + // TestReferenceFileHasher executes the file hasher algorithms on serial input data of periods of 0-254 // of lengths defined in common_test.go // @@ -20,15 +45,15 @@ import ( // result mismatch is nothing else than an indication that something has changed in the reference filehasher // or the underlying hashing algorithm func TestReferenceFileHasher(t *testing.T) { - pool := 
bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize) + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) h := bmt.New(pool) var mismatch int for i := start; i < end; i++ { dataLength := dataLengths[i] log.Info("start", "i", i, "len", dataLength) - fh := NewReferenceFileHasher(h, 128) - _, data := testutil.SerialData(dataLength, 255, 0) - refHash := fh.Hash(bytes.NewReader(data), len(data)) + fh := NewReferenceFileHasher(h, branches) + r, data := testutil.SerialData(dataLength, 255, 0) + refHash := fh.Hash(r, len(data)) eq := true if expected[i] != fmt.Sprintf("%x", refHash) { mismatch++ @@ -55,12 +80,12 @@ func benchmarkReferenceFileHasher(b *testing.B) { if err != nil { b.Fatal(err) } - _, data := testutil.SerialData(int(dataLength), 255, 0) - pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize) + r, data := testutil.SerialData(int(dataLength), 255, 0) + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) b.ResetTimer() for i := 0; i < b.N; i++ { h := bmt.New(pool) - fh := NewReferenceFileHasher(h, 128) - fh.Hash(bytes.NewReader(data), len(data)) + fh := NewReferenceFileHasher(h, branches) + fh.Hash(r, len(data)) } } From 181e0bf390e396e790a1e9a45f5c6b6594333534 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 27 Nov 2019 12:29:54 +0100 Subject: [PATCH 07/67] file: Add correct, non-optimized new hasher implementation --- file/hasher.go | 65 +++++ file/hasher_test.go | 1 + file/job.go | 385 +++++++++++++++++++++++++ file/job_test.go | 664 +++++++++++++++++++++++++++++++++++++++++++ file/pyramid_test.go | 49 ++++ file/type.go | 8 + 6 files changed, 1172 insertions(+) create mode 100644 file/hasher.go create mode 100644 file/hasher_test.go create mode 100644 file/job.go create mode 100644 file/job_test.go create mode 100644 file/pyramid_test.go create mode 100644 file/type.go diff --git a/file/hasher.go b/file/hasher.go new file mode 100644 index 0000000000..462fff81eb --- /dev/null +++ b/file/hasher.go @@ -0,0 +1,65 @@ +package file + +import ( + "sync" + + "github.com/ethersphere/swarm/bmt" +) + +// Hasher implements file.SectionWriter +// it is intended to be chainable to accommodate for arbitrary chunk manipulation +// like encryption, erasure coding etc +type Hasher struct { + writer *bmt.Hasher + target *target + params *treeParams + lastJob *job + jobMu sync.Mutex + writerPool sync.Pool + size int +} + +// New creates a new Hasher object +func New(sectionSize int, branches int, dataWriter *bmt.Hasher, refWriterFunc func() bmt.SectionWriter) *Hasher { + h := &Hasher{ + writer: dataWriter, + target: newTarget(), + } + h.writerPool.New = func() interface{} { + return refWriterFunc() + } + h.params = newTreeParams(sectionSize, branches, h.getWriter) + + return h +} + +// Write implements hash.Hash +func (h *Hasher) Write(b []byte) { + _, err := h.writer.Write(b) + if err != nil { + panic(err) + } +} + +// Sum implements hash.Hash +func (h *Hasher) Sum(_ []byte) []byte { + sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize) + 1 + targetLevel := getLevelsFromLength(h.size, h.params.SectionSize, h.params.Branches) + h.target.Set(h.size, sectionCount, targetLevel) + var ref []byte + select { + case ref = <-h.target.Done(): + } + return ref +} + +// proxy for sync.Pool +func (h *Hasher) putWriter(w bmt.SectionWriter) { + w.Reset() + h.writerPool.Put(w) +} + +// proxy for sync.Pool +func (h *Hasher) getWriter() bmt.SectionWriter { + return h.writerPool.Get().(bmt.SectionWriter) +} diff --git a/file/hasher_test.go 
b/file/hasher_test.go
new file mode 100644
index 0000000000..b691ba57a4
--- /dev/null
+++ b/file/hasher_test.go
@@ -0,0 +1 @@
+package file
diff --git a/file/job.go b/file/job.go
new file mode 100644
index 0000000000..e8ecfbe7ae
--- /dev/null
+++ b/file/job.go
@@ -0,0 +1,385 @@
+package file
+
+import (
+	"fmt"
+	"sync"
+	"sync/atomic"
+
+	"github.com/ethereum/go-ethereum/common/hexutil"
+	"github.com/ethersphere/swarm/bmt"
+	"github.com/ethersphere/swarm/log"
+)
+
+// keeps an index of all the existing jobs for a file hashing operation
+// sorted by level
+//
+// it also keeps all the "top hashes", i.e. hashes on the first data section index of every level
+// these are needed in case of balanced tree results, since the hashing result would be
+// lost otherwise, due to the job not having any intermediate storage of any data
+type jobIndex struct {
+	maxLevels int
+	jobs      []sync.Map
+	topHashes [][]byte
+	mu        sync.Mutex
+}
+
+func newJobIndex(maxLevels int) *jobIndex {
+	ji := &jobIndex{
+		maxLevels: maxLevels,
+	}
+	for i := 0; i < maxLevels; i++ {
+		ji.jobs = append(ji.jobs, sync.Map{})
+	}
+	return ji
+}
+
+// implements Stringer interface
+func (ji *jobIndex) String() string {
+	return fmt.Sprintf("%p", ji)
+}
+
+// Add adds a job to the index at the level
+// and data section index specified in the job
+func (ji *jobIndex) Add(jb *job) {
+	log.Trace("adding job", "job", jb)
+	ji.jobs[jb.level].Store(jb.dataSection, jb)
+}
+
+// Get retrieves a job from the job index
+// based on the level of the job and its data section index
+// if a job for the level and section index does not exist this method returns nil
+func (ji *jobIndex) Get(lvl int, section int) *job {
+	jb, ok := ji.jobs[lvl].Load(section)
+	if !ok {
+		return nil
+	}
+	return jb.(*job)
+}
+
+// Delete removes a job from the job index
+// leaving it to be garbage collected when
+// the reference in the main code is relinquished
+func (ji *jobIndex) Delete(jb *job) {
+	ji.jobs[jb.level].Delete(jb.dataSection)
+}
+
+// AddTopHash should be called by a job when a hash is written to the first index of a level
+// since the job doesn't store any data written to it (just passing it through to the underlying writer)
+// this is needed for the edge case of balanced trees
+func (ji *jobIndex) AddTopHash(ref []byte) {
+	ji.mu.Lock()
+	defer ji.mu.Unlock()
+	ji.topHashes = append(ji.topHashes, ref)
+	log.Trace("added top hash", "length", len(ji.topHashes), "index", ji)
+}
+
+// GetTopHash gets the current top hash for a particular level set by AddTopHash
+func (ji *jobIndex) GetTopHash(lvl int) []byte {
+	ji.mu.Lock()
+	defer ji.mu.Unlock()
+	return ji.topHashes[lvl-1]
+}
+
+// passed to a job to determine at which data lengths and levels a job should terminate
+type target struct {
+	size     int32         // bytes written
+	sections int32         // sections written
+	level    int32         // target level calculated from bytes written against branching factor and section size
+	resultC  chan []byte   // channel to receive root hash
+	doneC    chan struct{} // when this channel is closed all jobs will calculate their end write count
+}
+
+func newTarget() *target {
+	return &target{
+		resultC: make(chan []byte),
+		doneC:   make(chan struct{}),
+	}
+}
+
+// Set is called when the final length of the data to be written is known
+// TODO: method can be simplified to calculate sections and level internally
+func (t *target) Set(size int, sections int, level int) {
+	atomic.StoreInt32(&t.size, int32(size))
+	atomic.StoreInt32(&t.sections, int32(sections))
+	atomic.StoreInt32(&t.level, int32(level))
+	log.Trace("target set", "size", size, "sections", sections, "level", level)
+	close(t.doneC)
+}
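+
+// (editor's sketch of the intended flow, as exercised by Hasher.Sum in hasher.go:
+//
+//	tgt := newTarget()
+//	// ... jobs write section digests; each job goroutine selects on tgt.doneC ...
+//	tgt.Set(size, sections, level) // publishes the final counts and closes doneC
+//	ref := <-tgt.Done()            // the last job sends the root hash on resultC
+//
+// closing doneC acts as a broadcast: every job goroutine wakes up and computes
+// its own end write count from the published totals)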
+
+// Count returns the total section count for the target
+// it should only be called after Set()
+func (t *target) Count() int {
+	return int(atomic.LoadInt32(&t.sections)) + 1
+}
+
+// Done returns the channel in which the root hash will be sent
+func (t *target) Done() <-chan []byte {
+	return t.resultC
+}
+
+type jobUnit struct {
+	index int
+	data  []byte
+}
+
+// encapsulates one single chunk to be hashed
+type job struct {
+	target *target
+	params *treeParams
+	index  *jobIndex
+
+	level            int    // level in tree
+	dataSection      int    // data section index
+	cursorSection    int32  // next write position in job
+	endCount         int32  // number of writes to be written to this job (0 means write to capacity)
+	lastSectionSize  int    // data size on the last data section write
+	firstSectionData []byte // store first section of data written to solve the dangling chunk edge case
+
+	writeC chan jobUnit
+	writer bmt.SectionWriter // underlying data processor
+
+	mu sync.Mutex
+}
+
+func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSection int) *job {
+	jb := &job{
+		params:      params,
+		index:       jobIndex,
+		level:       lvl,
+		dataSection: dataSection,
+		writer:      params.hashFunc(),
+		writeC:      make(chan jobUnit),
+		target:      tgt,
+	}
+	if jb.index == nil {
+		jb.index = newJobIndex(9)
+	}
+
+	jb.index.Add(jb)
+	if !params.Debug {
+		go jb.process()
+	}
+	return jb
+}
+
+// implements Stringer interface
+func (jb *job) String() string {
+	return fmt.Sprintf("job: l:%d,s:%d,c:%d", jb.level, jb.dataSection, jb.count())
+}
+
+// atomically increments the write counter of the job
+func (jb *job) inc() int {
+	return int(atomic.AddInt32(&jb.cursorSection, 1))
+}
+
+// atomically returns the write counter of the job
+func (jb *job) count() int {
+	return int(atomic.LoadInt32(&jb.cursorSection))
+}
+
+// size returns the byte size of the span the job represents
+// if job is last index in a level and writes have been finalized, it will return the target size
+// otherwise, regardless of job index, it will return the size according to the current write count
+// TODO: returning expected size in one case and actual size in another can lead to confusion
+func (jb *job) size() int {
+	count := jb.count()
+	endCount := int(atomic.LoadInt32(&jb.endCount))
+	if endCount == 0 {
+		return count * jb.params.SectionSize * jb.params.Spans[jb.level]
+	}
+	log.Trace("size", "sections", jb.target.sections, "endcount", endCount, "level", jb.level)
+	return int(jb.target.size) % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches)
+}
+
+// add data to job
+// does no checking for data length or index validity
+func (jb *job) write(index int, data []byte) {
+
+	// if a write is received at the first datasection of a level we need to store this hash
+	// in case of a balanced tree, since we need to send it to resultC later
+	// at the time of hashing of a balanced tree we have no way of knowing for sure whether
+	// that is the end of the job or not
+	if len(jb.index.topHashes) < jb.level && jb.dataSection == 0 {
+		log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data))
+		jb.index.AddTopHash(data)
+	}
+	jb.writeC <- jobUnit{
+		index: index,
+		data:  data,
+	}
+}
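+
+// (editor's note: write blocks on the unbuffered writeC channel until the job's
+// process goroutine, started in newJob, receives the unit; when params.Debug is
+// set no goroutine is started and process must be driven manually; all counting
+// and hashing happens on the process side, one section at a time)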
+
+// runs in loop until:
+// - branches number of job writes have occurred (one full chunk)
+// - data write is finalized and targetcount for this chunk was already reached
+// - data write is finalized and targetcount is reached on a subsequent job write
+func (jb *job) process() {
+
+	defer jb.destroy()
+
+	// is set when data write is finished, AND
+	// the final data section falls within the span of this job
+	// if not, loop will only exit on Branches writes
+	endCount := 0
+OUTER:
+	for {
+		select {
+
+		// enter here if new data is written to the job
+		case entry := <-jb.writeC:
+			if entry.index == 0 {
+				jb.firstSectionData = entry.data
+			}
+			newCount := jb.inc()
+			log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", jb.count(), "index", entry.index, "data", hexutil.Encode(entry.data))
+			// this write is superfluous when the received data is the root hash
+			jb.writer.Write(entry.index, entry.data)
+
+			// since newcount is incremented above it can only equal endcount if this has been set in the case below,
+			// which means data write has been completed
+			// otherwise if we reached the chunk limit we also continue to hashing
+			if newCount == endCount {
+				log.Trace("quitting writec - endcount")
+				break OUTER
+			}
+			if newCount == jb.params.Branches {
+				log.Trace("quitting writec - branches")
+				break OUTER
+			}

+		// enter here if data writes have been completed
+		// TODO: this case currently executes for all cycles after data write is complete for which writes to this job do not happen. perhaps it can be improved
+		case <-jb.target.doneC:
+
+			// we can never have count 0 and have a completed job
+			// this is the easiest check we can make
+			//log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount)
+			count := jb.count()
+			if count == 0 {
+				continue
+			}
+
+			// if we have reached the end count for this chunk, we proceed to hashing
+			// this case is important when writes to the level happen after this goroutine
+			// registers that data writes have been completed
+			if count == int(endCount) {
+				log.Trace("quitting donec", "level", jb.level, "count", jb.count())
+				break OUTER
+			}
+
+			// if endcount is already calculated, don't calculate it again
+			if endCount > 0 {
+				continue
+			}
+
+			// if the target count falls within the span of this job
+			// set the endcount so we know we have to do extra calculations for
+			// determining span in case of unbalanced tree
+			targetCount := jb.target.Count()
+			endCount = jb.targetCountToEndCount(targetCount)
+			atomic.StoreInt32(&jb.endCount, int32(endCount))
+		}
+	}
+
+	if int(jb.target.level) == jb.level {
+		jb.target.resultC <- jb.index.GetTopHash(jb.level)
+		return
+	}
+
+	// get the size of the span and execute the hash digest of the content
+	size := jb.size()
+	span := lengthToSpan(size)
+	refSize := jb.count() * jb.params.SectionSize
+	log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", jb.target.level, "endcount", endCount)
+	ref := jb.writer.Sum(nil, refSize, span)
+
+	// endCount > 0 means this is the last chunk on the level
+	// the hash from the level below the target level will be the result
+	belowRootLevel := int(jb.target.level) - 1
+	if endCount > 0 && jb.level == belowRootLevel {
+		jb.target.resultC <- ref
+		return
+	}
+
+	// retrieve the parent and the corresponding section in it to write to
+	parent := jb.parent()
+	nextLevel := jb.level + 1
+	parentSection := dataSectionToLevelSection(jb.params, nextLevel, jb.dataSection)
+
+	// in the event that we have a balanced tree and a chunk with single reference below the target level
+	// we move the single reference up to the penultimate level
+	if endCount == 
1 {
+		ref = jb.firstSectionData
+		for parent.level < belowRootLevel {
+			log.Trace("parent write skip", "level", parent.level)
+			oldParent := parent
+			parent = parent.parent()
+			oldParent.destroy()
+			nextLevel += 1
+			parentSection = dataSectionToLevelSection(jb.params, nextLevel, jb.dataSection)
+		}
+	}
+	parent.write(parentSection, ref)
+
+}
+
+// determine whether the given data section count falls within the span of the current job
+func (jb *job) targetWithinJob(targetSection int) (int, bool) {
+	var endCount int
+	var ok bool
+
+	// span one level above equals the data size of 128 units of one section on this level
+	// using the span table saves one multiplication
+	//dataBoundary := dataSectionToLevelBoundary(jb.params, jb.level, jb.dataSection)
+	dataBoundary := dataSectionToLevelBoundary(jb.params, jb.level, jb.dataSection)
+	upperLimit := dataBoundary + jb.params.Spans[jb.level+1]
+
+	// the data section is the data section index where the span of this job starts
+	if targetSection >= dataBoundary && targetSection < upperLimit {
+
+		// data section index must be divided by corresponding section size on the job's level
+		// then wrap on branch period to find the correct section within this job
+		endCount = (targetSection / jb.params.Spans[jb.level]) % jb.params.Branches
+
+		ok = true
+	}
+	log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endcount", endCount, "ok", ok)
+	return int(endCount), ok
+}
+
+// if last data index falls within the span, return the appropriate end count for the level
+// otherwise return 0 (which means job write until limit)
+func (jb *job) targetCountToEndCount(targetCount int) int {
+	endIndex, ok := jb.targetWithinJob(targetCount - 1)
+	if !ok {
+		return 0
+	}
+	return endIndex + 1
+}
+
+// returns the parent job of the receiver job
+// a new parent job is created if none exists for the slot
+func (jb *job) parent() *job {
+	jb.index.mu.Lock()
+	defer jb.index.mu.Unlock()
+	newLevel := jb.level + 1
+	// Truncate to even quotient which is the actual logarithmic boundary of the data section under the span
+	newDataSection := dataSectionToLevelBoundary(jb.params, jb.level+1, jb.dataSection)
+	parent := jb.index.Get(newLevel, newDataSection)
+	if parent != nil {
+		return parent
+	}
+	return newJob(jb.params, jb.target, jb.index, jb.level+1, newDataSection)
+}
+
+// Next creates the job for the next data section span on the same level as the receiver job
+// this is only meant to be called once for each job, consecutive calls will overwrite index with new empty job
+func (jb *job) Next() *job {
+	return newJob(jb.params, jb.target, jb.index, jb.level, jb.dataSection+jb.params.Spans[jb.level+1])
+}
+
+// cleans up the job; reset hasher and remove pointer to job from index
+func (jb *job) destroy() {
+	jb.writer.Reset()
+	jb.index.Delete(jb)
+}
diff --git a/file/job_test.go b/file/job_test.go
new file mode 100644
index 0000000000..7c23a96090
--- /dev/null
+++ b/file/job_test.go
@@ -0,0 +1,664 @@
+package file
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"hash"
+	"math/rand"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/ethereum/go-ethereum/common/hexutil"
+	"github.com/ethersphere/swarm/bmt"
+	"github.com/ethersphere/swarm/log"
+	"github.com/ethersphere/swarm/testutil"
+	"golang.org/x/crypto/sha3"
+)
+
+var (
+	dummyHashFunc = func() bmt.SectionWriter {
+		return newDummySectionWriter(chunkSize*branches, sectionSize)
+	}
+	// placeholder for cases where a 
+
+// simple bmt.SectionWriter hasher that keeps the data written to it
+// for later inspection
+// TODO: see if this can be replaced with the fake hasher from the storage module
+type dummySectionWriter struct {
+	data        []byte
+	sectionSize int
+	writer      hash.Hash
+}
+
+func newDummySectionWriter(cp int, sectionSize int) *dummySectionWriter {
+	return &dummySectionWriter{
+		data:        make([]byte, cp),
+		sectionSize: sectionSize,
+		writer:      sha3.NewLegacyKeccak256(),
+	}
+}
+
+// implements bmt.SectionWriter
+// BUG: not actually writing to hasher
+func (d *dummySectionWriter) Write(index int, data []byte) {
+	copy(d.data[index*sectionSize:], data)
+}
+
+// implements bmt.SectionWriter
+func (d *dummySectionWriter) Sum(b []byte, size int, span []byte) []byte {
+	return d.writer.Sum(b)
+}
+
+// implements bmt.SectionWriter
+func (d *dummySectionWriter) Reset() {
+	d.data = make([]byte, len(d.data))
+	d.writer.Reset()
+}
+
+// implements bmt.SectionWriter
+func (d *dummySectionWriter) SectionSize() int {
+	return d.sectionSize
+}
+
+// TestDummySectionWriter verifies that the dummy writer records section writes at the correct offsets
+func TestDummySectionWriter(t *testing.T) {
+
+	w := newDummySectionWriter(chunkSize*2, sectionSize)
+	w.Reset()
+
+	data := make([]byte, 32)
+	rand.Seed(23115)
+	c, err := rand.Read(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if c < 32 {
+		t.Fatalf("short read %d", c)
+	}
+
+	w.Write(branches, data)
+	if !bytes.Equal(w.data[chunkSize:chunkSize+32], data) {
+		t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, data, w.data[chunkSize:chunkSize+32])
+	}
+
+	correctDigest := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"
+	digest := w.Sum(nil, chunkSize*2, nil)
+	if hexutil.Encode(digest) != correctDigest {
+		t.Fatalf("Digest: expected %s, got %x", correctDigest, digest)
+	}
+}
+
+// TestTreeParams verifies that params are set correctly by the param constructor
+func TestTreeParams(t *testing.T) {
+
+	params := newTreeParams(sectionSize, branches, noHashFunc)
+
+	if params.SectionSize != 32 {
+		t.Fatalf("section: expected %d, got %d", sectionSize, params.SectionSize)
+	}
+
+	if params.Branches != 128 {
+		t.Fatalf("branches: expected %d, got %d", branches, params.Branches)
+	}
+
+	if params.Spans[2] != branches*branches {
+		t.Fatalf("span %d: expected %d, got %d", 2, branches*branches, params.Spans[2])
+	}
+
+}
+
+// TestTarget verifies that params are set correctly by the target constructor
+func TestTarget(t *testing.T) {
+
+	tgt := newTarget()
+	tgt.Set(32, 1, 2)
+
+	if tgt.size != 32 {
+		t.Fatalf("target size expected %d, got %d", 32, tgt.size)
+	}
+
+	if tgt.sections != 1 {
+		t.Fatalf("target sections expected %d, got %d", 1, tgt.sections)
+	}
+
+	if tgt.level != 2 {
+		t.Fatalf("target level expected %d, got %d", 2, tgt.level)
+	}
+}
+
+// TestTargetWithinJob verifies the calculation of whether a final data section index
+// falls within a particular job's span
+func TestTargetWithinJob(t *testing.T) {
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params.Debug = true
+	index := newJobIndex(9)
+
+	jb := newJob(params, nil, index, 1, branches*branches)
+	defer jb.destroy()
+
+	finalSize := chunkSize*branches + chunkSize*2
+	finalCount := dataSizeToSectionCount(finalSize, sectionSize)
+	log.Trace("within test", "size", finalSize, "count", finalCount)
+	c, ok := jb.targetWithinJob(finalCount - 1)
+	if !ok {
+		t.Fatalf("target %d within %d: expected true", finalCount, jb.level)
+	}
+	if c != 1 {
+		t.Fatalf("target %d within %d: expected %d, got %d", finalCount, jb.level, 1, c)
+	}
+}
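+
+// The expectation above, worked out with the package constants (an illustrative
+// sketch of the arithmetic, not additional test code):
+//
+//	finalSize  = chunkSize*branches + chunkSize*2  // 532480 bytes
+//	finalCount = finalSize / sectionSize           // 16640 data sections
+//
+// the job starts at dataSection 16384 (= branches*branches) and spans
+// Spans[2] = 16384 sections, so the last section index 16639 falls inside it, and
+//
+//	endIndex = (16639 / Spans[1]) % branches       // (16639/128)%128 = 129%128 = 1
+//
+// which is why targetWithinJob(finalCount-1) is expected to return (1, true).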
+
+// TestNewJob verifies that a job is initialized with the correct values
+func TestNewJob(t *testing.T) {
+
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params.Debug = true
+
+	tgt := newTarget()
+	jb := newJob(params, tgt, nil, 1, branches*branches+1)
+	if jb.level != 1 {
+		t.Fatalf("job level expected 1, got %d", jb.level)
+	}
+	if jb.dataSection != branches*branches+1 {
+		t.Fatalf("datasectionindex: expected %d, got %d", branches*branches+1, jb.dataSection)
+	}
+	tgt.Set(0, 0, 0)
+	jb.destroy()
+}
+
+// TestJobSize verifies the data size calculation used for calculating the span of data
+// under a particular level reference
+// it tests both a balanced and an unbalanced tree
+func TestJobSize(t *testing.T) {
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params.Debug = true
+	index := newJobIndex(9)
+
+	tgt := newTarget()
+	jb := newJob(params, tgt, index, 3, 0)
+	jb.cursorSection = 1
+	jb.endCount = 1
+	size := chunkSize*branches + chunkSize
+	sections := dataSizeToSectionIndex(size, sectionSize) + 1
+	tgt.Set(size, sections, 3)
+	jobSize := jb.size()
+	if jobSize != size {
+		t.Fatalf("job size: expected %d, got %d", size, jobSize)
+	}
+	jb.destroy()
+
+	tgt = newTarget()
+	jb = newJob(params, tgt, index, 3, 0)
+	jb.cursorSection = 1
+	jb.endCount = 1
+	size = chunkSize * branches * branches
+	sections = dataSizeToSectionIndex(size, sectionSize) + 1
+	tgt.Set(size, sections, 3)
+	jobSize = jb.size()
+	if jobSize != size {
+		t.Fatalf("job size: expected %d, got %d", size, jobSize)
+	}
+	jb.destroy()
+
+}
+
+// TestJobTarget verifies that the underlying calculation for determining whether
+// a data section index is within a level's span is correct
+func TestJobTarget(t *testing.T) {
+	tgt := newTarget()
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params.Debug = true
+	index := newJobIndex(9)
+
+	jb := newJob(params, tgt, index, 1, branches*branches)
+
+	// this is less than chunkSize*128
+	// it will not be in the job span
+	finalSize := chunkSize + sectionSize + 1
+	finalSection := dataSizeToSectionIndex(finalSize, sectionSize)
+	c, ok := jb.targetWithinJob(finalSection)
+	if ok {
+		t.Fatalf("targetwithinjob: expected false")
+	}
+	jb.destroy()
+
+	// chunkSize*128+chunkSize*2 (532480) is within chunkSize*128 (524288) and chunkSize*128*2 (1048576)
+	// it will be within the job span
+	finalSize = chunkSize*branches + chunkSize*2
+	finalSection = dataSizeToSectionIndex(finalSize, sectionSize)
+	c, ok = jb.targetWithinJob(finalSection)
+	if !ok {
+		t.Fatalf("targetwithinjob section %d: expected true", branches*branches)
+	}
+	if c != 1 {
+		t.Fatalf("targetwithinjob section %d: expected %d, got %d", branches*branches, 1, c)
+	}
+	c = jb.targetCountToEndCount(finalSection + 1)
+	if c != 2 {
+		t.Fatalf("targetcounttoendcount section %d: expected %d, got %d", branches*branches, 2, c)
+	}
+	jb.destroy()
+}
+
+// TestJobIndex verifies that the job constructor adds the job to the job index
+// and removes it on job destruction
+func TestJobIndex(t *testing.T) {
+	tgt := newTarget()
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+
+	jb := newJob(params, tgt, nil, 1, branches)
+	jobIndex := jb.index
+	jbGot := jobIndex.Get(1, branches)
+	if jb != jbGot {
+		t.Fatalf("jbIndex get: expected %p, got %p", jb, jbGot)
+	}
+	jbGot.destroy()
+	if jobIndex.Get(1, branches) != nil {
+		t.Fatalf("jbIndex delete: expected nil")
+	}
+}
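+
+// A sketch of the section arithmetic behind TestGetJobNext and TestGetJobParent
+// below (illustrative values; dataSectionToLevelBoundary is defined in util.go):
+//
+//	// Next() advances a level-1 job at dataSection 16384 by the span above it:
+//	16384 + Spans[2]                // = 32768
+//	// parent() truncates to the boundary of the span one level above the parent:
+//	(16384 / Spans[3]) * Spans[3]   // = (16384/2097152)*2097152 = 0
+//
+// so the level-2 parent of both the job at 16384 and its successor lives at
+// dataSection 0.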
+
+// TestGetJobNext verifies that the new job constructed through the job.Next() method
+// has the correct level and data section index
+func TestGetJobNext(t *testing.T) {
+	tgt := newTarget()
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params.Debug = true
+
+	jb := newJob(params, tgt, nil, 1, branches*branches)
+	jbn := jb.Next()
+	if jbn == nil {
+		t.Fatalf("next: nil")
+	}
+	if jbn.level != 1 {
+		t.Fatalf("nextjob level: expected %d, got %d", 1, jbn.level)
+	}
+	if jbn.dataSection != jb.dataSection+branches*branches {
+		t.Fatalf("nextjob section: expected %d, got %d", jb.dataSection+branches*branches, jbn.dataSection)
+	}
+}
+
+// TestJobWriteTwoAndFinish writes two references to a job and sets the job target to two chunks
+// it verifies that the job count after the writes is two, and the hash is correct
+func TestJobWriteTwoAndFinish(t *testing.T) {
+
+	tgt := newTarget()
+	params := newTreeParams(sectionSize*2, branches, dummyHashFunc)
+
+	jb := newJob(params, tgt, nil, 1, 0)
+	_, data := testutil.SerialData(sectionSize*2, 255, 0)
+	jb.write(0, data[:sectionSize])
+	jb.write(1, data[sectionSize:])
+
+	finalSize := chunkSize * 2
+	finalSection := dataSizeToSectionIndex(finalSize, sectionSize)
+	tgt.Set(finalSize, finalSection, 2)
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*199)
+	defer cancel()
+	select {
+	case ref := <-tgt.Done():
+		correctRefHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"
+		refHex := hexutil.Encode(ref)
+		if refHex != correctRefHex {
+			t.Fatalf("job write full: expected %s, got %s", correctRefHex, refHex)
+		}
+	case <-ctx.Done():
+		t.Fatalf("timeout: %v", ctx.Err())
+	}
+
+	if jb.count() != 2 {
+		t.Fatalf("jobcount: expected %d, got %d", 2, jb.count())
+	}
+}
+
+// TestGetJobParent verifies that the parent returned from two jobs' parent() calls
+// that are within the same span as the parent chunk of references is the same
+// BUG: not guaranteed to return the same parent when run with e.g. -count 100
+func TestGetJobParent(t *testing.T) {
+	tgt := newTarget()
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+
+	jb := newJob(params, tgt, nil, 1, branches*branches)
+	jbp := jb.parent()
+	if jbp == nil {
+		t.Fatalf("parent: nil")
+	}
+	if jbp.level != 2 {
+		t.Fatalf("parent level: expected %d, got %d", 2, jbp.level)
+	}
+	if jbp.dataSection != 0 {
+		t.Fatalf("parent data section: expected %d, got %d", 0, jbp.dataSection)
+	}
+	jbGot := jb.index.Get(2, 0)
+	if jbGot == nil {
+		t.Fatalf("index get: nil")
+	}
+
+	jbNext := jb.Next()
+	jbpNext := jbNext.parent()
+	if jbpNext != jbp {
+		t.Fatalf("next parent: expected %p, got %p", jbp, jbpNext)
+	}
+}
+
+// TestWriteParentSection verifies that a data write translates to a write
+// in the correct section of its parent
+func TestWriteParentSection(t *testing.T) {
+	tgt := newTarget()
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	index := newJobIndex(9)
+
+	jb := newJob(params, tgt, index, 1, 0)
+	jbn := jb.Next()
+	_, data := testutil.SerialData(sectionSize*2, 255, 0)
+	jbn.write(0, data[:sectionSize])
+	jbn.write(1, data[sectionSize:])
+
+	finalSize := chunkSize*branches + chunkSize*2
+	finalSection := dataSizeToSectionIndex(finalSize, sectionSize)
+	tgt.Set(finalSize, finalSection, 3)
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
+	defer cancel()
+	select {
+	case <-tgt.Done():
+		t.Fatalf("unexpected done")
+	case <-ctx.Done():
+	}
+	jbnp := jbn.parent()
+	if jbnp.count() != 1 {
+		t.Fatalf("parent count: expected %d, got %d", 1, jbnp.count())
+	}
+	correctRefHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"
+	parentRef := jbnp.writer.(*dummySectionWriter).data[32:64]
+	parentRefHex := hexutil.Encode(parentRef)
+	if parentRefHex != correctRefHex {
+		t.Fatalf("parent data: expected %s, got %s", correctRefHex, parentRefHex)
+	}
+}
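+
+// Note on the recurring digest in the dummy-writer tests: dummySectionWriter.Sum
+// never feeds the written sections into its inner hash (see the BUG comment on
+// Write above), so it always returns the Keccak-256 hash of empty input. A
+// minimal standalone check (illustrative, not part of the test suite):
+//
+//	h := sha3.NewLegacyKeccak256()
+//	fmt.Printf("%x", h.Sum(nil))
+//	// c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470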
+
+// TestJobWriteFull verifies the hashing result of the write of a balanced tree
+// where the simulated tree is chunkSize*branches bytes worth of data
+func TestJobWriteFull(t *testing.T) {
+
+	tgt := newTarget()
+	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+
+	jb := newJob(params, tgt, nil, 1, 0)
+
+	_, data := testutil.SerialData(chunkSize, 255, 0)
+	for i := 0; i < branches; i++ {
+		jb.write(i, data[i*sectionSize:i*sectionSize+sectionSize])
+	}
+
+	tgt.Set(chunkSize, branches, 2)
+	correctRefHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"
+	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
+	defer cancel()
+	select {
+	case ref := <-tgt.Done():
+		refHex := hexutil.Encode(ref)
+		if refHex != correctRefHex {
+			t.Fatalf("job write full: expected %s, got %s", correctRefHex, refHex)
+		}
+	case <-ctx.Done():
+		t.Fatalf("timeout: %v", ctx.Err())
+	}
+	if jb.count() != branches {
+		t.Fatalf("jobcount: expected %d, got %d", branches, jb.count())
+	}
+}
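+
+// The size bookkeeping asserted at the end of the next two tests, written out
+// (illustrative arithmetic):
+//
+//	jb:  128 references -> 128 * Spans[1] * sectionSize = 128*128*32 = 524288 bytes (chunkSize*branches)
+//	jbn:   2 references ->   2 * Spans[1] * sectionSize =   2*128*32 =   8192 bytes (chunkSize*2)
+//	total target size   = 524288 + 8192 = 532480 bytes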
+
+// TestJobWriteSpan uses the bmt asynchronous hasher
+// it verifies that a result can be attained at chunkSize+sectionSize*2 bytes worth of references
+// which translates to chunkSize*branches+chunkSize*2 bytes worth of data
+func TestJobWriteSpan(t *testing.T) {
+
+	tgt := newTarget()
+	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	hashFunc := func() bmt.SectionWriter {
+		return bmt.New(pool).NewAsyncWriter(false)
+	}
+	params := newTreeParams(sectionSize, branches, hashFunc)
+
+	jb := newJob(params, tgt, nil, 1, 0)
+	_, data := testutil.SerialData(chunkSize+sectionSize*2, 255, 0)
+
+	for i := 0; i < chunkSize; i += sectionSize {
+		jb.write(i/sectionSize, data[i:i+sectionSize])
+	}
+	jbn := jb.Next()
+	jbn.write(0, data[chunkSize:chunkSize+sectionSize])
+	jbn.write(1, data[chunkSize+sectionSize:])
+	finalSize := chunkSize*branches + chunkSize*2
+	finalSection := dataSizeToSectionIndex(finalSize, sectionSize)
+	tgt.Set(finalSize, finalSection, 3)
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
+	defer cancel()
+	select {
+	case ref := <-tgt.Done():
+		// TODO: double check that this hash is correct!!
+		refCorrectHex := "0xee56134cab34a5a612648dcc22d88b7cb543081bd144906dfc4fa93802c9addf"
+		refHex := hexutil.Encode(ref)
+		if refHex != refCorrectHex {
+			t.Fatalf("writespan sequential: expected %s, got %s", refCorrectHex, refHex)
+		}
+	case <-ctx.Done():
+		t.Fatalf("timeout: %v", ctx.Err())
+	}
+
+	sz := jb.size()
+	if sz != chunkSize*branches {
+		t.Fatalf("job 1 size: expected %d, got %d", chunkSize*branches, sz)
+	}
+
+	sz = jbn.size()
+	if sz != chunkSize*2 {
+		t.Fatalf("job 2 size: expected %d, got %d", chunkSize*2, sz)
+	}
+}
+
+// TestJobWriteSpanShuffle does the same as TestJobWriteSpan but
+// shuffles the indices of the first chunk write
+// verifying that sequential use of the underlying hasher is not required
+func TestJobWriteSpanShuffle(t *testing.T) {
+
+	tgt := newTarget()
+	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	hashFunc := func() bmt.SectionWriter {
+		return bmt.New(pool).NewAsyncWriter(false)
+	}
+	params := newTreeParams(sectionSize, branches, hashFunc)
+
+	jb := newJob(params, tgt, nil, 1, 0)
+	_, data := testutil.SerialData(chunkSize+sectionSize*2, 255, 0)
+
+	var idxs []int
+	for i := 0; i < branches; i++ {
+		idxs = append(idxs, i)
+	}
+	rand.Shuffle(branches, func(i int, j int) {
+		idxs[i], idxs[j] = idxs[j], idxs[i]
+	})
+	for _, idx := range idxs {
+		jb.write(idx, data[idx*sectionSize:idx*sectionSize+sectionSize])
+	}
+
+	jbn := jb.Next()
+	jbn.write(0, data[chunkSize:chunkSize+sectionSize])
+	jbn.write(1, data[chunkSize+sectionSize:])
+	finalSize := chunkSize*branches + chunkSize*2
+	finalSection := dataSizeToSectionIndex(finalSize, sectionSize)
+	tgt.Set(finalSize, finalSection, 3)
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
+	defer cancel()
+	select {
+	case ref := <-tgt.Done():
+		refCorrectHex := "0xee56134cab34a5a612648dcc22d88b7cb543081bd144906dfc4fa93802c9addf"
+		refHex := hexutil.Encode(ref)
+		jbparent := jb.parent()
+		jbnparent := jbn.parent()
+		log.Info("succeeding", "jb count", jb.count(), "jbn count", jbn.count(), "jb parent count", jbparent.count(), "jbn parent count", jbnparent.count())
+		if refHex != refCorrectHex {
+			t.Fatalf("writespan sequential: expected %s, got %s", refCorrectHex, refHex)
+		}
+	case <-ctx.Done():
+
+		jbparent := jb.parent()
+		jbnparent := jbn.parent()
+		log.Error("failing", "jb count", jb.count(), "jbn count", jbn.count(), "jb parent count", jbparent.count(), "jbn parent count", jbnparent.count())
+		t.Fatalf("timeout: %v", ctx.Err())
+	}
+
+	sz := jb.size()
+	if sz != chunkSize*branches {
+		t.Fatalf("job size: expected %d, got %d", chunkSize*branches, sz)
+	}
+
+	sz = jbn.size()
+	if sz != chunkSize*2 {
+		t.Fatalf("job size: expected %d, got %d", chunkSize*2, sz)
+	}
+}
+
+// TestVectors executes the barebones functionality of the hasher
+// and verifies against source of truth results generated from the reference hasher
+// for the same data
+// TODO: vet dynamically against the referencefilehasher instead of the expected vectors
+func TestVectors(t *testing.T) {
+	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	refHashFunc := func() bmt.SectionWriter {
+		return bmt.New(poolAsync).NewAsyncWriter(false)
+	}
+	dataHash := bmt.New(poolSync)
+	params := newTreeParams(sectionSize, branches, refHashFunc)
+
+	for i := start; i < end; i++ {
+		tgt := newTarget()
+		dataLength := dataLengths[i]
+		_, data := testutil.SerialData(dataLength, 255, 0)
+		jb := newJob(params,
tgt, nil, 1, 0) + count := 0 + log.Info("test vector", "length", dataLength) + for i := 0; i < dataLength; i += chunkSize { + ie := i + chunkSize + if ie > dataLength { + ie = dataLength + } + writeSize := ie - i + span := lengthToSpan(writeSize) + log.Debug("data write", "i", i, "length", writeSize, "span", span) + dataHash.ResetWithLength(span) + c, err := dataHash.Write(data[i:ie]) + if err != nil { + jb.destroy() + t.Fatalf("data ref fail: %v", err) + } + if c != ie-i { + jb.destroy() + t.Fatalf("data ref short write: expect %d, got %d", ie-i, c) + } + ref := dataHash.Sum(nil) + log.Debug("data ref", "i", i, "ie", ie, "data", hexutil.Encode(ref)) + jb.write(count, ref) + count += 1 + if ie%(chunkSize*branches) == 0 { + jb = jb.Next() + count = 0 + } + } + dataSections := dataSizeToSectionIndex(dataLength, params.SectionSize) + tgt.Set(dataLength, dataSections, getLevelsFromLength(dataLength, params.SectionSize, params.Branches)) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*1000) + defer cancel() + select { + case ref := <-tgt.Done(): + refCorrectHex := "0x" + expected[i] + refHex := hexutil.Encode(ref) + if refHex != refCorrectHex { + t.Fatalf("writespan sequential %d/%d: expected %s, got %s", i, dataLength, refCorrectHex, refHex) + } + case <-ctx.Done(): + t.Fatalf("timeout: %v", ctx.Err()) + } + } +} + +// BenchmarkVector generates benchmarks that are comparable to the pyramid hasher +func BenchmarkVector(b *testing.B) { + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d/%d", i, dataLengths[i]), benchmarkVector) + } +} + +func benchmarkVector(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLengthParam, err := strconv.ParseInt(params[2], 10, 64) + if err != nil { + b.Fatal(err) + } + dataLength := int(dataLengthParam) + + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() bmt.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHash := bmt.New(poolSync) + treeParams := newTreeParams(sectionSize, branches, refHashFunc) + + for j := 0; j < b.N; j++ { + tgt := newTarget() + _, data := testutil.SerialData(dataLength, 255, 0) + jb := newJob(treeParams, tgt, nil, 1, 0) + count := 0 + log.Info("test vector", "length", dataLength) + for i := 0; i < dataLength; i += chunkSize { + ie := i + chunkSize + if ie > dataLength { + ie = dataLength + } + writeSize := ie - i + span := lengthToSpan(writeSize) + log.Debug("data write", "i", i, "length", writeSize, "span", span) + dataHash.ResetWithLength(span) + c, err := dataHash.Write(data[i:ie]) + if err != nil { + jb.destroy() + b.Fatalf("data ref fail: %v", err) + } + if c != ie-i { + jb.destroy() + b.Fatalf("data ref short write: expect %d, got %d", ie-i, c) + } + ref := dataHash.Sum(nil) + log.Debug("data ref", "i", i, "ie", ie, "data", hexutil.Encode(ref)) + jb.write(count, ref) + count += 1 + if ie%(chunkSize*branches) == 0 { + jb = jb.Next() + count = 0 + } + } + dataSections := dataSizeToSectionIndex(dataLength, treeParams.SectionSize) + tgt.Set(dataLength, dataSections, getLevelsFromLength(dataLength, treeParams.SectionSize, treeParams.Branches)) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*1000) + defer cancel() + select { + case <-tgt.Done(): + case <-ctx.Done(): + b.Fatalf("timeout: %v", ctx.Err()) + } + } +} diff --git a/file/pyramid_test.go b/file/pyramid_test.go new file mode 100644 index 
0000000000..17a5f7185e --- /dev/null +++ b/file/pyramid_test.go @@ -0,0 +1,49 @@ +package file + +import ( + "bytes" + "context" + "fmt" + "io" + "strconv" + "strings" + "testing" + + "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/storage" + "github.com/ethersphere/swarm/testutil" +) + +func BenchmarkPyramidHasher(b *testing.B) { + + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkPyramidHasher) + } +} + +func benchmarkPyramidHasher(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(params[1], 10, 64) + if err != nil { + b.Fatal(err) + } + _, data := testutil.SerialData(int(dataLength), 255, 0) + buf := bytes.NewReader(data) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + buf.Seek(0, io.SeekStart) + //putGetter := newTestHasherStore(&storage.FakeChunkStore{}, storage.BMTHash) + putGetter := storage.NewHasherStore(&storage.FakeChunkStore{}, storage.MakeHashFunc(storage.BMTHash), false, chunk.NewTag(0, "foo", 0, false)) + + ctx := context.Background() + _, wait, err := storage.PyramidSplit(ctx, buf, putGetter, putGetter, chunk.NewTag(0, "foo", dataLength/4096+1, false)) + if err != nil { + b.Fatalf(err.Error()) + } + err = wait(ctx) + if err != nil { + b.Fatalf(err.Error()) + } + } +} diff --git a/file/type.go b/file/type.go new file mode 100644 index 0000000000..eba4a3a26b --- /dev/null +++ b/file/type.go @@ -0,0 +1,8 @@ +package file + +import "hash" + +// SectionWriter is a chainable interface for file-based operations in swarm +type SectionWriter interface { + hash.Hash +} From bc1e288b0c918106ce078f86f121a97d5602d72e Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 08:38:52 +0100 Subject: [PATCH 08/67] file: Fix races --- file/common_test.go | 4 ++-- file/job.go | 22 ++++++++++++++++------ file/job_test.go | 4 +--- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 4da69e8d94..7ee21d0036 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -57,8 +57,8 @@ var ( "1d1bae3a0f2d3ef6b58df4fd6c55c2d3752339b6b474eaab52c579fafe336bfa", // 20 } - start = 20 - end = 21 //len(dataLengths) - 1 + start = 0 + end = len(dataLengths) - 1 ) func init() { diff --git a/file/job.go b/file/job.go index e8ecfbe7ae..a8d0415445 100644 --- a/file/job.go +++ b/file/job.go @@ -80,6 +80,12 @@ func (ji *jobIndex) GetTopHash(lvl int) []byte { return ji.topHashes[lvl-1] } +func (ji *jobIndex) GetTopHashLevel() int { + ji.mu.Lock() + defer ji.mu.Unlock() + return len(ji.topHashes) +} + // passed to a job to determine at which data lengths and levels a job should terminate type target struct { size int32 // bytes written @@ -199,9 +205,12 @@ func (jb *job) write(index int, data []byte) { // in case of a balanced tree and we need to send it to resultC later // at the time of hasing of a balanced tree we have no way of knowing for sure whether // that is the end of the job or not - if len(jb.index.topHashes) < jb.level && jb.dataSection == 0 { - log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) - jb.index.AddTopHash(data) + if jb.dataSection == 0 { + topHashLevel := jb.index.GetTopHashLevel() + if topHashLevel < jb.level { + log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) + jb.index.AddTopHash(data) + } } jb.writeC <- jobUnit{ index: index, @@ -281,7 +290,8 @@ OUTER: } } - if int(jb.target.level) == jb.level { + targetLevel := atomic.LoadInt32(&jb.target.level) + if int(targetLevel) == 
jb.level { jb.target.resultC <- jb.index.GetTopHash(jb.level) return } @@ -290,12 +300,12 @@ OUTER: size := jb.size() span := lengthToSpan(size) refSize := jb.count() * jb.params.SectionSize - log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", jb.target.level, "endcount", endCount) + log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", endCount) ref := jb.writer.Sum(nil, refSize, span) // endCount > 0 means this is the last chunk on the level // the hash from the level below the target level will be the result - belowRootLevel := int(jb.target.level) - 1 + belowRootLevel := int(targetLevel) - 1 if endCount > 0 && jb.level == belowRootLevel { jb.target.resultC <- ref return diff --git a/file/job_test.go b/file/job_test.go index 7c23a96090..8ecb8dc92e 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -516,7 +516,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { jbparent := jb.parent() jbnparent := jbn.parent() - log.Error("failing", "jb count", jb.count(), "jbn count", jbn.count(), "jb parent count", jbparent.count(), "jbn parent count", jbnparent.count()) + log.Error("failing", "jb count", jb.count(), "jbn count", jbn.count(), "jb parent count", jbparent.count(), "jbn parent count", jbnparent.count(), "jb parent p", fmt.Sprintf("%p", jbparent), "jbn parent p", fmt.Sprintf("%p", jbnparent)) t.Fatalf("timeout: %v", ctx.Err()) } @@ -631,7 +631,6 @@ func benchmarkVector(b *testing.B) { } writeSize := ie - i span := lengthToSpan(writeSize) - log.Debug("data write", "i", i, "length", writeSize, "span", span) dataHash.ResetWithLength(span) c, err := dataHash.Write(data[i:ie]) if err != nil { @@ -643,7 +642,6 @@ func benchmarkVector(b *testing.B) { b.Fatalf("data ref short write: expect %d, got %d", ie-i, c) } ref := dataHash.Sum(nil) - log.Debug("data ref", "i", i, "ie", ie, "data", hexutil.Encode(ref)) jb.write(count, ref) count += 1 if ie%(chunkSize*branches) == 0 { From 300d373c16116db23cd4802f8633a4db477e8a22 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 10:06:52 +0100 Subject: [PATCH 09/67] file: Use all test cases in vector test even if fail --- file/common_test.go | 2 +- file/job_test.go | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 7ee21d0036..524778d6b6 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -58,7 +58,7 @@ var ( } start = 0 - end = len(dataLengths) - 1 + end = len(dataLengths) ) func init() { diff --git a/file/job_test.go b/file/job_test.go index 8ecb8dc92e..9ffe11135f 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -543,6 +543,7 @@ func TestVectors(t *testing.T) { } dataHash := bmt.New(poolSync) params := newTreeParams(sectionSize, branches, refHashFunc) + var mismatch int for i := start; i < end; i++ { tgt := newTarget() @@ -580,6 +581,7 @@ func TestVectors(t *testing.T) { } dataSections := dataSizeToSectionIndex(dataLength, params.SectionSize) tgt.Set(dataLength, dataSections, getLevelsFromLength(dataLength, params.SectionSize, params.Branches)) + eq := true ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*1000) defer cancel() select { @@ -587,11 +589,17 @@ func TestVectors(t *testing.T) { refCorrectHex := "0x" + expected[i] refHex := hexutil.Encode(ref) if refHex != refCorrectHex { - t.Fatalf("writespan 
sequential %d/%d: expected %s, got %s", i, dataLength, refCorrectHex, refHex) + mismatch++ + eq = false } + t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, ref, expected[i]) case <-ctx.Done(): t.Fatalf("timeout: %v", ctx.Err()) } + + } + if mismatch > 0 { + t.Fatalf("mismatches: %d/%d", mismatch, end-start) } } From 8d3e02062957a6fd8123d1437321d2f26a20097f Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 13:41:34 +0100 Subject: [PATCH 10/67] file: Use local chan pointer to avoid tight loop in doneC --- file/common_test.go | 2 +- file/hasher_r_test.go | 1 + file/job.go | 63 ++++++++++++++++++++++++++++--------------- file/job_test.go | 4 +-- 4 files changed, 45 insertions(+), 25 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 524778d6b6..b0afd64143 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -58,7 +58,7 @@ var ( } start = 0 - end = len(dataLengths) + end = 1 //len(dataLengths) ) func init() { diff --git a/file/hasher_r_test.go b/file/hasher_r_test.go index 04200f6ed7..8be8aab010 100644 --- a/file/hasher_r_test.go +++ b/file/hasher_r_test.go @@ -17,6 +17,7 @@ import ( // TestReferenceFileHasherDanglingChunk explicitly tests the edge case where a single chunk hash after a balanced tree // should skip to the level with a single reference func TestReferenceFileHasherDanglingChunk(t *testing.T) { + t.Skip("too big") pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) h := bmt.New(pool) r, data := testutil.SerialData(chunkSize*branches*branches+sectionSize, 255, 0) diff --git a/file/job.go b/file/job.go index a8d0415445..14003957cd 100644 --- a/file/job.go +++ b/file/job.go @@ -93,6 +93,7 @@ type target struct { level int32 // target level calculated from bytes written against branching factor and sector size resultC chan []byte // channel to receive root hash doneC chan struct{} // when this channel is closed all jobs will calculate their end write count + mu sync.Mutex } func newTarget() *target { @@ -105,9 +106,14 @@ func newTarget() *target { // Set is called when the final length of the data to be written is known // TODO: method can be simplified to calculate sections and level internally func (t *target) Set(size int, sections int, level int) { - atomic.StoreInt32(&t.size, int32(size)) - atomic.StoreInt32(&t.sections, int32(sections)) - atomic.StoreInt32(&t.level, int32(level)) + t.mu.Lock() + defer t.mu.Unlock() + t.size = int32(size) + t.sections = int32(sections) + t.level = int32(level) + // atomic.StoreInt32(&t.size, int32(size)) + // atomic.StoreInt32(&t.sections, int32(sections)) + // atomic.StoreInt32(&t.level, int32(level)) log.Trace("target set", "size", size, "sections", sections, "level", level) close(t.doneC) } @@ -176,17 +182,21 @@ func (jb *job) String() string { // atomically increments the write counter of the job func (jb *job) inc() int { return int(atomic.AddInt32(&jb.cursorSection, 1)) + //jb.cursorSection++ + //return int(jb.cursorSection) } // atomically returns the write counter of the job func (jb *job) count() int { return int(atomic.LoadInt32(&jb.cursorSection)) + //return int(jb.cursorSection) } // size returns the byte size of the span the job represents // if job is last index in a level and writes have been finalized, it will return the target size // otherwise, regardless of job index, it will return the size according to the current write count // TODO: returning expected size in one case and actual size in another can lead to confusion 
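+// For intuition, the non-final case worked out (illustrative arithmetic with
+// sectionSize = 32, branches = 128): a level-1 job that is not the last on its
+// level and has counted 3 section writes reports
+//
+//	3 * SectionSize * Spans[1] = 3*32*128 = 12288 bytes, i.e. three chunks;
+//
+// only the final job on a level falls back to the target size modulo its span.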
+// TODO: two atomic ops, may change value inbetween func (jb *job) size() int { count := jb.count() endCount := int(atomic.LoadInt32(&jb.endCount)) @@ -224,6 +234,7 @@ func (jb *job) write(index int, data []byte) { // - data write is finalized and targetcount is reached on a subsequent job write func (jb *job) process() { + doneC := jb.target.doneC defer jb.destroy() // is set when data write is finished, AND @@ -236,11 +247,12 @@ OUTER: // enter here if new data is written to the job case entry := <-jb.writeC: + jb.mu.Lock() if entry.index == 0 { jb.firstSectionData = entry.data } newCount := jb.inc() - log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", jb.count(), "index", entry.index, "data", hexutil.Encode(entry.data)) + log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", newCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) // this write is superfluous when the received data is the root hash jb.writer.Write(entry.index, entry.data) @@ -249,44 +261,50 @@ OUTER: // otherwise if we reached the chunk limit we also continue to hashing if newCount == endCount { log.Trace("quitting writec - endcount") + jb.mu.Unlock() break OUTER } if newCount == jb.params.Branches { log.Trace("quitting writec - branches") + jb.mu.Unlock() break OUTER } + jb.mu.Unlock() // enter here if data writes have been completed // TODO: this case currently executes for all cycles after data write is complete for which writes to this job do not happen. perhaps it can be improved - case <-jb.target.doneC: + case <-doneC: + + jb.mu.Lock() // we can never have count 0 and have a completed job // this is the easiest check we can make - //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount) + log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount) count := jb.count() if count == 0 { + jb.mu.Unlock() continue } + doneC = nil + + // if the target count falls within the span of this job + // set the endcount so we know we have to do extra calculations for + // determining span in case of unbalanced tree + targetCount := jb.target.Count() + endCount = jb.targetCountToEndCount(targetCount) + jb.endCount = int32(endCount) + //atomic.StoreInt32(&jb.endCount, int32(endCount)) + log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", endCount) // if we have reached the end count for this chunk, we proceed to hashing // this case is important when write to the level happen after this goroutine // registers that data writes have been completed if count == int(endCount) { log.Trace("quitting donec", "level", jb.level, "count", jb.count()) + jb.mu.Unlock() break OUTER } - - // if endcount is already calculated, don't calculate it again - if endCount > 0 { - continue - } - - // if the target count falls within the span of this job - // set the endcount so we know we have to do extra calculations for - // determining span in case of unbalanced tree - targetCount := jb.target.Count() - endCount = jb.targetCountToEndCount(targetCount) - atomic.StoreInt32(&jb.endCount, int32(endCount)) + jb.mu.Unlock() } } @@ -313,6 +331,7 @@ OUTER: // retrieve the parent and the corresponding section in it to write to parent := jb.parent() + log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) nextLevel := jb.level + 1 parentSection := dataSectionToLevelSection(jb.params, nextLevel, jb.dataSection) @@ -335,7 
+354,7 @@ OUTER: // determine whether the given data section count falls within the span of the current job func (jb *job) targetWithinJob(targetSection int) (int, bool) { - var endCount int + var endIndex int var ok bool // span one level above equals the data size of 128 units of one section on this level @@ -349,12 +368,12 @@ func (jb *job) targetWithinJob(targetSection int) (int, bool) { // data section index must be divided by corresponding section size on the job's level // then wrap on branch period to find the correct section within this job - endCount = (targetSection / jb.params.Spans[jb.level]) % jb.params.Branches + endIndex = (targetSection / jb.params.Spans[jb.level]) % jb.params.Branches ok = true } - log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endcount", endCount, "ok", ok) - return int(endCount), ok + log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endindex", endIndex, "ok", ok) + return int(endIndex), ok } // if last data index falls within the span, return the appropriate end count for the level diff --git a/file/job_test.go b/file/job_test.go index 9ffe11135f..9b0a71e646 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -442,7 +442,7 @@ func TestJobWriteSpan(t *testing.T) { finalSection := dataSizeToSectionIndex(finalSize, sectionSize) tgt.Set(finalSize, finalSection, 3) - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*1000) defer cancel() select { case ref := <-tgt.Done(): @@ -631,7 +631,7 @@ func benchmarkVector(b *testing.B) { _, data := testutil.SerialData(dataLength, 255, 0) jb := newJob(treeParams, tgt, nil, 1, 0) count := 0 - log.Info("test vector", "length", dataLength) + //log.Info("test vector", "length", dataLength) for i := 0; i < dataLength; i += chunkSize { ie := i + chunkSize if ie > dataLength { From 781cb6b267ab85945df9aa92bb57a6705bbc7d0c Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 14:12:19 +0100 Subject: [PATCH 11/67] file: Fix race in target members read --- file/common_test.go | 4 +-- file/job.go | 62 +++++++++++++++++++++++++-------------------- file/job_test.go | 2 +- 3 files changed, 38 insertions(+), 30 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index b0afd64143..387627b5b9 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -57,8 +57,8 @@ var ( "1d1bae3a0f2d3ef6b58df4fd6c55c2d3752339b6b474eaab52c579fafe336bfa", // 20 } - start = 0 - end = 1 //len(dataLengths) + start = 13 + end = 14 //len(dataLengths) ) func init() { diff --git a/file/job.go b/file/job.go index 14003957cd..99e57b1260 100644 --- a/file/job.go +++ b/file/job.go @@ -5,9 +5,7 @@ import ( "sync" "sync/atomic" - "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" - "github.com/ethersphere/swarm/log" ) // keeps an index of all the existing jobs for a file hashing operation @@ -41,7 +39,7 @@ func (ji *jobIndex) String() string { // Add adds a job to the index at the level // and data section index specified in the job func (ji *jobIndex) Add(jb *job) { - log.Trace("adding job", "job", jb) + //log.Trace("adding job", "job", jb) ji.jobs[jb.level].Store(jb.dataSection, jb) } @@ -70,7 +68,7 @@ func (ji *jobIndex) AddTopHash(ref []byte) { ji.mu.Lock() defer ji.mu.Unlock() ji.topHashes = append(ji.topHashes, 
ref) - log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) + //log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) } // GetJobHash gets the current top hash for a particular level set by AddTopHash @@ -114,14 +112,24 @@ func (t *target) Set(size int, sections int, level int) { // atomic.StoreInt32(&t.size, int32(size)) // atomic.StoreInt32(&t.sections, int32(sections)) // atomic.StoreInt32(&t.level, int32(level)) - log.Trace("target set", "size", size, "sections", sections, "level", level) + //log.Trace("target set", "size", size, "sections", sections, "level", level) close(t.doneC) } // Count returns the total section count for the target // it should only be called after Set() func (t *target) Count() int { - return int(atomic.LoadInt32(&t.sections)) + 1 + t.mu.Lock() + defer t.mu.Unlock() + return int(t.sections) + 1 + //return int(atomic.LoadInt32(&t.sections)) + 1 +} + +func (t *target) Level() int { + t.mu.Lock() + defer t.mu.Unlock() + return int(t.level) + //return int(atomic.LoadInt32(&t.level)) } // Done returns the channel in which the root hash will be sent @@ -203,7 +211,7 @@ func (jb *job) size() int { if endCount == 0 { return count * jb.params.SectionSize * jb.params.Spans[jb.level] } - log.Trace("size", "sections", jb.target.sections, "endcount", endCount, "level", jb.level) + //log.Trace("size", "sections", jb.target.sections, "endcount", endCount, "level", jb.level) return int(jb.target.size) % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) } @@ -218,7 +226,7 @@ func (jb *job) write(index int, data []byte) { if jb.dataSection == 0 { topHashLevel := jb.index.GetTopHashLevel() if topHashLevel < jb.level { - log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) + //log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) jb.index.AddTopHash(data) } } @@ -247,12 +255,12 @@ OUTER: // enter here if new data is written to the job case entry := <-jb.writeC: - jb.mu.Lock() + //jb.mu.Lock() if entry.index == 0 { jb.firstSectionData = entry.data } newCount := jb.inc() - log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", newCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) + //log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", newCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) // this write is superfluous when the received data is the root hash jb.writer.Write(entry.index, entry.data) @@ -260,31 +268,31 @@ OUTER: // which means data write has been completed // otherwise if we reached the chunk limit we also continue to hashing if newCount == endCount { - log.Trace("quitting writec - endcount") - jb.mu.Unlock() + //log.Trace("quitting writec - endcount") + // jb.mu.Unlock() break OUTER } if newCount == jb.params.Branches { - log.Trace("quitting writec - branches") - jb.mu.Unlock() + //log.Trace("quitting writec - branches") + // jb.mu.Unlock() break OUTER } - jb.mu.Unlock() + //jb.mu.Unlock() // enter here if data writes have been completed // TODO: this case currently executes for all cycles after data write is complete for which writes to this job do not happen. 
perhaps it can be improved case <-doneC: - jb.mu.Lock() + //jb.mu.Lock() // we can never have count 0 and have a completed job // this is the easiest check we can make - log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount) count := jb.count() if count == 0 { - jb.mu.Unlock() + // jb.mu.Unlock() continue } + //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount) doneC = nil // if the target count falls within the span of this job @@ -294,21 +302,21 @@ OUTER: endCount = jb.targetCountToEndCount(targetCount) jb.endCount = int32(endCount) //atomic.StoreInt32(&jb.endCount, int32(endCount)) - log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", endCount) + //log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", endCount) // if we have reached the end count for this chunk, we proceed to hashing // this case is important when write to the level happen after this goroutine // registers that data writes have been completed if count == int(endCount) { - log.Trace("quitting donec", "level", jb.level, "count", jb.count()) - jb.mu.Unlock() + //log.Trace("quitting donec", "level", jb.level, "count", jb.count()) + // jb.mu.Unlock() break OUTER } - jb.mu.Unlock() + //jb.mu.Unlock() } } - targetLevel := atomic.LoadInt32(&jb.target.level) + targetLevel := jb.target.Level() if int(targetLevel) == jb.level { jb.target.resultC <- jb.index.GetTopHash(jb.level) return @@ -318,7 +326,7 @@ OUTER: size := jb.size() span := lengthToSpan(size) refSize := jb.count() * jb.params.SectionSize - log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", endCount) + //log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", endCount) ref := jb.writer.Sum(nil, refSize, span) // endCount > 0 means this is the last chunk on the level @@ -331,7 +339,7 @@ OUTER: // retrieve the parent and the corresponding section in it to write to parent := jb.parent() - log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) + //log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) nextLevel := jb.level + 1 parentSection := dataSectionToLevelSection(jb.params, nextLevel, jb.dataSection) @@ -340,7 +348,7 @@ OUTER: if endCount == 1 { ref = jb.firstSectionData for parent.level < belowRootLevel { - log.Trace("parent write skip", "level", parent.level) + //log.Trace("parent write skip", "level", parent.level) oldParent := parent parent = parent.parent() oldParent.destroy() @@ -372,7 +380,7 @@ func (jb *job) targetWithinJob(targetSection int) (int, bool) { ok = true } - log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endindex", endIndex, "ok", ok) + //log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endindex", endIndex, "ok", ok) return int(endIndex), ok } diff --git a/file/job_test.go b/file/job_test.go index 9b0a71e646..7f7f1d7bdc 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -500,7 +500,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { finalSection := 
dataSizeToSectionIndex(finalSize, sectionSize) tgt.Set(finalSize, finalSection, 3) - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) defer cancel() select { case ref := <-tgt.Done(): From a528496c96c0fa0df2ede41c65e7d0df86cc9018 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 14:17:00 +0100 Subject: [PATCH 12/67] file: Cleanup commented code --- file/job.go | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/file/job.go b/file/job.go index 99e57b1260..691caf61fe 100644 --- a/file/job.go +++ b/file/job.go @@ -109,10 +109,6 @@ func (t *target) Set(size int, sections int, level int) { t.size = int32(size) t.sections = int32(sections) t.level = int32(level) - // atomic.StoreInt32(&t.size, int32(size)) - // atomic.StoreInt32(&t.sections, int32(sections)) - // atomic.StoreInt32(&t.level, int32(level)) - //log.Trace("target set", "size", size, "sections", sections, "level", level) close(t.doneC) } @@ -122,14 +118,18 @@ func (t *target) Count() int { t.mu.Lock() defer t.mu.Unlock() return int(t.sections) + 1 - //return int(atomic.LoadInt32(&t.sections)) + 1 } func (t *target) Level() int { t.mu.Lock() defer t.mu.Unlock() return int(t.level) - //return int(atomic.LoadInt32(&t.level)) +} + +func (t *target) Size() int { + t.mu.Lock() + defer t.mu.Unlock() + return int(t.size) } // Done returns the channel in which the root hash will be sent @@ -190,14 +190,11 @@ func (jb *job) String() string { // atomically increments the write counter of the job func (jb *job) inc() int { return int(atomic.AddInt32(&jb.cursorSection, 1)) - //jb.cursorSection++ - //return int(jb.cursorSection) } // atomically returns the write counter of the job func (jb *job) count() int { return int(atomic.LoadInt32(&jb.cursorSection)) - //return int(jb.cursorSection) } // size returns the byte size of the span the job represents @@ -212,7 +209,7 @@ func (jb *job) size() int { return count * jb.params.SectionSize * jb.params.Spans[jb.level] } //log.Trace("size", "sections", jb.target.sections, "endcount", endCount, "level", jb.level) - return int(jb.target.size) % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) + return int(jb.target.Size()) % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) } // add data to job @@ -255,7 +252,6 @@ OUTER: // enter here if new data is written to the job case entry := <-jb.writeC: - //jb.mu.Lock() if entry.index == 0 { jb.firstSectionData = entry.data } @@ -269,27 +265,21 @@ OUTER: // otherwise if we reached the chunk limit we also continue to hashing if newCount == endCount { //log.Trace("quitting writec - endcount") - // jb.mu.Unlock() break OUTER } if newCount == jb.params.Branches { //log.Trace("quitting writec - branches") - // jb.mu.Unlock() break OUTER } - //jb.mu.Unlock() // enter here if data writes have been completed // TODO: this case currently executes for all cycles after data write is complete for which writes to this job do not happen. 
perhaps it can be improved case <-doneC: - //jb.mu.Lock() - // we can never have count 0 and have a completed job // this is the easiest check we can make count := jb.count() if count == 0 { - // jb.mu.Unlock() continue } //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount) @@ -301,7 +291,6 @@ OUTER: targetCount := jb.target.Count() endCount = jb.targetCountToEndCount(targetCount) jb.endCount = int32(endCount) - //atomic.StoreInt32(&jb.endCount, int32(endCount)) //log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", endCount) // if we have reached the end count for this chunk, we proceed to hashing @@ -309,10 +298,8 @@ OUTER: // registers that data writes have been completed if count == int(endCount) { //log.Trace("quitting donec", "level", jb.level, "count", jb.count()) - // jb.mu.Unlock() break OUTER } - //jb.mu.Unlock() } } From a41865617de680ceabad570a5f40d271cb8be7e4 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 16:49:10 +0100 Subject: [PATCH 13/67] file: Remove (properly) tight loop in doneC select --- file/common_test.go | 4 ++-- file/job.go | 18 ++++++++++++------ file/util.go | 4 +--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 387627b5b9..b0afd64143 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -57,8 +57,8 @@ var ( "1d1bae3a0f2d3ef6b58df4fd6c55c2d3752339b6b474eaab52c579fafe336bfa", // 20 } - start = 13 - end = 14 //len(dataLengths) + start = 0 + end = 1 //len(dataLengths) ) func init() { diff --git a/file/job.go b/file/job.go index 691caf61fe..cfc4933d86 100644 --- a/file/job.go +++ b/file/job.go @@ -252,6 +252,7 @@ OUTER: // enter here if new data is written to the job case entry := <-jb.writeC: + jb.mu.Lock() if entry.index == 0 { jb.firstSectionData = entry.data } @@ -265,25 +266,28 @@ OUTER: // otherwise if we reached the chunk limit we also continue to hashing if newCount == endCount { //log.Trace("quitting writec - endcount") + jb.mu.Unlock() break OUTER } if newCount == jb.params.Branches { //log.Trace("quitting writec - branches") + jb.mu.Unlock() break OUTER } + jb.mu.Unlock() // enter here if data writes have been completed // TODO: this case currently executes for all cycles after data write is complete for which writes to this job do not happen. 
perhaps it can be improved case <-doneC: - + jb.mu.Lock() // we can never have count 0 and have a completed job // this is the easiest check we can make - count := jb.count() - if count == 0 { - continue - } + // if count == 0 { + // continue + // } //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount) doneC = nil + count := jb.count() // if the target count falls within the span of this job // set the endcount so we know we have to do extra calculations for @@ -296,10 +300,12 @@ OUTER: // if we have reached the end count for this chunk, we proceed to hashing // this case is important when write to the level happen after this goroutine // registers that data writes have been completed - if count == int(endCount) { + if count > 0 && count == int(endCount) { //log.Trace("quitting donec", "level", jb.level, "count", jb.count()) + jb.mu.Unlock() break OUTER } + jb.mu.Unlock() } } diff --git a/file/util.go b/file/util.go index cd75f09bd7..b19e111ce5 100644 --- a/file/util.go +++ b/file/util.go @@ -3,8 +3,6 @@ package file import ( "encoding/binary" "math" - - "github.com/ethersphere/swarm/log" ) // creates a binary span size representation @@ -41,7 +39,7 @@ func dataSectionToLevelBoundary(p *treeParams, lvl int, section int) int { span := p.Spans[lvl+1] spans := section / span spanBytes := spans * span - log.Trace("levelboundary", "spans", spans, "section", section, "span", span) + //log.Trace("levelboundary", "spans", spans, "section", section, "span", span) return spanBytes } From f76e00f122b3414b8a275e970298011222909162 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 17:20:17 +0100 Subject: [PATCH 14/67] file: Set target doneC to nil if target set on job create --- file/common_test.go | 2 +- file/job.go | 40 ++++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index b0afd64143..524778d6b6 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -58,7 +58,7 @@ var ( } start = 0 - end = 1 //len(dataLengths) + end = len(dataLengths) ) func init() { diff --git a/file/job.go b/file/job.go index cfc4933d86..642c67ba4c 100644 --- a/file/job.go +++ b/file/job.go @@ -157,6 +157,7 @@ type job struct { writeC chan jobUnit writer bmt.SectionWriter // underlying data processor + doneC chan struct{} // pointer to target doneC channel, set to nil in process() when closed mu sync.Mutex } @@ -170,10 +171,21 @@ func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSe writer: params.hashFunc(), writeC: make(chan jobUnit), target: tgt, + doneC: nil, } if jb.index == nil { jb.index = newJobIndex(9) } + targetLevel := tgt.Level() + if targetLevel == 0 { + //log.Trace("setting target", "level", lvl) + jb.doneC = tgt.doneC + + } else { + targetCount := tgt.Count() + jb.endCount = int32(jb.targetCountToEndCount(targetCount)) + } + //log.Trace("target count", "level", lvl, "count", tgt.Count()) jb.index.Add(jb) if !params.Debug { @@ -239,13 +251,11 @@ func (jb *job) write(index int, data []byte) { // - data write is finalized and targetcount is reached on a subsequent job write func (jb *job) process() { - doneC := jb.target.doneC defer jb.destroy() // is set when data write is finished, AND // the final data section falls within the span of this job // if not, loop will only exit on Branches writes - endCount := 0 OUTER: for { select { @@ -253,10 +263,12 @@ OUTER: // enter here if new data is written to the job case entry := <-jb.writeC: jb.mu.Lock() 
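+			// take the write count and the current endCount in one snapshot while
+			// holding the lock, so the comparisons below see a consistent pair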
+ newCount := jb.inc() + endCount := int(jb.endCount) + jb.mu.Unlock() if entry.index == 0 { jb.firstSectionData = entry.data } - newCount := jb.inc() //log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", newCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) // this write is superfluous when the received data is the root hash jb.writer.Write(entry.index, entry.data) @@ -266,41 +278,37 @@ OUTER: // otherwise if we reached the chunk limit we also continue to hashing if newCount == endCount { //log.Trace("quitting writec - endcount") - jb.mu.Unlock() break OUTER } if newCount == jb.params.Branches { //log.Trace("quitting writec - branches") - jb.mu.Unlock() break OUTER } - jb.mu.Unlock() // enter here if data writes have been completed // TODO: this case currently executes for all cycles after data write is complete for which writes to this job do not happen. perhaps it can be improved - case <-doneC: + case <-jb.doneC: jb.mu.Lock() + jb.doneC = nil // we can never have count 0 and have a completed job // this is the easiest check we can make // if count == 0 { // continue // } - //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", endCount) - doneC = nil + //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", jb.endCount) count := jb.count() // if the target count falls within the span of this job // set the endcount so we know we have to do extra calculations for // determining span in case of unbalanced tree targetCount := jb.target.Count() - endCount = jb.targetCountToEndCount(targetCount) - jb.endCount = int32(endCount) - //log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", endCount) + jb.endCount = int32(jb.targetCountToEndCount(targetCount)) + //log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", jb.endCount) // if we have reached the end count for this chunk, we proceed to hashing // this case is important when write to the level happen after this goroutine // registers that data writes have been completed - if count > 0 && count == int(endCount) { + if count > 0 && count == int(jb.endCount) { //log.Trace("quitting donec", "level", jb.level, "count", jb.count()) jb.mu.Unlock() break OUTER @@ -319,13 +327,13 @@ OUTER: size := jb.size() span := lengthToSpan(size) refSize := jb.count() * jb.params.SectionSize - //log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", endCount) + //log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) ref := jb.writer.Sum(nil, refSize, span) // endCount > 0 means this is the last chunk on the level // the hash from the level below the target level will be the result belowRootLevel := int(targetLevel) - 1 - if endCount > 0 && jb.level == belowRootLevel { + if jb.endCount > 0 && jb.level == belowRootLevel { jb.target.resultC <- ref return } @@ -338,7 +346,7 @@ OUTER: // in the event that we have a balanced tree and a chunk with single reference below the target level // we move the single reference up to the penultimate level - if endCount == 1 { + if jb.endCount == 1 { ref = jb.firstSectionData for parent.level < belowRootLevel { //log.Trace("parent write skip", "level", parent.level) From 
4b87da3910b25d4d0bd7b804b9454ffef058cd1a Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 17:33:44 +0100 Subject: [PATCH 15/67] file: Move serialdata generation in bench outside bench loop --- file/job_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/file/job_test.go b/file/job_test.go index 7f7f1d7bdc..c56ea123b1 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -625,10 +625,10 @@ func benchmarkVector(b *testing.B) { } dataHash := bmt.New(poolSync) treeParams := newTreeParams(sectionSize, branches, refHashFunc) + _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < b.N; j++ { tgt := newTarget() - _, data := testutil.SerialData(dataLength, 255, 0) jb := newJob(treeParams, tgt, nil, 1, 0) count := 0 //log.Info("test vector", "length", dataLength) From 988ff37dd70d0c94f93da8bba5aade29c20c4735 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 17:45:36 +0100 Subject: [PATCH 16/67] file: Lock dummy writer data buffer access --- file/job_test.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/file/job_test.go b/file/job_test.go index c56ea123b1..1f711edd72 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -8,6 +8,7 @@ import ( "math/rand" "strconv" "strings" + "sync" "testing" "time" @@ -35,6 +36,7 @@ type dummySectionWriter struct { data []byte sectionSize int writer hash.Hash + mu sync.Mutex } func newDummySectionWriter(cp int, sectionSize int) *dummySectionWriter { @@ -48,16 +50,22 @@ func newDummySectionWriter(cp int, sectionSize int) *dummySectionWriter { // implements bmt.SectionWriter // BUG: not actually writing to hasher func (d *dummySectionWriter) Write(index int, data []byte) { + d.mu.Lock() + defer d.mu.Unlock() copy(d.data[index*sectionSize:], data) } // implements bmt.SectionWriter func (d *dummySectionWriter) Sum(b []byte, size int, span []byte) []byte { + d.mu.Lock() + defer d.mu.Unlock() return d.writer.Sum(b) } // implements bmt.SectionWriter func (d *dummySectionWriter) Reset() { + d.mu.Lock() + defer d.mu.Unlock() d.data = make([]byte, len(d.data)) d.writer.Reset() } @@ -139,8 +147,9 @@ func TestTargetWithinJob(t *testing.T) { params := newTreeParams(sectionSize, branches, dummyHashFunc) params.Debug = true index := newJobIndex(9) + tgt := newTarget() - jb := newJob(params, nil, index, 1, branches*branches) + jb := newJob(params, tgt, index, 1, branches*branches) defer jb.destroy() finalSize := chunkSize*branches + chunkSize*2 @@ -366,7 +375,7 @@ func TestWriteParentSection(t *testing.T) { finalSection := dataSizeToSectionIndex(finalSize, sectionSize) tgt.Set(finalSize, finalSection, 3) - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) defer cancel() select { case <-tgt.Done(): @@ -378,7 +387,13 @@ func TestWriteParentSection(t *testing.T) { t.Fatalf("parent count: expected %d, got %d", 1, jbnp.count()) } correctRefHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470" - parentRef := jbnp.writer.(*dummySectionWriter).data[32:64] + + // extract data in section 2 from the writer + // TODO: overload writer to provide a get method to extract data to improve clarity + w := jbnp.writer.(*dummySectionWriter) + w.mu.Lock() + parentRef := w.data[32:64] + w.mu.Unlock() parentRefHex := hexutil.Encode(parentRef) if parentRefHex != correctRefHex { t.Fatalf("parent data: expected %s, got %s", correctRefHex, parentRefHex) From 
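The race fixed in patch 16 is the usual shared-buffer one: dummySectionWriter's data slice is written by job goroutines and read back by the test, so every access has to go through the same mutex. A simplified sketch of the pattern (a stand-in type, not the test code):

package main

import (
	"fmt"
	"sync"
)

// sectionBuffer stands in for dummySectionWriter: a flat byte buffer that
// concurrent writers copy sections into
type sectionBuffer struct {
	mu   sync.Mutex
	data []byte
}

func (s *sectionBuffer) write(offset int, b []byte) {
	s.mu.Lock()
	defer s.mu.Unlock()
	copy(s.data[offset:], b)
}

// snapshot copies a range out under the lock, the moral equivalent of the
// locked data[32:64] read in TestWriteParentSection
func (s *sectionBuffer) snapshot(from, to int) []byte {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make([]byte, to-from)
	copy(out, s.data[from:to])
	return out
}

func main() {
	s := &sectionBuffer{data: make([]byte, 64)}
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			s.write(i*32, []byte{byte(i + 1)})
		}(i)
	}
	wg.Wait()
	fmt.Println(s.snapshot(0, 64))
}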
243b92ba0491f8647a8395731d7b3ab9d0a768f7 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 20:18:51 +0100 Subject: [PATCH 17/67] file: Remove commented code --- file/job.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/file/job.go b/file/job.go index 642c67ba4c..894f60e284 100644 --- a/file/job.go +++ b/file/job.go @@ -290,11 +290,6 @@ OUTER: case <-jb.doneC: jb.mu.Lock() jb.doneC = nil - // we can never have count 0 and have a completed job - // this is the easiest check we can make - // if count == 0 { - // continue - // } //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", jb.endCount) count := jb.count() From e491ee0f736c8358537f5cfdc07ff589d3855d84 Mon Sep 17 00:00:00 2001 From: nolash Date: Thu, 28 Nov 2019 21:44:41 +0100 Subject: [PATCH 18/67] file: Update Write and Sum to correct job object interface --- file/hasher.go | 31 +++++++++++++++++++++++++------ file/hasher_test.go | 25 +++++++++++++++++++++++++ file/job.go | 35 +++++++++++++++++++---------------- 3 files changed, 69 insertions(+), 22 deletions(-) diff --git a/file/hasher.go b/file/hasher.go index 462fff81eb..96799d7022 100644 --- a/file/hasher.go +++ b/file/hasher.go @@ -10,13 +10,17 @@ import ( // it is intended to be chainable to accommodate for arbitrary chunk manipulation // like encryption, erasure coding etc type Hasher struct { - writer *bmt.Hasher - target *target - params *treeParams - lastJob *job - jobMu sync.Mutex + writer *bmt.Hasher + target *target + params *treeParams + index *jobIndex + + writeC chan []byte + doneC chan struct{} + job *job // current level 1 job being written to writerPool sync.Pool size int + count int } // New creates a new Hasher object @@ -24,26 +28,41 @@ func New(sectionSize int, branches int, dataWriter *bmt.Hasher, refWriterFunc fu h := &Hasher{ writer: dataWriter, target: newTarget(), + index: newJobIndex(9), + writeC: make(chan []byte, branches), } h.writerPool.New = func() interface{} { return refWriterFunc() } h.params = newTreeParams(sectionSize, branches, h.getWriter) + h.job = newJob(h.params, h.target, h.index, 1, 0) return h } // Write implements hash.Hash +// TODO: enforce buffered writes and limits func (h *Hasher) Write(b []byte) { + if h.count > 0 && h.count%branches == 0 { + jb := h.job + h.job = h.job.Next() + jb.destroy() + } + span := lengthToSpan(len(b)) + h.writer.ResetWithLength(span) _, err := h.writer.Write(b) if err != nil { panic(err) } + h.size += len(b) + h.job.write(h.count%h.params.Branches, h.writer.Sum(nil)) + h.count++ + } // Sum implements hash.Hash func (h *Hasher) Sum(_ []byte) []byte { - sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize) + 1 + sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize) targetLevel := getLevelsFromLength(h.size, h.params.SectionSize, h.params.Branches) h.target.Set(h.size, sectionCount, targetLevel) var ref []byte diff --git a/file/hasher_test.go b/file/hasher_test.go index b691ba57a4..63feb49bb8 100644 --- a/file/hasher_test.go +++ b/file/hasher_test.go @@ -1 +1,26 @@ package file + +import ( + "testing" + + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +func TestHasherOneFullChunk(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + dataHash := bmt.New(poolSync) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() bmt.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } 
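// The span passed to ResetWithLength above is the number of data bytes the
// resulting hash will represent, serialized by lengthToSpan. A sketch of
// such an encoder, assuming the 8-byte little-endian uint64 convention used
// by swarm's bmt package:

package main

import (
	"encoding/binary"
	"fmt"
)

func lengthToSpan(length int) []byte {
	span := make([]byte, 8)
	binary.LittleEndian.PutUint64(span, uint64(length))
	return span
}

func main() {
	fmt.Printf("%x\n", lengthToSpan(4096)) // 0010000000000000
}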
+ + _, data := testutil.SerialData(chunkSize*branches, 255, 0) + h := New(sectionSize, branches, dataHash, refHashFunc) + for i := 0; i < chunkSize*branches; i += chunkSize { + h.Write(data[i : i+chunkSize]) + } + ref := h.Sum(nil) + t.Logf("res: %x", ref) +} diff --git a/file/job.go b/file/job.go index 894f60e284..b54fc2ac68 100644 --- a/file/job.go +++ b/file/job.go @@ -5,7 +5,9 @@ import ( "sync" "sync/atomic" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/log" ) // keeps an index of all the existing jobs for a file hashing operation @@ -39,7 +41,7 @@ func (ji *jobIndex) String() string { // Add adds a job to the index at the level // and data section index specified in the job func (ji *jobIndex) Add(jb *job) { - //log.Trace("adding job", "job", jb) + log.Trace("adding job", "job", jb) ji.jobs[jb.level].Store(jb.dataSection, jb) } @@ -68,7 +70,7 @@ func (ji *jobIndex) AddTopHash(ref []byte) { ji.mu.Lock() defer ji.mu.Unlock() ji.topHashes = append(ji.topHashes, ref) - //log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) + log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) } // GetJobHash gets the current top hash for a particular level set by AddTopHash @@ -109,6 +111,7 @@ func (t *target) Set(size int, sections int, level int) { t.size = int32(size) t.sections = int32(sections) t.level = int32(level) + log.Trace("target set", "size", t.size, "section", t.sections, "level", t.level) close(t.doneC) } @@ -178,14 +181,14 @@ func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSe } targetLevel := tgt.Level() if targetLevel == 0 { - //log.Trace("setting target", "level", lvl) + log.Trace("target not set", "level", lvl) jb.doneC = tgt.doneC } else { targetCount := tgt.Count() jb.endCount = int32(jb.targetCountToEndCount(targetCount)) } - //log.Trace("target count", "level", lvl, "count", tgt.Count()) + log.Trace("target count", "level", lvl, "count", tgt.Count()) jb.index.Add(jb) if !params.Debug { @@ -220,7 +223,7 @@ func (jb *job) size() int { if endCount == 0 { return count * jb.params.SectionSize * jb.params.Spans[jb.level] } - //log.Trace("size", "sections", jb.target.sections, "endcount", endCount, "level", jb.level) + log.Trace("size", "sections", jb.target.sections, "endcount", endCount, "level", jb.level) return int(jb.target.Size()) % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) } @@ -235,7 +238,7 @@ func (jb *job) write(index int, data []byte) { if jb.dataSection == 0 { topHashLevel := jb.index.GetTopHashLevel() if topHashLevel < jb.level { - //log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) + log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) jb.index.AddTopHash(data) } } @@ -269,7 +272,7 @@ OUTER: if entry.index == 0 { jb.firstSectionData = entry.data } - //log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", newCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) + log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", newCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) // this write is superfluous when the received data is the root hash jb.writer.Write(entry.index, entry.data) @@ -277,11 +280,11 @@ OUTER: // which means data write has been completed // otherwise if we reached the chunk limit we also continue to hashing if newCount == endCount { 
- //log.Trace("quitting writec - endcount") + log.Trace("quitting writec - endcount", "c", endCount, "level", jb.level) break OUTER } if newCount == jb.params.Branches { - //log.Trace("quitting writec - branches") + log.Trace("quitting writec - branches") break OUTER } @@ -290,7 +293,7 @@ OUTER: case <-jb.doneC: jb.mu.Lock() jb.doneC = nil - //log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", jb.endCount) + log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", jb.endCount) count := jb.count() // if the target count falls within the span of this job @@ -298,13 +301,13 @@ OUTER: // determining span in case of unbalanced tree targetCount := jb.target.Count() jb.endCount = int32(jb.targetCountToEndCount(targetCount)) - //log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", jb.endCount) + log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", jb.endCount) // if we have reached the end count for this chunk, we proceed to hashing // this case is important when write to the level happen after this goroutine // registers that data writes have been completed if count > 0 && count == int(jb.endCount) { - //log.Trace("quitting donec", "level", jb.level, "count", jb.count()) + log.Trace("quitting donec", "level", jb.level, "count", jb.count()) jb.mu.Unlock() break OUTER } @@ -322,7 +325,7 @@ OUTER: size := jb.size() span := lengthToSpan(size) refSize := jb.count() * jb.params.SectionSize - //log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) + log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) ref := jb.writer.Sum(nil, refSize, span) // endCount > 0 means this is the last chunk on the level @@ -335,7 +338,7 @@ OUTER: // retrieve the parent and the corresponding section in it to write to parent := jb.parent() - //log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) + log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) nextLevel := jb.level + 1 parentSection := dataSectionToLevelSection(jb.params, nextLevel, jb.dataSection) @@ -344,7 +347,7 @@ OUTER: if jb.endCount == 1 { ref = jb.firstSectionData for parent.level < belowRootLevel { - //log.Trace("parent write skip", "level", parent.level) + log.Trace("parent write skip", "level", parent.level) oldParent := parent parent = parent.parent() oldParent.destroy() @@ -376,7 +379,7 @@ func (jb *job) targetWithinJob(targetSection int) (int, bool) { ok = true } - //log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endindex", endIndex, "ok", ok) + log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endindex", endIndex, "ok", ok) return int(endIndex), ok } From bec33a089e281ce651186bfae08ef23e2122b81d Mon Sep 17 00:00:00 2001 From: nolash Date: Sat, 30 Nov 2019 10:58:52 +0100 Subject: [PATCH 19/67] file: Add hasher benchmark --- file/common_test.go | 2 +- file/hasher.go | 35 +++++++++++++++++++++++++++-------- file/hasher_r_test.go | 3 ++- file/hasher_test.go | 39 
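The benchmark added here follows the same convention as the existing vector benchmark: parameters travel inside the subtest name given to b.Run, and the benchmark body recovers them with strings.Split(b.Name(), "/"), where index 0 is the parent benchmark name, index 1 the vector index, and index 2 the data length. The parsing step in isolation:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// for b.Run(fmt.Sprintf("%d/%d", 7, dataLengths[7]), ...) inside
	// BenchmarkHasher, the name seen by the subtest body is:
	name := "BenchmarkHasher/7/4127"
	params := strings.Split(name, "/")
	dataLength, err := strconv.ParseInt(params[2], 10, 64)
	if err != nil {
		panic(err)
	}
	fmt.Println(dataLength) // 4127
}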
+++++++++++++++++++++++++++++++++++++++ file/tree.go | 11 ++++++++++- 5 files changed, 79 insertions(+), 11 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 524778d6b6..b0afd64143 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -58,7 +58,7 @@ var ( } start = 0 - end = len(dataLengths) + end = 1 //len(dataLengths) ) func init() { diff --git a/file/hasher.go b/file/hasher.go index 96799d7022..49d185dffe 100644 --- a/file/hasher.go +++ b/file/hasher.go @@ -19,6 +19,7 @@ type Hasher struct { doneC chan struct{} job *job // current level 1 job being written to writerPool sync.Pool + hasherPool sync.Pool size int count int } @@ -26,7 +27,7 @@ type Hasher struct { // New creates a new Hasher object func New(sectionSize int, branches int, dataWriter *bmt.Hasher, refWriterFunc func() bmt.SectionWriter) *Hasher { h := &Hasher{ - writer: dataWriter, + //writer: dataWriter, target: newTarget(), index: newJobIndex(9), writeC: make(chan []byte, branches), @@ -34,6 +35,9 @@ func New(sectionSize int, branches int, dataWriter *bmt.Hasher, refWriterFunc fu h.writerPool.New = func() interface{} { return refWriterFunc() } + h.hasherPool.New = func() interface{} { + return dataWriterFunc() + } h.params = newTreeParams(sectionSize, branches, h.getWriter) h.job = newJob(h.params, h.target, h.index, 1, 0) @@ -48,14 +52,16 @@ func (h *Hasher) Write(b []byte) { h.job = h.job.Next() jb.destroy() } - span := lengthToSpan(len(b)) - h.writer.ResetWithLength(span) - _, err := h.writer.Write(b) - if err != nil { - panic(err) - } + go func(i int, jb *job) { + hasher := h.getDataWriter(len(b)) + _, err := hasher.Write(b) + if err != nil { + panic(err) + } + h.job.write(h.count%h.params.Branches, h.writer.Sum(nil)) + h.putDataWriter(hasher) + }(h.count, h.job) h.size += len(b) - h.job.write(h.count%h.params.Branches, h.writer.Sum(nil)) h.count++ } @@ -72,6 +78,19 @@ func (h *Hasher) Sum(_ []byte) []byte { return ref } +// proxy for sync.Pool +func (h *Hasher) putDataWriter(w *bmt.Hasher) { + h.hasherPool.Put(w) +} + +// proxy for sync.Pool +func (h *Hasher) getDataWriter() bmt.SectionWriter { + span := lengthToSpan(len(b)) + hasher := h.hasherPool.Get().(*bmt.Hasher) + hasher.ResetWithLength(span) + return hasher +} + // proxy for sync.Pool func (h *Hasher) putWriter(w bmt.SectionWriter) { w.Reset() diff --git a/file/hasher_r_test.go b/file/hasher_r_test.go index 8be8aab010..32873d7d05 100644 --- a/file/hasher_r_test.go +++ b/file/hasher_r_test.go @@ -81,10 +81,11 @@ func benchmarkReferenceFileHasher(b *testing.B) { if err != nil { b.Fatal(err) } - r, data := testutil.SerialData(int(dataLength), 255, 0) pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + log.Trace("running reference bench", "l", dataLength) b.ResetTimer() for i := 0; i < b.N; i++ { + r, data := testutil.SerialData(int(dataLength), 255, 0) h := bmt.New(pool) fh := NewReferenceFileHasher(h, branches) fh.Hash(r, len(data)) diff --git a/file/hasher_test.go b/file/hasher_test.go index 63feb49bb8..06e9212c55 100644 --- a/file/hasher_test.go +++ b/file/hasher_test.go @@ -1,6 +1,9 @@ package file import ( + "fmt" + "strconv" + "strings" "testing" "github.com/ethersphere/swarm/bmt" @@ -24,3 +27,39 @@ func TestHasherOneFullChunk(t *testing.T) { ref := h.Sum(nil) t.Logf("res: %x", ref) } + +// BenchmarkHasher generates benchmarks that are comparable to the pyramid hasher +func BenchmarkHasher(b *testing.B) { + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d/%d", i, dataLengths[i]), benchmarkHasher) + 
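// putDataWriter/getDataWriter above are thin proxies over sync.Pool: Get
// returns a previously Put instance or allocates through New, and the
// caller must reset any stale state before use. The bare pattern, with a
// hypothetical stub in place of *bmt.Hasher:

package main

import (
	"fmt"
	"sync"
)

type hasherStub struct{ resets int }

func (h *hasherStub) Reset() { h.resets++ }

func main() {
	pool := sync.Pool{
		New: func() interface{} { return &hasherStub{} },
	}
	h := pool.Get().(*hasherStub) // reused instance, or fresh from New
	h.Reset()                     // pooled state is stale; reset before use
	pool.Put(h)                   // return for reuse
	fmt.Println(h.resets)
}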
} +} + +func benchmarkHasher(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLengthParam, err := strconv.ParseInt(params[2], 10, 64) + if err != nil { + b.Fatal(err) + } + dataLength := int(dataLengthParam) + + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() bmt.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHash := bmt.New(poolSync) + _, data := testutil.SerialData(dataLength, 255, 0) + + for j := 0; j < b.N; j++ { + h := New(sectionSize, branches, dataHash, refHashFunc) + for i := 0; i < dataLength; i += chunkSize { + size := chunkSize + if dataLength-i < chunkSize { + size = dataLength - i + } + h.Write(data[i : i+size]) + } + h.Sum(nil) + } +} diff --git a/file/tree.go b/file/tree.go index 7ce5c7fa6c..49e97d360b 100644 --- a/file/tree.go +++ b/file/tree.go @@ -1,6 +1,10 @@ package file -import "github.com/ethersphere/swarm/bmt" +import ( + "sync" + + "github.com/ethersphere/swarm/bmt" +) // defines the boundaries of the hashing job and also contains the hash factory functino of the job // setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start) @@ -10,6 +14,7 @@ type treeParams struct { Spans []int Debug bool hashFunc func() bmt.SectionWriter + writerPool sync.Pool } func newTreeParams(section int, branches int, hashFunc func() bmt.SectionWriter) *treeParams { @@ -19,6 +24,10 @@ func newTreeParams(section int, branches int, hashFunc func() bmt.SectionWriter) Branches: branches, hashFunc: hashFunc, } + p.writerPool.New = func() interface{} { + return hashFunc() + } + span := 1 for i := 0; i < 9; i++ { p.Spans = append(p.Spans, span) From 45db7d9a7ba0d32849177ca41f13963bd33f5bd8 Mon Sep 17 00:00:00 2001 From: nolash Date: Sat, 30 Nov 2019 13:27:24 +0100 Subject: [PATCH 20/67] file: Make Hasher writers asynchronous within jobs --- file/common_test.go | 2 +- file/hasher.go | 30 +++++----- file/hasher_test.go | 138 ++++++++++++++++++++++++++++++++++++++++++-- file/job.go | 61 ++++++++++---------- 4 files changed, 180 insertions(+), 51 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index b0afd64143..524778d6b6 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -58,7 +58,7 @@ var ( } start = 0 - end = 1 //len(dataLengths) + end = len(dataLengths) ) func init() { diff --git a/file/hasher.go b/file/hasher.go index 49d185dffe..044377bf85 100644 --- a/file/hasher.go +++ b/file/hasher.go @@ -10,7 +10,6 @@ import ( // it is intended to be chainable to accommodate for arbitrary chunk manipulation // like encryption, erasure coding etc type Hasher struct { - writer *bmt.Hasher target *target params *treeParams index *jobIndex @@ -24,19 +23,20 @@ type Hasher struct { count int } -// New creates a new Hasher object -func New(sectionSize int, branches int, dataWriter *bmt.Hasher, refWriterFunc func() bmt.SectionWriter) *Hasher { +// New creates a new Hasher object using the given sectionSize and branch factor +// hasherFunc is used to create *bmt.Hashers to hash the incoming data +// writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs. 
It may be pipelined to other components with the same interface +func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher, writerFunc func() bmt.SectionWriter) *Hasher { h := &Hasher{ - //writer: dataWriter, target: newTarget(), index: newJobIndex(9), writeC: make(chan []byte, branches), } h.writerPool.New = func() interface{} { - return refWriterFunc() + return writerFunc() } h.hasherPool.New = func() interface{} { - return dataWriterFunc() + return hasherFunc() } h.params = newTreeParams(sectionSize, branches, h.getWriter) h.job = newJob(h.params, h.target, h.index, 1, 0) @@ -46,24 +46,22 @@ func New(sectionSize int, branches int, dataWriter *bmt.Hasher, refWriterFunc fu // Write implements hash.Hash // TODO: enforce buffered writes and limits +// TODO: attempt omit modulo calc on every pass func (h *Hasher) Write(b []byte) { - if h.count > 0 && h.count%branches == 0 { - jb := h.job + if h.count%branches == 0 && h.count > 0 { h.job = h.job.Next() - jb.destroy() } go func(i int, jb *job) { - hasher := h.getDataWriter(len(b)) + hasher := h.getHasher(len(b)) _, err := hasher.Write(b) if err != nil { panic(err) } - h.job.write(h.count%h.params.Branches, h.writer.Sum(nil)) - h.putDataWriter(hasher) + jb.write(i%h.params.Branches, hasher.Sum(nil)) + h.putHasher(hasher) }(h.count, h.job) h.size += len(b) h.count++ - } // Sum implements hash.Hash @@ -79,13 +77,13 @@ func (h *Hasher) Sum(_ []byte) []byte { } // proxy for sync.Pool -func (h *Hasher) putDataWriter(w *bmt.Hasher) { +func (h *Hasher) putHasher(w *bmt.Hasher) { h.hasherPool.Put(w) } // proxy for sync.Pool -func (h *Hasher) getDataWriter() bmt.SectionWriter { - span := lengthToSpan(len(b)) +func (h *Hasher) getHasher(l int) *bmt.Hasher { + span := lengthToSpan(l) hasher := h.hasherPool.Get().(*bmt.Hasher) hasher.ResetWithLength(span) return hasher diff --git a/file/hasher_test.go b/file/hasher_test.go index 06e9212c55..ff1e83e94b 100644 --- a/file/hasher_test.go +++ b/file/hasher_test.go @@ -6,26 +6,152 @@ import ( "strings" "testing" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) +// TestHasherJobTopHash verifies that the top hash on the first level is correctly set even though the Hasher writes asynchronously to the underlying job +func TestHasherJobTopHash(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() bmt.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() *bmt.Hasher { + return bmt.New(poolSync) + } + + _, data := testutil.SerialData(chunkSize*branches, 255, 0) + h := New(sectionSize, branches, dataHashFunc, refHashFunc) + for i := 0; i < chunkSize*branches; i += chunkSize { + h.Write(data[i : i+chunkSize]) + } + h.Sum(nil) + levelOneTopHash := hexutil.Encode(h.index.GetTopHash(1)) + correctLevelOneTopHash := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" + if levelOneTopHash != correctLevelOneTopHash { + t.Fatalf("tophash; expected %s, got %s", correctLevelOneTopHash, levelOneTopHash) + } + +} + +// TestHasherOneFullChunk verifies the result of writing a single data chunk to Hasher func TestHasherOneFullChunk(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - dataHash := bmt.New(poolSync) poolAsync := 
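// The asynchronous Write above hands h.count and h.job to the goroutine as
// arguments; reading them from the closure instead would race with the main
// loop, which keeps mutating both. The difference in isolation (hypothetical
// worker, not the hasher):

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	results := make([]int, 4)
	for count := 0; count < 4; count++ {
		wg.Add(1)
		go func(i int) { // i is fixed at spawn time, like (h.count, h.job)
			defer wg.Done()
			results[i] = i * i
		}(count)
	}
	wg.Wait()
	fmt.Println(results) // [0 1 4 9]
}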
bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() bmt.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } + dataHashFunc := func() *bmt.Hasher { + return bmt.New(poolSync) + } _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(sectionSize, branches, dataHash, refHashFunc) + h := New(sectionSize, branches, dataHashFunc, refHashFunc) for i := 0; i < chunkSize*branches; i += chunkSize { h.Write(data[i : i+chunkSize]) } ref := h.Sum(nil) - t.Logf("res: %x", ref) + correctRootHash := "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" + rootHash := hexutil.Encode(ref) + if rootHash != correctRootHash { + t.Fatalf("roothash; expected %s, got %s", correctRootHash, rootHash) + } +} + +// TestHasherOneFullChunk verifies that Hasher creates new jobs on branch thresholds +func TestHasherJobChange(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() bmt.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() *bmt.Hasher { + return bmt.New(poolSync) + } + + _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) + h := New(sectionSize, branches, dataHashFunc, refHashFunc) + + jobs := make(map[string]int) + for i := 0; i < chunkSize*branches*branches; i += chunkSize { + h.Write(data[i : i+chunkSize]) + jobs[h.job.String()]++ + } + i := 0 + for _, v := range jobs { + if v != branches { + t.Fatalf("jobwritecount writes: expected %d, got %d", branches, v) + } + i++ + } + if i != branches { + t.Fatalf("jobwritecount jobs: expected %d, got %d", branches, i) + } +} + +// TestHasherONeFullLevelOneChunk verifies the result of writing branches times data chunks to Hasher +func TestHasherOneFullLevelOneChunk(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() bmt.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() *bmt.Hasher { + return bmt.New(poolSync) + } + + _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) + h := New(sectionSize, branches, dataHashFunc, refHashFunc) + for i := 0; i < chunkSize*branches*branches; i += chunkSize { + h.Write(data[i : i+chunkSize]) + } + ref := h.Sum(nil) + correctRootHash := "0x522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b" + rootHash := hexutil.Encode(ref) + if rootHash != correctRootHash { + t.Fatalf("roothash; expected %s, got %s", correctRootHash, rootHash) + } +} + +func TestHasherVector(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() bmt.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() *bmt.Hasher { + return bmt.New(poolSync) + } + + var mismatch int + for i, dataLength := range dataLengths { + log.Info("hashervector start", "i", i, "l", dataLength) + eq := true + h := New(sectionSize, branches, dataHashFunc, refHashFunc) + _, data := testutil.SerialData(dataLength, 255, 0) + for j := 0; j < dataLength; j += chunkSize { + size := chunkSize + if dataLength-j < chunkSize { + size = dataLength - j + } + h.Write(data[j : j+size]) + } + ref := h.Sum(nil) + correctRefHex := "0x" + 
expected[i] + refHex := hexutil.Encode(ref) + if refHex != correctRefHex { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, ref, expected[i]) + } + if mismatch > 0 { + t.Fatalf("mismatches: %d/%d", mismatch, end-start) + } } // BenchmarkHasher generates benchmarks that are comparable to the pyramid hasher @@ -48,11 +174,13 @@ func benchmarkHasher(b *testing.B) { refHashFunc := func() bmt.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHash := bmt.New(poolSync) + dataHashFunc := func() *bmt.Hasher { + return bmt.New(poolSync) + } _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < b.N; j++ { - h := New(sectionSize, branches, dataHash, refHashFunc) + h := New(sectionSize, branches, dataHashFunc, refHashFunc) for i := 0; i < dataLength; i += chunkSize { size := chunkSize if dataLength-i < chunkSize { diff --git a/file/job.go b/file/job.go index b54fc2ac68..0bf2fbe3bd 100644 --- a/file/job.go +++ b/file/job.go @@ -5,9 +5,7 @@ import ( "sync" "sync/atomic" - "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" - "github.com/ethersphere/swarm/log" ) // keeps an index of all the existing jobs for a file hashing operation @@ -41,7 +39,7 @@ func (ji *jobIndex) String() string { // Add adds a job to the index at the level // and data section index specified in the job func (ji *jobIndex) Add(jb *job) { - log.Trace("adding job", "job", jb) + //log.Trace("adding job", "job", jb) ji.jobs[jb.level].Store(jb.dataSection, jb) } @@ -70,7 +68,7 @@ func (ji *jobIndex) AddTopHash(ref []byte) { ji.mu.Lock() defer ji.mu.Unlock() ji.topHashes = append(ji.topHashes, ref) - log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) + //log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) } // GetJobHash gets the current top hash for a particular level set by AddTopHash @@ -111,7 +109,7 @@ func (t *target) Set(size int, sections int, level int) { t.size = int32(size) t.sections = int32(sections) t.level = int32(level) - log.Trace("target set", "size", t.size, "section", t.sections, "level", t.level) + //log.Trace("target set", "size", t.size, "section", t.sections, "level", t.level) close(t.doneC) } @@ -143,6 +141,7 @@ func (t *target) Done() <-chan []byte { type jobUnit struct { index int data []byte + count int } // encapsulates one single chunk to be hashed @@ -181,14 +180,14 @@ func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSe } targetLevel := tgt.Level() if targetLevel == 0 { - log.Trace("target not set", "level", lvl) + //log.Trace("target not set", "level", lvl) jb.doneC = tgt.doneC } else { targetCount := tgt.Count() jb.endCount = int32(jb.targetCountToEndCount(targetCount)) } - log.Trace("target count", "level", lvl, "count", tgt.Count()) + //log.Trace("target count", "level", lvl, "count", tgt.Count()) jb.index.Add(jb) if !params.Debug { @@ -199,7 +198,7 @@ func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSe // implements Stringer interface func (jb *job) String() string { - return fmt.Sprintf("job: l:%d,s:%d,c:%d", jb.level, jb.dataSection, jb.count()) + return fmt.Sprintf("job: l:%d,s:%d", jb.level, jb.dataSection) } // atomically increments the write counter of the job @@ -218,12 +217,14 @@ func (jb *job) count() int { // TODO: returning expected size in one case and actual size in another can lead to confusion // TODO: two atomic ops, may change value inbetween func (jb 
*job) size() int { - count := jb.count() - endCount := int(atomic.LoadInt32(&jb.endCount)) - if endCount == 0 { + jb.mu.Lock() + count := int(jb.cursorSection) //jb.count() + endCount := int(jb.endCount) //int(atomic.LoadInt32(&jb.endCount)) + jb.mu.Unlock() + if endCount%jb.params.Branches == 0 { return count * jb.params.SectionSize * jb.params.Spans[jb.level] } - log.Trace("size", "sections", jb.target.sections, "endcount", endCount, "level", jb.level) + //log.Trace("size", "sections", jb.target.sections, "size", jb.target.Size(), "endcount", endCount, "level", jb.level) return int(jb.target.Size()) % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) } @@ -231,14 +232,16 @@ func (jb *job) size() int { // does no checking for data length or index validity func (jb *job) write(index int, data []byte) { + jb.inc() + // if a write is received at the first datasection of a level we need to store this hash // in case of a balanced tree and we need to send it to resultC later // at the time of hasing of a balanced tree we have no way of knowing for sure whether // that is the end of the job or not - if jb.dataSection == 0 { + if jb.dataSection == 0 && index == 0 { topHashLevel := jb.index.GetTopHashLevel() if topHashLevel < jb.level { - log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) + //log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) jb.index.AddTopHash(data) } } @@ -254,6 +257,7 @@ func (jb *job) write(index int, data []byte) { // - data write is finalized and targetcount is reached on a subsequent job write func (jb *job) process() { + var processCount int defer jb.destroy() // is set when data write is finished, AND @@ -266,25 +270,25 @@ OUTER: // enter here if new data is written to the job case entry := <-jb.writeC: jb.mu.Lock() - newCount := jb.inc() endCount := int(jb.endCount) + processCount++ jb.mu.Unlock() if entry.index == 0 { jb.firstSectionData = entry.data } - log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "count", newCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) + //log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) // this write is superfluous when the received data is the root hash jb.writer.Write(entry.index, entry.data) // since newcount is incremented above it can only equal endcount if this has been set in the case below, // which means data write has been completed // otherwise if we reached the chunk limit we also continue to hashing - if newCount == endCount { - log.Trace("quitting writec - endcount", "c", endCount, "level", jb.level) + if processCount == endCount { + //log.Trace("quitting writec - endcount", "c", processCount, "level", jb.level) break OUTER } - if newCount == jb.params.Branches { - log.Trace("quitting writec - branches") + if processCount == jb.params.Branches { + //log.Trace("quitting writec - branches") break OUTER } @@ -293,21 +297,21 @@ OUTER: case <-jb.doneC: jb.mu.Lock() jb.doneC = nil - log.Trace("doneloop", "level", jb.level, "count", jb.count(), "endcount", jb.endCount) - count := jb.count() + //log.Trace("doneloop", "level", jb.level, "processCount", processCount, "endcount", jb.endCount) + //count := jb.count() // if the target count falls within the span of this job // set the endcount so we know we have to do extra calculations for // determining span in case of 
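// jb.inc and jb.count wrap sync/atomic; the patch keeps them for the shared
// section cursor while the process loop switches to the goroutine-local
// processCount. The atomic primitive in isolation:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var cursor int32
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			atomic.AddInt32(&cursor, 1) // as in jb.inc()
		}()
	}
	wg.Wait()
	fmt.Println(atomic.LoadInt32(&cursor)) // as in jb.count(): always 100
}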
unbalanced tree targetCount := jb.target.Count() jb.endCount = int32(jb.targetCountToEndCount(targetCount)) - log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", jb.endCount) + //log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", jb.endCount) // if we have reached the end count for this chunk, we proceed to hashing // this case is important when write to the level happen after this goroutine // registers that data writes have been completed - if count > 0 && count == int(jb.endCount) { - log.Trace("quitting donec", "level", jb.level, "count", jb.count()) + if processCount > 0 && processCount == int(jb.endCount) { + //log.Trace("quitting donec", "level", jb.level, "processcount", processCount) jb.mu.Unlock() break OUTER } @@ -325,7 +329,7 @@ OUTER: size := jb.size() span := lengthToSpan(size) refSize := jb.count() * jb.params.SectionSize - log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) + //log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) ref := jb.writer.Sum(nil, refSize, span) // endCount > 0 means this is the last chunk on the level @@ -338,7 +342,7 @@ OUTER: // retrieve the parent and the corresponding section in it to write to parent := jb.parent() - log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) + //log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) nextLevel := jb.level + 1 parentSection := dataSectionToLevelSection(jb.params, nextLevel, jb.dataSection) @@ -347,7 +351,7 @@ OUTER: if jb.endCount == 1 { ref = jb.firstSectionData for parent.level < belowRootLevel { - log.Trace("parent write skip", "level", parent.level) + //log.Trace("parent write skip", "level", parent.level) oldParent := parent parent = parent.parent() oldParent.destroy() @@ -379,7 +383,6 @@ func (jb *job) targetWithinJob(targetSection int) (int, bool) { ok = true } - log.Trace("within", "level", jb.level, "datasection", jb.dataSection, "boundary", dataBoundary, "upper", upperLimit, "target", targetSection, "endindex", endIndex, "ok", ok) return int(endIndex), ok } From 21d25b7188eeee889950f61039eb5b50ffe37006 Mon Sep 17 00:00:00 2001 From: nolash Date: Sun, 1 Dec 2019 11:38:27 +0100 Subject: [PATCH 21/67] file: Add explicit proof of ref hash on chunkSize*branches+chunkSize Same case fails for pyramidhasher = bug --- file/common_test.go | 2 +- file/hasher_r_test.go | 79 ++++++++++++++++++++++++++++++++----------- file/job_test.go | 8 ++--- file/pyramid_test.go | 32 ++++++++++++++++++ 4 files changed, 96 insertions(+), 25 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 524778d6b6..365a782f30 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -54,7 +54,7 @@ var ( "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17 "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18 "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19 - "1d1bae3a0f2d3ef6b58df4fd6c55c2d3752339b6b474eaab52c579fafe336bfa", // 20 + "ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20 } start = 0 diff --git a/file/hasher_r_test.go b/file/hasher_r_test.go 
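The size calculation this patch settles on has two arms: full chunks contribute count * SectionSize * Spans[level] bytes, while the last chunk on a level covers whatever remains of the target size modulo a full chunk's span. A worked check of the remainder arm, assuming Spans[lvl] = branches^lvl as built up in newTreeParams:

package main

import "fmt"

const (
	sectionSize = 32
	branches    = 128
	chunkSize   = sectionSize * branches
)

// spans mirrors treeParams.Spans: how many data sections a single section
// on the given level stands for
func spans(lvl int) int {
	s := 1
	for i := 0; i < lvl; i++ {
		s *= branches
	}
	return s
}

func main() {
	// chunkSize*branches + chunkSize bytes: a balanced tree plus one chunk
	total := chunkSize*branches + chunkSize
	// size covered by the dangling chunk's job on level 1:
	last := total % (spans(1) * sectionSize * branches)
	fmt.Println(last) // 4096
}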
index 32873d7d05..ed01708238 100644 --- a/file/hasher_r_test.go +++ b/file/hasher_r_test.go @@ -1,7 +1,6 @@ package file import ( - "bytes" "fmt" "strconv" "strings" @@ -14,29 +13,69 @@ import ( "golang.org/x/crypto/sha3" ) -// TestReferenceFileHasherDanglingChunk explicitly tests the edge case where a single chunk hash after a balanced tree -// should skip to the level with a single reference -func TestReferenceFileHasherDanglingChunk(t *testing.T) { - t.Skip("too big") +// TestManualDanglingChunk is a test script explicitly hashing and writing every individual level in the dangling chunk edge case +// we use a balanced tree with data size of chunkSize*branches, and a single chunk of data +// this case is chosen because it produces the wrong result in the pyramid hasher at the time of writing (master commit hash 4928d989ebd0854d993c10c194e61a5a5455e4f9) +func TestManualDanglingChunk(t *testing.T) { pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) h := bmt.New(pool) - r, data := testutil.SerialData(chunkSize*branches*branches+sectionSize, 255, 0) - fh := NewReferenceFileHasher(h, branches) - leftHash := fh.Hash(r, chunkSize*branches*branches) - h = bmt.New(pool) - fh = NewReferenceFileHasher(h, branches) - rightHash := fh.Hash(bytes.NewBuffer(data[chunkSize*branches*branches:]), sectionSize) - log.Info("left", "h", hexutil.Encode(leftHash)) - log.Info("right", "h", hexutil.Encode(rightHash)) + // to execute the job we need buffers with the following capacities: + // level 0: chunkSize*branches+chunkSize + // level 1: chunkSize + // level 2: sectionSize * 2 + var levels [][]byte + levels = append(levels, nil) + levels = append(levels, make([]byte, chunkSize)) + levels = append(levels, make([]byte, sectionSize*2)) - h = bmt.New(pool) - span := lengthToSpan(chunkSize * branches * branches * sectionSize) + // hash the balanced tree portion of the data level and write to level 1 + _, levels[0] = testutil.SerialData(chunkSize*branches+chunkSize, 255, 0) + span := lengthToSpan(chunkSize) + for i := 0; i < chunkSize*branches; i += chunkSize { + h.ResetWithLength(span) + h.Write(levels[0][i : i+chunkSize]) + copy(levels[1][i/branches:], h.Sum(nil)) + } + refHex := hexutil.Encode(levels[1][:sectionSize]) + correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" + if refHex != correctRefHex { + t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) + } + + // write a single section of the dangling chunk + // hash it and write the reference on the second section of level 3 + span = lengthToSpan(chunkSize) + h.ResetWithLength(span) + h.Write(levels[0][chunkSize*branches:]) + copy(levels[2][sectionSize:], h.Sum(nil)) + refHex = hexutil.Encode(levels[2][sectionSize:]) + correctRefHex = "0x81b31d9a7f6c377523e8769db021091df23edd9fd7bd6bcdf11a22f518db6006" + if refHex != correctRefHex { + t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) + } + + // hash the chunk on level 2 and write into the first section of level 3 + span = lengthToSpan(chunkSize * branches) h.ResetWithLength(span) - h.Write(leftHash) - h.Write(rightHash) - topHash := h.Sum(nil) - log.Info("top", "h", hexutil.Encode(topHash)) + h.Write(levels[1]) + copy(levels[2], h.Sum(nil)) + refHex = hexutil.Encode(levels[2][:sectionSize]) + correctRefHex = "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" + if refHex != correctRefHex { + t.Fatalf("manual dangling balanced tree; expected %s, got %s", correctRefHex, 
refHex) + } + + // hash the two sections on level 3 to obtain the root hash + span = lengthToSpan(chunkSize*branches + chunkSize) + h.ResetWithLength(span) + h.Write(levels[2]) + ref := h.Sum(nil) + refHex = hexutil.Encode(ref) + correctRefHex = "0xb8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199" + if refHex != correctRefHex { + t.Fatalf("manual dangling root; expected %s, got %s", correctRefHex, refHex) + } } // TestReferenceFileHasher executes the file hasher algorithms on serial input data of periods of 0-254 @@ -45,7 +84,7 @@ func TestReferenceFileHasherDanglingChunk(t *testing.T) { // the "expected" array in common_test.go is generated by this implementation, and test failure due to // result mismatch is nothing else than an indication that something has changed in the reference filehasher // or the underlying hashing algorithm -func TestReferenceFileHasher(t *testing.T) { +func TestReferenceFileHasherVector(t *testing.T) { pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) h := bmt.New(pool) var mismatch int diff --git a/file/job_test.go b/file/job_test.go index 1f711edd72..2909c49910 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -550,7 +550,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { // and verifies against source of truth results generated from the reference hasher // for the same data // TODO: vet dynamically against the referencefilehasher instead of expect vector -func TestVectors(t *testing.T) { +func TestJobVector(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() bmt.SectionWriter { @@ -619,13 +619,13 @@ func TestVectors(t *testing.T) { } // BenchmarkVector generates benchmarks that are comparable to the pyramid hasher -func BenchmarkVector(b *testing.B) { +func BenchmarkJob(b *testing.B) { for i := start; i < end; i++ { - b.Run(fmt.Sprintf("%d/%d", i, dataLengths[i]), benchmarkVector) + b.Run(fmt.Sprintf("%d/%d", i, dataLengths[i]), benchmarkJob) } } -func benchmarkVector(b *testing.B) { +func benchmarkJob(b *testing.B) { params := strings.Split(b.Name(), "/") dataLengthParam, err := strconv.ParseInt(params[2], 10, 64) if err != nil { diff --git a/file/pyramid_test.go b/file/pyramid_test.go index 17a5f7185e..3b61e818c5 100644 --- a/file/pyramid_test.go +++ b/file/pyramid_test.go @@ -10,10 +10,42 @@ import ( "testing" "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/storage" "github.com/ethersphere/swarm/testutil" ) +func TestPyramidHasherVector(t *testing.T) { + t.Skip("only provided for easy reference to bug in case chunkSize*129") + var mismatch int + for i := start; i < end; i++ { + eq := true + dataLength := dataLengths[i] + log.Info("pyramidvector start", "i", i, "l", dataLength) + buf, _ := testutil.SerialData(dataLength, 255, 0) + putGetter := storage.NewHasherStore(&storage.FakeChunkStore{}, storage.MakeHashFunc(storage.BMTHash), false, chunk.NewTag(0, "foo", 0, false)) + + ctx := context.Background() + ref, wait, err := storage.PyramidSplit(ctx, buf, putGetter, putGetter, chunk.NewTag(0, "foo", int64(dataLength/4096+1), false)) + if err != nil { + t.Fatalf(err.Error()) + } + err = wait(ctx) + if err != nil { + t.Fatalf(err.Error()) + } + if ref.Hex() != expected[i] { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %s\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, ref, expected[i]) + } + + if 
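// The chunkSize*branches+chunkSize shape singled out in these tests is the
// smallest dangling-chunk case: one extra chunk past a balanced tree adds a
// whole level. A sketch of the level arithmetic (hypothetical helper, not
// the package's getLevelsFromLength):

package main

import "fmt"

const (
	sectionSize = 32
	branches    = 128
	chunkSize   = sectionSize * branches
)

func levels(l int) int {
	n := 0
	sections := (l + sectionSize - 1) / sectionSize
	for sections > 1 {
		sections = (sections + branches - 1) / branches
		n++
	}
	return n
}

func main() {
	fmt.Println(levels(chunkSize))                      // 1
	fmt.Println(levels(chunkSize * branches))           // 2
	fmt.Println(levels(chunkSize*branches + chunkSize)) // 3
}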
mismatch != 1 { + t.Fatalf("mismatches: %d/%d", mismatch, end-start) + } +} + func BenchmarkPyramidHasher(b *testing.B) { for i := start; i < end; i++ { From 04705b6e2ab7844dfc43d75b95bc898d5f6d2783 Mon Sep 17 00:00:00 2001 From: nolash Date: Sun, 1 Dec 2019 12:18:36 +0100 Subject: [PATCH 22/67] file: Complete comments --- file/hasher_r_test.go | 10 +++++----- file/pyramid_test.go | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/file/hasher_r_test.go b/file/hasher_r_test.go index ed01708238..778b196672 100644 --- a/file/hasher_r_test.go +++ b/file/hasher_r_test.go @@ -43,8 +43,8 @@ func TestManualDanglingChunk(t *testing.T) { t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) } - // write a single section of the dangling chunk - // hash it and write the reference on the second section of level 3 + // write the dangling chunk + // hash it and write the reference on the second section of level 2 span = lengthToSpan(chunkSize) h.ResetWithLength(span) h.Write(levels[0][chunkSize*branches:]) @@ -55,7 +55,7 @@ func TestManualDanglingChunk(t *testing.T) { t.Fatalf("manual dangling single chunk; expected %s, got %s", correctRefHex, refHex) } - // hash the chunk on level 2 and write into the first section of level 3 + // hash the chunk on level 1 and write into the first section of level 2 span = lengthToSpan(chunkSize * branches) h.ResetWithLength(span) h.Write(levels[1]) @@ -66,7 +66,7 @@ func TestManualDanglingChunk(t *testing.T) { t.Fatalf("manual dangling balanced tree; expected %s, got %s", correctRefHex, refHex) } - // hash the two sections on level 3 to obtain the root hash + // hash the two sections on level 2 to obtain the root hash span = lengthToSpan(chunkSize*branches + chunkSize) h.ResetWithLength(span) h.Write(levels[2]) @@ -78,7 +78,7 @@ func TestManualDanglingChunk(t *testing.T) { } } -// TestReferenceFileHasher executes the file hasher algorithms on serial input data of periods of 0-254 +// TestReferenceFileHasherVector executes the file hasher algorithms on serial input data of periods of 0-254 // of lengths defined in common_test.go // // the "expected" array in common_test.go is generated by this implementation, and test failure due to diff --git a/file/pyramid_test.go b/file/pyramid_test.go index 3b61e818c5..307ea30d97 100644 --- a/file/pyramid_test.go +++ b/file/pyramid_test.go @@ -15,6 +15,8 @@ import ( "github.com/ethersphere/swarm/testutil" ) +// TestPyramidHasherVector executes the file hasher algorithms on serial input data of periods of 0-254 +// of lengths defined in common_test.go func TestPyramidHasherVector(t *testing.T) { t.Skip("only provided for easy reference to bug in case chunkSize*129") var mismatch int @@ -46,6 +48,7 @@ func TestPyramidHasherVector(t *testing.T) { } } +// BenchmarkPyramidHasher establishes the benchmark BenchmarkHasher should be compared to func BenchmarkPyramidHasher(b *testing.B) { for i := start; i < end; i++ { From b4ba44a332b83f5a17781393974f0eb9c25ce938 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 3 Dec 2019 10:52:06 +0100 Subject: [PATCH 23/67] file: Delint --- file/common_test.go | 4 ++-- file/hasher.go | 6 +----- file/job.go | 11 +++++------ 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index 365a782f30..b4580ccc41 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -57,8 +57,8 @@ var ( "ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20 } - start = 0 - end = len(dataLengths) + start = 
14 + end = 15 //len(dataLengths) ) func init() { diff --git a/file/hasher.go b/file/hasher.go index 044377bf85..e7c2aa4463 100644 --- a/file/hasher.go +++ b/file/hasher.go @@ -69,11 +69,7 @@ func (h *Hasher) Sum(_ []byte) []byte { sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize) targetLevel := getLevelsFromLength(h.size, h.params.SectionSize, h.params.Branches) h.target.Set(h.size, sectionCount, targetLevel) - var ref []byte - select { - case ref = <-h.target.Done(): - } - return ref + return <-h.target.Done() } // proxy for sync.Pool diff --git a/file/job.go b/file/job.go index 0bf2fbe3bd..889f82f7ef 100644 --- a/file/job.go +++ b/file/job.go @@ -215,7 +215,6 @@ func (jb *job) count() int { // if job is last index in a level and writes have been finalized, it will return the target size // otherwise, regardless of job index, it will return the size according to the current write count // TODO: returning expected size in one case and actual size in another can lead to confusion -// TODO: two atomic ops, may change value inbetween func (jb *job) size() int { jb.mu.Lock() count := int(jb.cursorSection) //jb.count() @@ -225,7 +224,7 @@ func (jb *job) size() int { return count * jb.params.SectionSize * jb.params.Spans[jb.level] } //log.Trace("size", "sections", jb.target.sections, "size", jb.target.Size(), "endcount", endCount, "level", jb.level) - return int(jb.target.Size()) % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) + return jb.target.Size() % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) } // add data to job @@ -320,7 +319,7 @@ OUTER: } targetLevel := jb.target.Level() - if int(targetLevel) == jb.level { + if targetLevel == jb.level { jb.target.resultC <- jb.index.GetTopHash(jb.level) return } @@ -334,7 +333,7 @@ OUTER: // endCount > 0 means this is the last chunk on the level // the hash from the level below the target level will be the result - belowRootLevel := int(targetLevel) - 1 + belowRootLevel := targetLevel - 1 if jb.endCount > 0 && jb.level == belowRootLevel { jb.target.resultC <- ref return @@ -383,7 +382,7 @@ func (jb *job) targetWithinJob(targetSection int) (int, bool) { ok = true } - return int(endIndex), ok + return endIndex, ok } // if last data index falls within the span, return the appropriate end count for the level @@ -412,7 +411,7 @@ func (jb *job) parent() *job { } // Next creates the job for the next data section span on the same level as the receiver job -// this is only meant to be called once for each job, consequtive calls will overwrite index with new empty job +// this is only meant to be called once for each job, consecutive calls will overwrite index with new empty job func (jb *job) Next() *job { return newJob(jb.params, jb.target, jb.index, jb.level, jb.dataSection+jb.params.Spans[jb.level+1]) } From e4f8fc6319c8bbb3c41e574f867bd17a185d3216 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 3 Dec 2019 15:01:35 +0100 Subject: [PATCH 24/67] file: Rename files and extract index, target to separate files --- file/index.go | 82 ++++++++++++ file/job.go | 131 +------------------ file/{tree.go => param.go} | 0 file/{hasher_r.go => reference.go} | 0 file/{hasher_r_test.go => reference_test.go} | 0 file/{hasher.go => split.go} | 3 - file/{hasher_test.go => split_test.go} | 0 file/target.go | 57 ++++++++ file/type.go | 8 -- 9 files changed, 140 insertions(+), 141 deletions(-) create mode 100644 file/index.go rename file/{tree.go => param.go} (100%) rename file/{hasher_r.go => reference.go} 
(100%) rename file/{hasher_r_test.go => reference_test.go} (100%) rename file/{hasher.go => split.go} (96%) rename file/{hasher_test.go => split_test.go} (100%) create mode 100644 file/target.go delete mode 100644 file/type.go diff --git a/file/index.go b/file/index.go new file mode 100644 index 0000000000..6a4fbdae99 --- /dev/null +++ b/file/index.go @@ -0,0 +1,82 @@ +package file + +import ( + "fmt" + "sync" +) + +// keeps an index of all the existing jobs for a file hashing operation +// sorted by level +// +// it also keeps all the "top hashes", ie hashes on first data section index of every level +// these are needed in case of balanced tree results, since the hashing result would be +// lost otherwise, due to the job not having any intermediate storage of any data +type jobIndex struct { + maxLevels int + jobs []sync.Map + topHashes [][]byte + mu sync.Mutex +} + +func newJobIndex(maxLevels int) *jobIndex { + ji := &jobIndex{ + maxLevels: maxLevels, + } + for i := 0; i < maxLevels; i++ { + ji.jobs = append(ji.jobs, sync.Map{}) + } + return ji +} + +// implements Stringer interface +func (ji *jobIndex) String() string { + return fmt.Sprintf("%p", ji) +} + +// Add adds a job to the index at the level +// and data section index specified in the job +func (ji *jobIndex) Add(jb *job) { + //log.Trace("adding job", "job", jb) + ji.jobs[jb.level].Store(jb.dataSection, jb) +} + +// Get retrieves a job from the job index +// based on the level of the job and its data section index +// if a job for the level and section index does not exist this method returns nil +func (ji *jobIndex) Get(lvl int, section int) *job { + jb, ok := ji.jobs[lvl].Load(section) + if !ok { + return nil + } + return jb.(*job) +} + +// Delete removes a job from the job index +// leaving it to be garbage collected when +// the reference in the main code is relinquished +func (ji *jobIndex) Delete(jb *job) { + ji.jobs[jb.level].Delete(jb.dataSection) +} + +// AddTopHash should be called by a job when a hash is written to the first index of a level +// since the job doesn't store any data written to it (just passing it through to the underlying writer) +// this is needed for the edge case of balanced trees +func (ji *jobIndex) AddTopHash(ref []byte) { + ji.mu.Lock() + defer ji.mu.Unlock() + ji.topHashes = append(ji.topHashes, ref) + //log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) +} + +// GetJobHash gets the current top hash for a particular level set by AddTopHash +func (ji *jobIndex) GetTopHash(lvl int) []byte { + ji.mu.Lock() + defer ji.mu.Unlock() + return ji.topHashes[lvl-1] +} + +func (ji *jobIndex) GetTopHashLevel() int { + ji.mu.Lock() + defer ji.mu.Unlock() + return len(ji.topHashes) +} diff --git a/file/job.go b/file/job.go index 889f82f7ef..6b439fa026 100644 --- a/file/job.go +++ b/file/job.go @@ -8,136 +8,6 @@ import ( "github.com/ethersphere/swarm/bmt" ) -// keeps an index of all the existing jobs for a file hashing operation -// sorted by level -// -// it also keeps all the "top hashes", ie hashes on first data section index of every level -// these are needed in case of balanced tree results, since the hashing result would be -// lost otherwise, due to the job not having any intermediate storage of any data -type jobIndex struct { - maxLevels int - jobs []sync.Map - topHashes [][]byte - mu sync.Mutex -} - -func newJobIndex(maxLevels int) *jobIndex { - ji := &jobIndex{ - maxLevels: maxLevels, - } - for i := 0; i < maxLevels; i++ { - ji.jobs = append(ji.jobs, sync.Map{}) - } - return ji 
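// jobIndex, now extracted to its own file, keeps one sync.Map per level
// keyed by the job's data section, so concurrent jobs can register and look
// up each other without a global lock. The access pattern in isolation:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var level sync.Map // one map per tree level, keyed by data section
	level.Store(0, "job at section 0")
	level.Store(128, "job at section 128")
	if v, ok := level.Load(128); ok {
		fmt.Println(v)
	}
	level.Delete(0) // removed jobs are left to the garbage collector
}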
-} - -// implements Stringer interface -func (ji *jobIndex) String() string { - return fmt.Sprintf("%p", ji) -} - -// Add adds a job to the index at the level -// and data section index specified in the job -func (ji *jobIndex) Add(jb *job) { - //log.Trace("adding job", "job", jb) - ji.jobs[jb.level].Store(jb.dataSection, jb) -} - -// Get retrieves a job from the job index -// based on the level of the job and its data section index -// if a job for the level and section index does not exist this method returns nil -func (ji *jobIndex) Get(lvl int, section int) *job { - jb, ok := ji.jobs[lvl].Load(section) - if !ok { - return nil - } - return jb.(*job) -} - -// Delete removes a job from the job index -// leaving it to be garbage collected when -// the reference in the main code is relinquished -func (ji *jobIndex) Delete(jb *job) { - ji.jobs[jb.level].Delete(jb.dataSection) -} - -// AddTopHash should be called by a job when a hash is written to the first index of a level -// since the job doesn't store any data written to it (just passing it through to the underlying writer) -// this is needed for the edge case of balanced trees -func (ji *jobIndex) AddTopHash(ref []byte) { - ji.mu.Lock() - defer ji.mu.Unlock() - ji.topHashes = append(ji.topHashes, ref) - //log.Trace("added top hash", "length", len(ji.topHashes), "index", ji) -} - -// GetJobHash gets the current top hash for a particular level set by AddTopHash -func (ji *jobIndex) GetTopHash(lvl int) []byte { - ji.mu.Lock() - defer ji.mu.Unlock() - return ji.topHashes[lvl-1] -} - -func (ji *jobIndex) GetTopHashLevel() int { - ji.mu.Lock() - defer ji.mu.Unlock() - return len(ji.topHashes) -} - -// passed to a job to determine at which data lengths and levels a job should terminate -type target struct { - size int32 // bytes written - sections int32 // sections written - level int32 // target level calculated from bytes written against branching factor and sector size - resultC chan []byte // channel to receive root hash - doneC chan struct{} // when this channel is closed all jobs will calculate their end write count - mu sync.Mutex -} - -func newTarget() *target { - return &target{ - resultC: make(chan []byte), - doneC: make(chan struct{}), - } -} - -// Set is called when the final length of the data to be written is known -// TODO: method can be simplified to calculate sections and level internally -func (t *target) Set(size int, sections int, level int) { - t.mu.Lock() - defer t.mu.Unlock() - t.size = int32(size) - t.sections = int32(sections) - t.level = int32(level) - //log.Trace("target set", "size", t.size, "section", t.sections, "level", t.level) - close(t.doneC) -} - -// Count returns the total section count for the target -// it should only be called after Set() -func (t *target) Count() int { - t.mu.Lock() - defer t.mu.Unlock() - return int(t.sections) + 1 -} - -func (t *target) Level() int { - t.mu.Lock() - defer t.mu.Unlock() - return int(t.level) -} - -func (t *target) Size() int { - t.mu.Lock() - defer t.mu.Unlock() - return int(t.size) -} - -// Done returns the channel in which the root hash will be sent -func (t *target) Done() <-chan []byte { - return t.resultC -} - type jobUnit struct { index int data []byte @@ -229,6 +99,7 @@ func (jb *job) size() int { // add data to job // does no checking for data length or index validity +// TODO: rename index param not to confuse with index object func (jb *job) write(index int, data []byte) { jb.inc() diff --git a/file/tree.go b/file/param.go similarity index 100% rename from 
file/tree.go rename to file/param.go diff --git a/file/hasher_r.go b/file/reference.go similarity index 100% rename from file/hasher_r.go rename to file/reference.go diff --git a/file/hasher_r_test.go b/file/reference_test.go similarity index 100% rename from file/hasher_r_test.go rename to file/reference_test.go diff --git a/file/hasher.go b/file/split.go similarity index 96% rename from file/hasher.go rename to file/split.go index e7c2aa4463..a016b7ab3b 100644 --- a/file/hasher.go +++ b/file/split.go @@ -6,7 +6,6 @@ import ( "github.com/ethersphere/swarm/bmt" ) -// Hasher implements file.SectionWriter // it is intended to be chainable to accommodate for arbitrary chunk manipulation // like encryption, erasure coding etc type Hasher struct { @@ -44,7 +43,6 @@ func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher, writerFun return h } -// Write implements hash.Hash // TODO: enforce buffered writes and limits // TODO: attempt omit modulo calc on every pass func (h *Hasher) Write(b []byte) { @@ -64,7 +62,6 @@ func (h *Hasher) Write(b []byte) { h.count++ } -// Sum implements hash.Hash func (h *Hasher) Sum(_ []byte) []byte { sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize) targetLevel := getLevelsFromLength(h.size, h.params.SectionSize, h.params.Branches) diff --git a/file/hasher_test.go b/file/split_test.go similarity index 100% rename from file/hasher_test.go rename to file/split_test.go diff --git a/file/target.go b/file/target.go new file mode 100644 index 0000000000..dfb29f4c0a --- /dev/null +++ b/file/target.go @@ -0,0 +1,57 @@ +package file + +import "sync" + +// passed to a job to determine at which data lengths and levels a job should terminate +type target struct { + size int32 // bytes written + sections int32 // sections written + level int32 // target level calculated from bytes written against branching factor and sector size + resultC chan []byte // channel to receive root hash + doneC chan struct{} // when this channel is closed all jobs will calculate their end write count + mu sync.Mutex +} + +func newTarget() *target { + return &target{ + resultC: make(chan []byte), + doneC: make(chan struct{}), + } +} + +// Set is called when the final length of the data to be written is known +// TODO: method can be simplified to calculate sections and level internally +func (t *target) Set(size int, sections int, level int) { + t.mu.Lock() + defer t.mu.Unlock() + t.size = int32(size) + t.sections = int32(sections) + t.level = int32(level) + //log.Trace("target set", "size", t.size, "section", t.sections, "level", t.level) + close(t.doneC) +} + +// Count returns the total section count for the target +// it should only be called after Set() +func (t *target) Count() int { + t.mu.Lock() + defer t.mu.Unlock() + return int(t.sections) + 1 +} + +func (t *target) Level() int { + t.mu.Lock() + defer t.mu.Unlock() + return int(t.level) +} + +func (t *target) Size() int { + t.mu.Lock() + defer t.mu.Unlock() + return int(t.size) +} + +// Done returns the channel in which the root hash will be sent +func (t *target) Done() <-chan []byte { + return t.resultC +} diff --git a/file/type.go b/file/type.go deleted file mode 100644 index eba4a3a26b..0000000000 --- a/file/type.go +++ /dev/null @@ -1,8 +0,0 @@ -package file - -import "hash" - -// SectionWriter is a chainable interface for file-based operations in swarm -type SectionWriter interface { - hash.Hash -} From d300b86616b3c9f9d4a6c517aca4caabc467a24e Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 3 Dec 2019 
15:57:33 +0100 Subject: [PATCH 25/67] file: Extract job sum into separate method --- file/job.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/file/job.go b/file/job.go index 6b439fa026..d4d9a3955e 100644 --- a/file/job.go +++ b/file/job.go @@ -189,6 +189,11 @@ OUTER: } } + jb.sum() +} + +func (jb *job) sum() { + targetLevel := jb.target.Level() if targetLevel == jb.level { jb.target.resultC <- jb.index.GetTopHash(jb.level) From 92c2c7ecf940b67943f75f85c5cdfee25ae68afd Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 3 Dec 2019 21:37:40 +0100 Subject: [PATCH 26/67] file: Extract split to separate file, implement bmt.SectionWriter --- file/common_test.go | 4 +- file/param.go | 2 + file/split.go | 104 +++++++------------------- file/split_test.go | 177 +++----------------------------------------- 4 files changed, 38 insertions(+), 249 deletions(-) diff --git a/file/common_test.go b/file/common_test.go index b4580ccc41..365a782f30 100644 --- a/file/common_test.go +++ b/file/common_test.go @@ -57,8 +57,8 @@ var ( "ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20 } - start = 14 - end = 15 //len(dataLengths) + start = 0 + end = len(dataLengths) ) func init() { diff --git a/file/param.go b/file/param.go index 49e97d360b..d6c923d5ef 100644 --- a/file/param.go +++ b/file/param.go @@ -11,6 +11,7 @@ import ( type treeParams struct { SectionSize int Branches int + ChunkSize int Spans []int Debug bool hashFunc func() bmt.SectionWriter @@ -22,6 +23,7 @@ func newTreeParams(section int, branches int, hashFunc func() bmt.SectionWriter) p := &treeParams{ SectionSize: section, Branches: branches, + ChunkSize: section * branches, hashFunc: hashFunc, } p.writerPool.New = func() interface{} { diff --git a/file/split.go b/file/split.go index a016b7ab3b..8a98afdd61 100644 --- a/file/split.go +++ b/file/split.go @@ -1,94 +1,40 @@ package file import ( - "sync" + "io" "github.com/ethersphere/swarm/bmt" ) -// it is intended to be chainable to accommodate for arbitrary chunk manipulation -// like encryption, erasure coding etc -type Hasher struct { - target *target - params *treeParams - index *jobIndex - - writeC chan []byte - doneC chan struct{} - job *job // current level 1 job being written to - writerPool sync.Pool - hasherPool sync.Pool - size int - count int +type Splitter struct { + r io.Reader + w bmt.SectionWriter } -// New creates a new Hasher object using the given sectionSize and branch factor -// hasherFunc is used to create *bmt.Hashers to hash the incoming data -// writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs. 
It may be pipelined to other components with the same interface -func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher, writerFunc func() bmt.SectionWriter) *Hasher { - h := &Hasher{ - target: newTarget(), - index: newJobIndex(9), - writeC: make(chan []byte, branches), - } - h.writerPool.New = func() interface{} { - return writerFunc() +func NewSplitter(r io.Reader, w bmt.SectionWriter) *Splitter { + s := &Splitter{ + r: r, + w: w, } - h.hasherPool.New = func() interface{} { - return hasherFunc() - } - h.params = newTreeParams(sectionSize, branches, h.getWriter) - h.job = newJob(h.params, h.target, h.index, 1, 0) - - return h + return s } -// TODO: enforce buffered writes and limits -// TODO: attempt omit modulo calc on every pass -func (h *Hasher) Write(b []byte) { - if h.count%branches == 0 && h.count > 0 { - h.job = h.job.Next() - } - go func(i int, jb *job) { - hasher := h.getHasher(len(b)) - _, err := hasher.Write(b) +// TODO: enforce buffer capacity and auto-grow +func (s *Splitter) Split() ([]byte, error) { + wc := 0 + l := 0 + for { + d := make([]byte, s.w.SectionSize()) + c, err := s.r.Read(d) if err != nil { - panic(err) + if err == io.EOF { + break + } + return nil, err } - jb.write(i%h.params.Branches, hasher.Sum(nil)) - h.putHasher(hasher) - }(h.count, h.job) - h.size += len(b) - h.count++ -} - -func (h *Hasher) Sum(_ []byte) []byte { - sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize) - targetLevel := getLevelsFromLength(h.size, h.params.SectionSize, h.params.Branches) - h.target.Set(h.size, sectionCount, targetLevel) - return <-h.target.Done() -} - -// proxy for sync.Pool -func (h *Hasher) putHasher(w *bmt.Hasher) { - h.hasherPool.Put(w) -} - -// proxy for sync.Pool -func (h *Hasher) getHasher(l int) *bmt.Hasher { - span := lengthToSpan(l) - hasher := h.hasherPool.Get().(*bmt.Hasher) - hasher.ResetWithLength(span) - return hasher -} - -// proxy for sync.Pool -func (h *Hasher) putWriter(w bmt.SectionWriter) { - w.Reset() - h.writerPool.Put(w) -} - -// proxy for sync.Pool -func (h *Hasher) getWriter() bmt.SectionWriter { - return h.writerPool.Get().(bmt.SectionWriter) + s.w.Write(wc, d) + wc++ + l += c + } + return s.w.Sum(nil, l, nil), nil } diff --git a/file/split_test.go b/file/split_test.go index ff1e83e94b..c7c264a94d 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -1,20 +1,15 @@ package file import ( - "fmt" - "strconv" - "strings" "testing" "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" - "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) -// TestHasherJobTopHash verifies that the top hash on the first level is correctly set even though the Hasher writes asynchronously to the underlying job -func TestHasherJobTopHash(t *testing.T) { +func TestSplit(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() bmt.SectionWriter { @@ -23,171 +18,17 @@ func TestHasherJobTopHash(t *testing.T) { dataHashFunc := func() *bmt.Hasher { return bmt.New(poolSync) } - - _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc, refHashFunc) - for i := 0; i < chunkSize*branches; i += chunkSize { - h.Write(data[i : i+chunkSize]) - } - h.Sum(nil) - levelOneTopHash := hexutil.Encode(h.index.GetTopHash(1)) - correctLevelOneTopHash := 
"0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" - if levelOneTopHash != correctLevelOneTopHash { - t.Fatalf("tophash; expected %s, got %s", correctLevelOneTopHash, levelOneTopHash) - } - -} - -// TestHasherOneFullChunk verifies the result of writing a single data chunk to Hasher -func TestHasherOneFullChunk(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) - } - - _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc, refHashFunc) - for i := 0; i < chunkSize*branches; i += chunkSize { - h.Write(data[i : i+chunkSize]) - } - ref := h.Sum(nil) - correctRootHash := "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" - rootHash := hexutil.Encode(ref) - if rootHash != correctRootHash { - t.Fatalf("roothash; expected %s, got %s", correctRootHash, rootHash) - } -} - -// TestHasherOneFullChunk verifies that Hasher creates new jobs on branch thresholds -func TestHasherJobChange(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) - } - - _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) h := New(sectionSize, branches, dataHashFunc, refHashFunc) - jobs := make(map[string]int) - for i := 0; i < chunkSize*branches*branches; i += chunkSize { - h.Write(data[i : i+chunkSize]) - jobs[h.job.String()]++ - } - i := 0 - for _, v := range jobs { - if v != branches { - t.Fatalf("jobwritecount writes: expected %d, got %d", branches, v) - } - i++ - } - if i != branches { - t.Fatalf("jobwritecount jobs: expected %d, got %d", branches, i) - } -} - -// TestHasherONeFullLevelOneChunk verifies the result of writing branches times data chunks to Hasher -func TestHasherOneFullLevelOneChunk(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) - } - - _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc, refHashFunc) - for i := 0; i < chunkSize*branches*branches; i += chunkSize { - h.Write(data[i : i+chunkSize]) - } - ref := h.Sum(nil) - correctRootHash := "0x522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b" - rootHash := hexutil.Encode(ref) - if rootHash != correctRootHash { - t.Fatalf("roothash; expected %s, got %s", correctRootHash, rootHash) - } -} - -func TestHasherVector(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) - } - - var mismatch int - for i, dataLength := range dataLengths { - 
log.Info("hashervector start", "i", i, "l", dataLength) - eq := true - h := New(sectionSize, branches, dataHashFunc, refHashFunc) - _, data := testutil.SerialData(dataLength, 255, 0) - for j := 0; j < dataLength; j += chunkSize { - size := chunkSize - if dataLength-j < chunkSize { - size = dataLength - j - } - h.Write(data[j : j+size]) - } - ref := h.Sum(nil) - correctRefHex := "0x" + expected[i] - refHex := hexutil.Encode(ref) - if refHex != correctRefHex { - mismatch++ - eq = false - } - t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, ref, expected[i]) - } - if mismatch > 0 { - t.Fatalf("mismatches: %d/%d", mismatch, end-start) - } -} - -// BenchmarkHasher generates benchmarks that are comparable to the pyramid hasher -func BenchmarkHasher(b *testing.B) { - for i := start; i < end; i++ { - b.Run(fmt.Sprintf("%d/%d", i, dataLengths[i]), benchmarkHasher) - } -} - -func benchmarkHasher(b *testing.B) { - params := strings.Split(b.Name(), "/") - dataLengthParam, err := strconv.ParseInt(params[2], 10, 64) + r, _ := testutil.SerialData(chunkSize, 255, 0) + s := NewSplitter(r, h) + ref, err := s.Split() if err != nil { - b.Fatal(err) - } - dataLength := int(dataLengthParam) - - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) + t.Fatal(err) } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) - } - _, data := testutil.SerialData(dataLength, 255, 0) - - for j := 0; j < b.N; j++ { - h := New(sectionSize, branches, dataHashFunc, refHashFunc) - for i := 0; i < dataLength; i += chunkSize { - size := chunkSize - if dataLength-i < chunkSize { - size = dataLength - i - } - h.Write(data[i : i+size]) - } - h.Sum(nil) + refHex := hexutil.Encode(ref) + correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" + if refHex != correctRefHex { + t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) } } From 5407b6240e199f059fd9a5ca73de42916532383c Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 3 Dec 2019 21:55:42 +0100 Subject: [PATCH 27/67] file: Add comments --- file/job.go | 3 ++- file/split.go | 7 ++++++- file/split_test.go | 3 +++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/file/job.go b/file/job.go index d4d9a3955e..4af7f9118a 100644 --- a/file/job.go +++ b/file/job.go @@ -8,13 +8,14 @@ import ( "github.com/ethersphere/swarm/bmt" ) +// necessary metadata across asynchronous input type jobUnit struct { index int data []byte count int } -// encapsulates one single chunk to be hashed +// encapsulates one single intermediate chunk to be hashed type job struct { target *target params *treeParams diff --git a/file/split.go b/file/split.go index 8a98afdd61..498a8e979a 100644 --- a/file/split.go +++ b/file/split.go @@ -6,11 +6,14 @@ import ( "github.com/ethersphere/swarm/bmt" ) +// TODO: grow buffer on demand to reduce allocs +// Splitter returns the result of a data stream from a bmt.SectionWriter type Splitter struct { r io.Reader w bmt.SectionWriter } +// NewSplitter creates a new Splitter object func NewSplitter(r io.Reader, w bmt.SectionWriter) *Splitter { s := &Splitter{ r: r, @@ -19,7 +22,9 @@ func NewSplitter(r io.Reader, w bmt.SectionWriter) *Splitter { return s } -// TODO: enforce buffer capacity and auto-grow +// Split is a blocking call that consumes and passes data from its reader to its 
SectionWriter +// according to the SectionWriter's SectionSize +// On EOF from the reader it calls Sum on the bmt.SectionWriter and returns the result func (s *Splitter) Split() ([]byte, error) { wc := 0 l := 0 diff --git a/file/split_test.go b/file/split_test.go index c7c264a94d..766ef9ddbf 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -9,6 +9,9 @@ import ( "golang.org/x/crypto/sha3" ) +// TestSplit creates a Splitter with a reader with one chunk of serial data and +// a Hasher as the underlying bmt.SectionWriter +// It verifies the returned result func TestSplit(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) From e5d29cb73ecf988513bb6b0ec3a58da054c7c576 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 3 Dec 2019 22:16:16 +0100 Subject: [PATCH 28/67] file, param: Add package for global interfaces and settings --- bmt/bmt.go | 15 ++-- bmt/bmt_test.go | 7 +- file/hasher.go | 117 ++++++++++++++++++++++++++ file/hasher_test.go | 197 ++++++++++++++++++++++++++++++++++++++++++++ file/job.go | 6 +- file/job_test.go | 28 ++++--- file/param.go | 6 +- file/split.go | 6 +- file/split_test.go | 5 +- param/io.go | 10 +++ 10 files changed, 364 insertions(+), 33 deletions(-) create mode 100644 file/hasher.go create mode 100644 file/hasher_test.go create mode 100644 param/io.go diff --git a/bmt/bmt.go b/bmt/bmt.go index 18eab5a2bc..c9c118d028 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -411,14 +411,6 @@ func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher { } } -// SectionWriter is an asynchronous segment/section writer interface -type SectionWriter interface { - Reset() // standard init to be called before reuse - Write(index int, data []byte) // write into section of index - Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer - SectionSize() int // size of the async section unit to use -} - // AsyncHasher extends BMT Hasher with an asynchronous segment/section writer interface // AsyncHasher is unsafe and does not check indexes and section data lengths // it must be used with the right indexes and length and the right number of sections @@ -444,10 +436,17 @@ type AsyncHasher struct { // methods needed to implement AsyncWriter // SectionSize returns the size of async section unit to use +// Implements param.SectionWriter func (sw *AsyncHasher) SectionSize() int { return sw.secsize } +// DigestSize returns the size of the result +// Implements param.SectionWriter +func (sw *AsyncHasher) DigestSize() int { + return sw.secsize +} + // Write writes the i-th section of the BMT base // this function can and is meant to be called concurrently // it sets max segment threadsafely diff --git a/bmt/bmt_test.go b/bmt/bmt_test.go index fc020eb7c2..071b273ca5 100644 --- a/bmt/bmt_test.go +++ b/bmt/bmt_test.go @@ -26,6 +26,7 @@ import ( "testing" "time" + "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) @@ -547,16 +548,16 @@ func splitAndShuffle(secsize int, data []byte) (idxs []int, segments [][]byte) { } // splits the input data performs a random shuffle to mock async section writes -func asyncHashRandom(bmt SectionWriter, span []byte, data []byte, wh whenHash) (s []byte) { +func asyncHashRandom(bmt param.SectionWriter, span []byte, data []byte, wh whenHash) (s []byte) { idxs, segments := splitAndShuffle(bmt.SectionSize(), data) return asyncHash(bmt, span, len(data), wh, idxs, segments) } -// 
mock for async section writes for BMT SectionWriter
+// mock for async section writes for param.SectionWriter
 // requires a permutation (a random shuffle) of list of all indexes of segments
 // and writes them in order to the appropriate section
 // the Sum function is called according to the wh parameter (first, last, random [relative to segment writes])
-func asyncHash(bmt SectionWriter, span []byte, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) {
+func asyncHash(bmt param.SectionWriter, span []byte, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) {
 	bmt.Reset()
 	if l == 0 {
 		return bmt.Sum(nil, l, span)
diff --git a/file/hasher.go b/file/hasher.go
new file mode 100644
index 0000000000..ea6e1a6ab3
--- /dev/null
+++ b/file/hasher.go
@@ -0,0 +1,117 @@
+package file
+
+import (
+	"sync"
+
+	"github.com/ethersphere/swarm/bmt"
+	"github.com/ethersphere/swarm/param"
+)
+
+// Hasher is a bmt.SectionWriter that executes the file hashing algorithm on arbitrary data
+type Hasher struct {
+	target *target
+	params *treeParams
+	index  *jobIndex
+
+	job        *job // current level 1 job being written to
+	writerPool sync.Pool
+	hasherPool sync.Pool
+	size       int
+	count      int
+}
+
+// New creates a new Hasher object using the given sectionSize and branch factor
+// hasherFunc is used to create *bmt.Hashers to hash the incoming data
+// writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs. It may be pipelined to other components with the same interface
+func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher, writerFunc func() param.SectionWriter) *Hasher {
+	h := &Hasher{
+		target: newTarget(),
+		index:  newJobIndex(9),
+	}
+	h.writerPool.New = func() interface{} {
+		return writerFunc()
+	}
+	h.hasherPool.New = func() interface{} {
+		return hasherFunc()
+	}
+	h.params = newTreeParams(sectionSize, branches, h.getWriter)
+	h.job = newJob(h.params, h.target, h.index, 1, 0)
+
+	return h
+}
+
+// Write implements bmt.SectionWriter
+// It is a non-blocking call that hashes a data chunk and passes the resulting reference to the hash job representing
+// the intermediate chunk holding the data references
+// TODO: enforce buffered writes and limits
+// TODO: attempt omit modulo calc on every pass
+func (h *Hasher) Write(index int, b []byte) {
+	if h.count%branches == 0 && h.count > 0 {
+		h.job = h.job.Next()
+	}
+	go func(i int, jb *job) {
+		hasher := h.getHasher(len(b))
+		_, err := hasher.Write(b)
+		if err != nil {
+			panic(err)
+		}
+		jb.write(i%h.params.Branches, hasher.Sum(nil))
+		h.putHasher(hasher)
+	}(h.count, h.job)
+	h.size += len(b)
+	h.count++
+}
+
+// Sum implements bmt.SectionWriter
+// It is a blocking call that calculates the target level and section index of the received data
+// and alerts hasher jobs that the end of write is reached
+// It returns the root hash
+func (h *Hasher) Sum(_ []byte, length int, _ []byte) []byte {
+	sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize)
+	targetLevel := getLevelsFromLength(h.size, h.params.SectionSize, h.params.Branches)
+	h.target.Set(h.size, sectionCount, targetLevel)
+	return <-h.target.Done()
+}
+
+func (h *Hasher) Reset() {
+}
+
+func (h *Hasher) SectionSize() int {
+	return h.params.ChunkSize
+}
+
+func (h *Hasher) Branches() int {
+	return h.params.Branches
+}
+
+func (h *Hasher) ChunkSize() int {
+	return h.params.ChunkSize
+}
+
+func (h *Hasher) DigestSize() int {
+	return h.params.SectionSize
+}
+
+// proxy for sync.Pool
+func (h *Hasher) putHasher(w *bmt.Hasher) {
+	h.hasherPool.Put(w)
+}
+
+// proxy for sync.Pool
+func (h *Hasher) getHasher(l int) *bmt.Hasher {
+	span := lengthToSpan(l)
+	hasher := h.hasherPool.Get().(*bmt.Hasher)
+	hasher.ResetWithLength(span)
+	return hasher
+}
+
+// proxy for sync.Pool
+func (h *Hasher) putWriter(w param.SectionWriter) {
+	w.Reset()
+	h.writerPool.Put(w)
+}
+
+// proxy for sync.Pool
+func (h *Hasher) getWriter() param.SectionWriter {
+	return h.writerPool.Get().(param.SectionWriter)
+}
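
// Sketch, not from the patch: the kind of component a writerFunc "may be pipelined
// to", assuming the param.SectionWriter interface introduced in this patch series
// and the file package context. It forwards every call to the next writer in the
// chain and is where, e.g., chunk storage could be hooked in.
type passthroughWriter struct {
	next param.SectionWriter // the chained writer, e.g. a bmt.AsyncHasher
}

func (p *passthroughWriter) Reset()                       { p.next.Reset() }
func (p *passthroughWriter) Write(index int, data []byte) { p.next.Write(index, data) }
func (p *passthroughWriter) Sum(b []byte, length int, span []byte) []byte {
	// a storing implementation would persist the written sections before summing
	return p.next.Sum(b, length, span)
}
func (p *passthroughWriter) SectionSize() int { return p.next.SectionSize() }
func (p *passthroughWriter) DigestSize() int  { return p.next.DigestSize() }
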
diff --git a/file/hasher_test.go b/file/hasher_test.go
new file mode 100644
index 0000000000..311dce6cf2
--- /dev/null
+++ b/file/hasher_test.go
@@ -0,0 +1,197 @@
+package file
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common/hexutil"
+	"github.com/ethersphere/swarm/bmt"
+	"github.com/ethersphere/swarm/log"
+	"github.com/ethersphere/swarm/param"
+	"github.com/ethersphere/swarm/testutil"
+	"golang.org/x/crypto/sha3"
+)
+
+// TestHasherJobTopHash verifies that the top hash on the first level is correctly set even though the Hasher writes asynchronously to the underlying job
+func TestHasherJobTopHash(t *testing.T) {
+	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	refHashFunc := func() param.SectionWriter {
+		return bmt.New(poolAsync).NewAsyncWriter(false)
+	}
+	dataHashFunc := func() *bmt.Hasher {
+		return bmt.New(poolSync)
+	}
+
+	_, data := testutil.SerialData(chunkSize*branches, 255, 0)
+	h := New(sectionSize, branches, dataHashFunc, refHashFunc)
+	var i int
+	for i = 0; i < chunkSize*branches; i += chunkSize {
+		h.Write(i, data[i:i+chunkSize])
+	}
+	h.Sum(nil, i, nil)
+	levelOneTopHash := hexutil.Encode(h.index.GetTopHash(1))
+	correctLevelOneTopHash := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef"
+	if levelOneTopHash != correctLevelOneTopHash {
+		t.Fatalf("tophash; expected %s, got %s", correctLevelOneTopHash, levelOneTopHash)
+	}
+
+}
+
+// TestHasherOneFullChunk verifies the result of writing branches data chunks (one full chunk of references) to Hasher
+func TestHasherOneFullChunk(t *testing.T) {
+	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	refHashFunc := func() param.SectionWriter {
+		return bmt.New(poolAsync).NewAsyncWriter(false)
+	}
+	dataHashFunc := func() *bmt.Hasher {
+		return bmt.New(poolSync)
+	}
+
+	_, data := testutil.SerialData(chunkSize*branches, 255, 0)
+	h := New(sectionSize, branches, dataHashFunc, refHashFunc)
+	var i int
+	for i = 0; i < chunkSize*branches; i += chunkSize {
+		h.Write(i, data[i:i+chunkSize])
+	}
+	ref := h.Sum(nil, i, nil)
+	correctRootHash := "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09"
+	rootHash := hexutil.Encode(ref)
+	if rootHash != correctRootHash {
+		t.Fatalf("roothash; expected %s, got %s", correctRootHash, rootHash)
+	}
+}
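
// Minimal usage sketch, not part of the patch: driving the Hasher directly with a
// single full data chunk, with the same setup as the tests above. The expected
// reference is the one-chunk test vector, the same value TestSplit checks in
// file/split_test.go.
func exampleHasherSingleChunk(dataHashFunc func() *bmt.Hasher, refHashFunc func() param.SectionWriter) []byte {
	_, data := testutil.SerialData(chunkSize, 255, 0)
	h := New(sectionSize, branches, dataHashFunc, refHashFunc)
	h.Write(0, data) // one full data chunk at section index 0
	// expected: 0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef
	return h.Sum(nil, chunkSize, nil)
}
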
+
+// TestHasherJobChange verifies that Hasher creates new jobs on branch thresholds
+func TestHasherJobChange(t *testing.T) {
+	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	refHashFunc := func() param.SectionWriter {
+		return bmt.New(poolAsync).NewAsyncWriter(false)
+	}
+	dataHashFunc := func() *bmt.Hasher {
+		return bmt.New(poolSync)
+	}
+
+	_, data := testutil.SerialData(chunkSize*branches*branches, 255, 0)
+	h := New(sectionSize, branches, dataHashFunc, refHashFunc)
+
+	jobs := make(map[string]int)
+	for i := 0; i < chunkSize*branches*branches; i += chunkSize {
+		h.Write(i, data[i:i+chunkSize])
+		jobs[h.job.String()]++
+	}
+	i := 0
+	for _, v := range jobs {
+		if v != branches {
+			t.Fatalf("jobwritecount writes: expected %d, got %d", branches, v)
+		}
+		i++
+	}
+	if i != branches {
+		t.Fatalf("jobwritecount jobs: expected %d, got %d", branches, i)
+	}
+}
+
+// TestHasherOneFullLevelOneChunk verifies the result of writing branches * branches data chunks to Hasher
+func TestHasherOneFullLevelOneChunk(t *testing.T) {
+	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	refHashFunc := func() param.SectionWriter {
+		return bmt.New(poolAsync).NewAsyncWriter(false)
+	}
+	dataHashFunc := func() *bmt.Hasher {
+		return bmt.New(poolSync)
+	}
+
+	_, data := testutil.SerialData(chunkSize*branches*branches, 255, 0)
+	h := New(sectionSize, branches, dataHashFunc, refHashFunc)
+	var i int
+	for i = 0; i < chunkSize*branches*branches; i += chunkSize {
+		h.Write(i, data[i:i+chunkSize])
+	}
+	ref := h.Sum(nil, i, nil)
+	correctRootHash := "0x522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b"
+	rootHash := hexutil.Encode(ref)
+	if rootHash != correctRootHash {
+		t.Fatalf("roothash; expected %s, got %s", correctRootHash, rootHash)
+	}
+}
+
+func TestHasherVector(t *testing.T) {
+	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	refHashFunc := func() param.SectionWriter {
+		return bmt.New(poolAsync).NewAsyncWriter(false)
+	}
+	dataHashFunc := func() *bmt.Hasher {
+		return bmt.New(poolSync)
+	}
+
+	var mismatch int
+	for i, dataLength := range dataLengths {
+		log.Info("hashervector start", "i", i, "l", dataLength)
+		eq := true
+		h := New(sectionSize, branches, dataHashFunc, refHashFunc)
+		_, data := testutil.SerialData(dataLength, 255, 0)
+		for j := 0; j < dataLength; j += chunkSize {
+			size := chunkSize
+			if dataLength-j < chunkSize {
+				size = dataLength - j
+			}
+			h.Write(j, data[j:j+size])
+		}
+		ref := h.Sum(nil, dataLength, nil)
+		correctRefHex := "0x" + expected[i]
+		refHex := hexutil.Encode(ref)
+		if refHex != correctRefHex {
+			mismatch++
+			eq = false
+		}
+		t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, ref, expected[i])
+	}
+	if mismatch > 0 {
+		t.Fatalf("mismatches: %d/%d", mismatch, end-start)
+	}
+}
+
+// BenchmarkHasher generates benchmarks that are comparable to the pyramid hasher
+func BenchmarkHasher(b *testing.B) {
+	for i := start; i < end; i++ {
+		b.Run(fmt.Sprintf("%d/%d", i, dataLengths[i]), benchmarkHasher)
+	}
+}
+
+func benchmarkHasher(b *testing.B) {
+	params := strings.Split(b.Name(), "/")
+	dataLengthParam, err := strconv.ParseInt(params[2], 10, 64)
+	if err != nil {
+		b.Fatal(err)
+	}
+	dataLength := int(dataLengthParam)
+
+	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	refHashFunc := func() param.SectionWriter {
+		return bmt.New(poolAsync).NewAsyncWriter(false)
+	}
+	dataHashFunc := func() *bmt.Hasher {
+		return bmt.New(poolSync)
+	}
+	_, data := testutil.SerialData(dataLength, 255, 0)
+
+	for j := 0; j < b.N; j++ {
+		h := New(sectionSize, branches, dataHashFunc, refHashFunc)
+		for i := 0; i < dataLength; i += chunkSize {
+			size := chunkSize
+			if 
dataLength-i < chunkSize { + size = dataLength - i + } + h.Write(i, data[i:i+size]) + } + h.Sum(nil, dataLength, nil) + } +} diff --git a/file/job.go b/file/job.go index 4af7f9118a..70d4bac22e 100644 --- a/file/job.go +++ b/file/job.go @@ -5,7 +5,7 @@ import ( "sync" "sync/atomic" - "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/param" ) // necessary metadata across asynchronous input @@ -29,8 +29,8 @@ type job struct { firstSectionData []byte // store first section of data written to solve the dangling chunk edge case writeC chan jobUnit - writer bmt.SectionWriter // underlying data processor - doneC chan struct{} // pointer to target doneC channel, set to nil in process() when closed + writer param.SectionWriter // underlying data processor + doneC chan struct{} // pointer to target doneC channel, set to nil in process() when closed mu sync.Mutex } diff --git a/file/job_test.go b/file/job_test.go index 2909c49910..a610d33b1d 100644 --- a/file/job_test.go +++ b/file/job_test.go @@ -15,21 +15,22 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) var ( - dummyHashFunc = func() bmt.SectionWriter { + dummyHashFunc = func() param.SectionWriter { return newDummySectionWriter(chunkSize*branches, sectionSize) } // placeholder for cases where a hasher is not necessary - noHashFunc = func() bmt.SectionWriter { + noHashFunc = func() param.SectionWriter { return nil } ) -// simple bmt.SectionWriter hasher that keeps the data written to it +// simple param.SectionWriter hasher that keeps the data written to it // for later inspection // TODO: see if this can be replaced with the fake hasher from storage module type dummySectionWriter struct { @@ -47,7 +48,7 @@ func newDummySectionWriter(cp int, sectionSize int) *dummySectionWriter { } } -// implements bmt.SectionWriter +// implements param.SectionWriter // BUG: not actually writing to hasher func (d *dummySectionWriter) Write(index int, data []byte) { d.mu.Lock() @@ -55,14 +56,14 @@ func (d *dummySectionWriter) Write(index int, data []byte) { copy(d.data[index*sectionSize:], data) } -// implements bmt.SectionWriter +// implements param.SectionWriter func (d *dummySectionWriter) Sum(b []byte, size int, span []byte) []byte { d.mu.Lock() defer d.mu.Unlock() return d.writer.Sum(b) } -// implements bmt.SectionWriter +// implements param.SectionWriter func (d *dummySectionWriter) Reset() { d.mu.Lock() defer d.mu.Unlock() @@ -70,11 +71,16 @@ func (d *dummySectionWriter) Reset() { d.writer.Reset() } -// implements bmt.SectionWriter +// implements param.SectionWriter func (d *dummySectionWriter) SectionSize() int { return d.sectionSize } +// implements param.SectionWriter +func (d *dummySectionWriter) DigestSize() int { + return d.sectionSize +} + // TestDummySectionWriter func TestDummySectionWriter(t *testing.T) { @@ -439,7 +445,7 @@ func TestJobWriteSpan(t *testing.T) { tgt := newTarget() pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - hashFunc := func() bmt.SectionWriter { + hashFunc := func() param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } params := newTreeParams(sectionSize, branches, hashFunc) @@ -489,7 +495,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { tgt := newTarget() pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - hashFunc := func() bmt.SectionWriter { + hashFunc := 
func() param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } params := newTreeParams(sectionSize, branches, hashFunc) @@ -553,7 +559,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { func TestJobVector(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { + refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } dataHash := bmt.New(poolSync) @@ -635,7 +641,7 @@ func benchmarkJob(b *testing.B) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { + refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } dataHash := bmt.New(poolSync) diff --git a/file/param.go b/file/param.go index d6c923d5ef..13ec8d3fa9 100644 --- a/file/param.go +++ b/file/param.go @@ -3,7 +3,7 @@ package file import ( "sync" - "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/param" ) // defines the boundaries of the hashing job and also contains the hash factory functino of the job @@ -14,11 +14,11 @@ type treeParams struct { ChunkSize int Spans []int Debug bool - hashFunc func() bmt.SectionWriter + hashFunc func() param.SectionWriter writerPool sync.Pool } -func newTreeParams(section int, branches int, hashFunc func() bmt.SectionWriter) *treeParams { +func newTreeParams(section int, branches int, hashFunc func() param.SectionWriter) *treeParams { p := &treeParams{ SectionSize: section, diff --git a/file/split.go b/file/split.go index 498a8e979a..28b82d0783 100644 --- a/file/split.go +++ b/file/split.go @@ -3,18 +3,18 @@ package file import ( "io" - "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/param" ) // TODO: grow buffer on demand to reduce allocs // Splitter returns the result of a data stream from a bmt.SectionWriter type Splitter struct { r io.Reader - w bmt.SectionWriter + w param.SectionWriter } // NewSplitter creates a new Splitter object -func NewSplitter(r io.Reader, w bmt.SectionWriter) *Splitter { +func NewSplitter(r io.Reader, w param.SectionWriter) *Splitter { s := &Splitter{ r: r, w: w, diff --git a/file/split_test.go b/file/split_test.go index 766ef9ddbf..37f65fb01e 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -5,17 +5,18 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) // TestSplit creates a Splitter with a reader with one chunk of serial data and -// a Hasher as the underlying bmt.SectionWriter +// a Hasher as the underlying param.SectionWriter // It verifies the returned result func TestSplit(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() bmt.SectionWriter { + refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } dataHashFunc := func() *bmt.Hasher { diff --git a/param/io.go b/param/io.go new file mode 100644 index 0000000000..e0de482759 --- /dev/null +++ b/param/io.go @@ -0,0 +1,10 @@ +package param + +// SectionWriter is an asynchronous segment/section writer interface +type SectionWriter interface { + Reset() // standard 
init to be called before reuse + Write(index int, data []byte) // write into section of index + Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer + SectionSize() int // size of the async section unit to use + DigestSize() int +} From 91c0df12212e7c78320f074a17b0275e69c8fbf4 Mon Sep 17 00:00:00 2001 From: nolash Date: Tue, 3 Dec 2019 22:35:44 +0100 Subject: [PATCH 29/67] file: Separate splitter and hasher in different packages --- file/{ => hasher}/common_test.go | 2 +- file/{ => hasher}/hasher.go | 4 ++-- file/{ => hasher}/hasher_test.go | 2 +- file/{ => hasher}/index.go | 2 +- file/{ => hasher}/job.go | 2 +- file/{ => hasher}/job_test.go | 2 +- file/{ => hasher}/param.go | 2 +- file/{ => hasher}/pyramid_test.go | 2 +- file/{ => hasher}/reference.go | 4 ++-- file/{ => hasher}/reference_test.go | 2 +- file/{ => hasher}/target.go | 2 +- file/{ => hasher}/util.go | 2 +- file/{ => hasher}/util_test.go | 2 +- file/split_test.go | 9 ++++++++- 14 files changed, 23 insertions(+), 16 deletions(-) rename file/{ => hasher}/common_test.go (99%) rename file/{ => hasher}/hasher.go (97%) rename file/{ => hasher}/hasher_test.go (99%) rename file/{ => hasher}/index.go (99%) rename file/{ => hasher}/job.go (99%) rename file/{ => hasher}/job_test.go (99%) rename file/{ => hasher}/param.go (98%) rename file/{ => hasher}/pyramid_test.go (99%) rename file/{ => hasher}/reference.go (98%) rename file/{ => hasher}/reference_test.go (99%) rename file/{ => hasher}/target.go (99%) rename file/{ => hasher}/util.go (99%) rename file/{ => hasher}/util_test.go (99%) diff --git a/file/common_test.go b/file/hasher/common_test.go similarity index 99% rename from file/common_test.go rename to file/hasher/common_test.go index 365a782f30..bad3556420 100644 --- a/file/common_test.go +++ b/file/hasher/common_test.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "github.com/ethersphere/swarm/testutil" diff --git a/file/hasher.go b/file/hasher/hasher.go similarity index 97% rename from file/hasher.go rename to file/hasher/hasher.go index ea6e1a6ab3..db2b184902 100644 --- a/file/hasher.go +++ b/file/hasher/hasher.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "sync" @@ -46,7 +46,7 @@ func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher, writerFun // TODO: enforce buffered writes and limits // TODO: attempt omit modulo calc on every pass func (h *Hasher) Write(index int, b []byte) { - if h.count%branches == 0 && h.count > 0 { + if h.count%h.params.Branches == 0 && h.count > 0 { h.job = h.job.Next() } go func(i int, jb *job) { diff --git a/file/hasher_test.go b/file/hasher/hasher_test.go similarity index 99% rename from file/hasher_test.go rename to file/hasher/hasher_test.go index 311dce6cf2..310e45465c 100644 --- a/file/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "fmt" diff --git a/file/index.go b/file/hasher/index.go similarity index 99% rename from file/index.go rename to file/hasher/index.go index 6a4fbdae99..9e8ab21071 100644 --- a/file/index.go +++ b/file/hasher/index.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "fmt" diff --git a/file/job.go b/file/hasher/job.go similarity index 99% rename from file/job.go rename to file/hasher/job.go index 70d4bac22e..4cb3d24627 100644 --- a/file/job.go +++ b/file/hasher/job.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "fmt" diff --git a/file/job_test.go b/file/hasher/job_test.go similarity index 99% rename from file/job_test.go rename to 
file/hasher/job_test.go index a610d33b1d..85d2f9485f 100644 --- a/file/job_test.go +++ b/file/hasher/job_test.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "bytes" diff --git a/file/param.go b/file/hasher/param.go similarity index 98% rename from file/param.go rename to file/hasher/param.go index 13ec8d3fa9..be6f0e8b2a 100644 --- a/file/param.go +++ b/file/hasher/param.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "sync" diff --git a/file/pyramid_test.go b/file/hasher/pyramid_test.go similarity index 99% rename from file/pyramid_test.go rename to file/hasher/pyramid_test.go index 307ea30d97..a8f03c9ac0 100644 --- a/file/pyramid_test.go +++ b/file/hasher/pyramid_test.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "bytes" diff --git a/file/reference.go b/file/hasher/reference.go similarity index 98% rename from file/reference.go rename to file/hasher/reference.go index eec2582702..369d1e66b7 100644 --- a/file/reference.go +++ b/file/hasher/reference.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "io" @@ -124,7 +124,7 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { // if we're at end, the span is given by the period of the potential span // if not, it will be the full span (since we then must have full chunk writes in the levels below) var dataUnderSpan int - span := f.params.Spans[level] * chunkSize + span := f.params.Spans[level] * f.params.ChunkSize if end { dataUnderSpan = (f.totalBytes-1)%span + 1 } else { diff --git a/file/reference_test.go b/file/hasher/reference_test.go similarity index 99% rename from file/reference_test.go rename to file/hasher/reference_test.go index 778b196672..537d77be40 100644 --- a/file/reference_test.go +++ b/file/hasher/reference_test.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "fmt" diff --git a/file/target.go b/file/hasher/target.go similarity index 99% rename from file/target.go rename to file/hasher/target.go index dfb29f4c0a..8aa655f99d 100644 --- a/file/target.go +++ b/file/hasher/target.go @@ -1,4 +1,4 @@ -package file +package hasher import "sync" diff --git a/file/util.go b/file/hasher/util.go similarity index 99% rename from file/util.go rename to file/hasher/util.go index b19e111ce5..cf9c27ad8c 100644 --- a/file/util.go +++ b/file/hasher/util.go @@ -1,4 +1,4 @@ -package file +package hasher import ( "encoding/binary" diff --git a/file/util_test.go b/file/hasher/util_test.go similarity index 99% rename from file/util_test.go rename to file/hasher/util_test.go index d9ace7bac3..bea855f576 100644 --- a/file/util_test.go +++ b/file/hasher/util_test.go @@ -1,4 +1,4 @@ -package file +package hasher import "testing" diff --git a/file/split_test.go b/file/split_test.go index 37f65fb01e..d42feed073 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -5,11 +5,18 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file/hasher" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) +const ( + sectionSize = 32 + branches = 128 + chunkSize = 4096 +) + // TestSplit creates a Splitter with a reader with one chunk of serial data and // a Hasher as the underlying param.SectionWriter // It verifies the returned result @@ -22,7 +29,7 @@ func TestSplit(t *testing.T) { dataHashFunc := func() *bmt.Hasher { return bmt.New(poolSync) } - h := New(sectionSize, branches, dataHashFunc, refHashFunc) + h := hasher.New(sectionSize, branches, dataHashFunc, 
refHashFunc) r, _ := testutil.SerialData(chunkSize, 255, 0) s := NewSplitter(r, h) From e1f4283f64d91303372d3d7cad8905d1211f2054 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 00:54:11 +0100 Subject: [PATCH 30/67] file: Adjust interface to add context --- bmt/bmt.go | 17 ++++++++++++- file/hasher/hasher.go | 33 +++++++++++++----------- file/hasher/hasher_test.go | 19 +++++++++----- file/hasher/job.go | 52 +++++++++++++++++++++++--------------- file/hasher/job_test.go | 15 ++++++++--- file/hasher/param.go | 13 +++++++++- file/split_test.go | 3 ++- param/io.go | 7 ++++- 8 files changed, 108 insertions(+), 51 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index c9c118d028..4e76b76c84 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -18,11 +18,14 @@ package bmt import ( + "context" "fmt" "hash" "strings" "sync" "sync/atomic" + + "github.com/ethersphere/swarm/param" ) /* @@ -433,7 +436,19 @@ type AsyncHasher struct { write func(i int, section []byte, final bool) } -// methods needed to implement AsyncWriter +// Implements param.SectionWriter +func (sw *AsyncHasher) Reset(_ context.Context) { + sw.Hasher.Reset() +} + +// Implements param.SectionWriter +func (sw *AsyncHasher) Init(_ context.Context) { + +} + +// Implements param.SectionWriter +func (sw *AsyncHasher) Link(_ func() param.SectionWriter) { +} // SectionSize returns the size of async section unit to use // Implements param.SectionWriter diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index db2b184902..0fe141d0fa 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -1,9 +1,11 @@ package hasher import ( + "context" "sync" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/param" ) @@ -23,28 +25,33 @@ type Hasher struct { // New creates a new Hasher object using the given sectionSize and branch factor // hasherFunc is used to create *bmt.Hashers to hash the incoming data // writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs. 
It may be pipelined to other components with the same interface -func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher, writerFunc func() param.SectionWriter) *Hasher { +func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher) *Hasher { h := &Hasher{ target: newTarget(), index: newJobIndex(9), } + h.params = newTreeParams(sectionSize, branches, h.getWriter) h.writerPool.New = func() interface{} { - return writerFunc() + return h.params.hashFunc() } h.hasherPool.New = func() interface{} { return hasherFunc() } - h.params = newTreeParams(sectionSize, branches, h.getWriter) h.job = newJob(h.params, h.target, h.index, 1, 0) - return h } +func (h *Hasher) Link(writerFunc func() param.SectionWriter) { + h.params.hashFunc = writerFunc + h.job.start() +} + // Write implements bmt.SectionWriter // It as a non-blocking call that hashes a data chunk and passes the resulting reference to the hash job representing // the intermediate chunk holding the data references // TODO: enforce buffered writes and limits // TODO: attempt omit modulo calc on every pass +// TODO: preallocate full size span slice func (h *Hasher) Write(index int, b []byte) { if h.count%h.params.Branches == 0 && h.count > 0 { h.job = h.job.Next() @@ -55,7 +62,10 @@ func (h *Hasher) Write(index int, b []byte) { if err != nil { panic(err) } - jb.write(i%h.params.Branches, hasher.Sum(nil)) + span := lengthToSpan(len(b)) + ref := hasher.Sum(nil) + chunk.NewChunk(ref, append(span, b...)) + jb.write(i%h.params.Branches, ref) h.putHasher(hasher) }(h.count, h.job) h.size += len(b) @@ -73,21 +83,14 @@ func (h *Hasher) Sum(_ []byte, length int, _ []byte) []byte { return <-h.target.Done() } -func (h *Hasher) Reset() { +func (h *Hasher) Reset(ctx context.Context) { + h.params.ctx = ctx } func (h *Hasher) SectionSize() int { return h.params.ChunkSize } -func (h *Hasher) Branches() int { - return h.params.Branches -} - -func (h *Hasher) ChunkSize() int { - return h.params.ChunkSize -} - func (h *Hasher) DigestSize() int { return h.params.SectionSize } @@ -107,7 +110,7 @@ func (h *Hasher) getHasher(l int) *bmt.Hasher { // proxy for sync.Pool func (h *Hasher) putWriter(w param.SectionWriter) { - w.Reset() + w.Reset(h.params.ctx) h.writerPool.Put(w) } diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index 310e45465c..f0416137d4 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -26,7 +26,8 @@ func TestHasherJobTopHash(t *testing.T) { } _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc, refHashFunc) + h := New(sectionSize, branches, dataHashFunc) + h.Link(refHashFunc) var i int for i = 0; i < chunkSize*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -52,7 +53,8 @@ func TestHasherOneFullChunk(t *testing.T) { } _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc, refHashFunc) + h := New(sectionSize, branches, dataHashFunc) + h.Link(refHashFunc) var i int for i = 0; i < chunkSize*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -77,8 +79,8 @@ func TestHasherJobChange(t *testing.T) { } _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc, refHashFunc) - + h := New(sectionSize, branches, dataHashFunc) + h.Link(refHashFunc) jobs := make(map[string]int) for i := 0; i < chunkSize*branches*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -108,7 +110,8 @@ func 
TestHasherOneFullLevelOneChunk(t *testing.T) { } _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc, refHashFunc) + h := New(sectionSize, branches, dataHashFunc) + h.Link(refHashFunc) var i int for i = 0; i < chunkSize*branches*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -135,7 +138,8 @@ func TestHasherVector(t *testing.T) { for i, dataLength := range dataLengths { log.Info("hashervector start", "i", i, "l", dataLength) eq := true - h := New(sectionSize, branches, dataHashFunc, refHashFunc) + h := New(sectionSize, branches, dataHashFunc) + h.Link(refHashFunc) _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < dataLength; j += chunkSize { size := chunkSize @@ -184,7 +188,8 @@ func benchmarkHasher(b *testing.B) { _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < b.N; j++ { - h := New(sectionSize, branches, dataHashFunc, refHashFunc) + h := New(sectionSize, branches, dataHashFunc) + h.Link(refHashFunc) for i := 0; i < dataLength; i += chunkSize { size := chunkSize if dataLength-i < chunkSize { diff --git a/file/hasher/job.go b/file/hasher/job.go index 4cb3d24627..8c910ebdbc 100644 --- a/file/hasher/job.go +++ b/file/hasher/job.go @@ -5,6 +5,8 @@ import ( "sync" "sync/atomic" + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -41,7 +43,6 @@ func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSe index: jobIndex, level: lvl, dataSection: dataSection, - writer: params.hashFunc(), writeC: make(chan jobUnit), target: tgt, doneC: nil, @@ -51,22 +52,24 @@ func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSe } targetLevel := tgt.Level() if targetLevel == 0 { - //log.Trace("target not set", "level", lvl) + log.Trace("target not set", "level", lvl) jb.doneC = tgt.doneC } else { targetCount := tgt.Count() jb.endCount = int32(jb.targetCountToEndCount(targetCount)) } - //log.Trace("target count", "level", lvl, "count", tgt.Count()) + log.Trace("target count", "level", lvl, "count", tgt.Count()) jb.index.Add(jb) - if !params.Debug { - go jb.process() - } return jb } +func (jb *job) start() { + jb.writer = jb.params.hashFunc() + go jb.process() +} + // implements Stringer interface func (jb *job) String() string { return fmt.Sprintf("job: l:%d,s:%d", jb.level, jb.dataSection) @@ -94,7 +97,7 @@ func (jb *job) size() int { if endCount%jb.params.Branches == 0 { return count * jb.params.SectionSize * jb.params.Spans[jb.level] } - //log.Trace("size", "sections", jb.target.sections, "size", jb.target.Size(), "endcount", endCount, "level", jb.level) + log.Trace("size", "sections", jb.target.sections, "size", jb.target.Size(), "endcount", endCount, "level", jb.level) return jb.target.Size() % (jb.params.Spans[jb.level] * jb.params.SectionSize * jb.params.Branches) } @@ -112,7 +115,7 @@ func (jb *job) write(index int, data []byte) { if jb.dataSection == 0 && index == 0 { topHashLevel := jb.index.GetTopHashLevel() if topHashLevel < jb.level { - //log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) + log.Trace("have tophash", "level", jb.level, "ref", hexutil.Encode(data)) jb.index.AddTopHash(data) } } @@ -147,19 +150,20 @@ OUTER: if entry.index == 0 { jb.firstSectionData = entry.data } - //log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index, "data", 
hexutil.Encode(entry.data)) - // this write is superfluous when the received data is the root hash + log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) + + // TODO: this write is superfluous when the received data is the root hash jb.writer.Write(entry.index, entry.data) // since newcount is incremented above it can only equal endcount if this has been set in the case below, // which means data write has been completed // otherwise if we reached the chunk limit we also continue to hashing if processCount == endCount { - //log.Trace("quitting writec - endcount", "c", processCount, "level", jb.level) + log.Trace("quitting writec - endcount", "c", processCount, "level", jb.level) break OUTER } if processCount == jb.params.Branches { - //log.Trace("quitting writec - branches") + log.Trace("quitting writec - branches") break OUTER } @@ -168,7 +172,7 @@ OUTER: case <-jb.doneC: jb.mu.Lock() jb.doneC = nil - //log.Trace("doneloop", "level", jb.level, "processCount", processCount, "endcount", jb.endCount) + log.Trace("doneloop", "level", jb.level, "processCount", processCount, "endcount", jb.endCount) //count := jb.count() // if the target count falls within the span of this job @@ -176,13 +180,13 @@ OUTER: // determining span in case of unbalanced tree targetCount := jb.target.Count() jb.endCount = int32(jb.targetCountToEndCount(targetCount)) - //log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", jb.endCount) + log.Trace("doneloop done", "level", jb.level, "targetcount", jb.target.Count(), "endcount", jb.endCount) // if we have reached the end count for this chunk, we proceed to hashing // this case is important when write to the level happen after this goroutine // registers that data writes have been completed if processCount > 0 && processCount == int(jb.endCount) { - //log.Trace("quitting donec", "level", jb.level, "processcount", processCount) + log.Trace("quitting donec", "level", jb.level, "processcount", processCount) jb.mu.Unlock() break OUTER } @@ -205,7 +209,7 @@ func (jb *job) sum() { size := jb.size() span := lengthToSpan(size) refSize := jb.count() * jb.params.SectionSize - //log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) + log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) ref := jb.writer.Sum(nil, refSize, span) // endCount > 0 means this is the last chunk on the level @@ -218,7 +222,7 @@ func (jb *job) sum() { // retrieve the parent and the corresponding section in it to write to parent := jb.parent() - //log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) + log.Trace("have parent", "level", jb.level, "jb p", fmt.Sprintf("%p", jb), "jbp p", fmt.Sprintf("%p", parent)) nextLevel := jb.level + 1 parentSection := dataSectionToLevelSection(jb.params, nextLevel, jb.dataSection) @@ -227,7 +231,7 @@ func (jb *job) sum() { if jb.endCount == 1 { ref = jb.firstSectionData for parent.level < belowRootLevel { - //log.Trace("parent write skip", "level", parent.level) + log.Trace("parent write skip", "level", parent.level) oldParent := parent parent = parent.parent() 
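// the skipped parent is never summed; destroy resets its writer and removes it from the job index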
oldParent.destroy() @@ -284,17 +288,23 @@ func (jb *job) parent() *job { if parent != nil { return parent } - return newJob(jb.params, jb.target, jb.index, jb.level+1, newDataSection) + jbp := newJob(jb.params, jb.target, jb.index, jb.level+1, newDataSection) + jbp.start() + return jbp } // Next creates the job for the next data section span on the same level as the receiver job // this is only meant to be called once for each job, consecutive calls will overwrite index with new empty job func (jb *job) Next() *job { - return newJob(jb.params, jb.target, jb.index, jb.level, jb.dataSection+jb.params.Spans[jb.level+1]) + jbn := newJob(jb.params, jb.target, jb.index, jb.level, jb.dataSection+jb.params.Spans[jb.level+1]) + jbn.start() + return jbn } // cleans up the job; reset hasher and remove pointer to job from index func (jb *job) destroy() { - jb.writer.Reset() + if jb.writer != nil { + jb.writer.Reset(jb.params.GetContext()) + } jb.index.Delete(jb) } diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index 85d2f9485f..9dcb95af92 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -48,6 +48,9 @@ func newDummySectionWriter(cp int, sectionSize int) *dummySectionWriter { } } +func (d *dummySectionWriter) Link(_ func() param.SectionWriter) { +} + // implements param.SectionWriter // BUG: not actually writing to hasher func (d *dummySectionWriter) Write(index int, data []byte) { @@ -64,7 +67,7 @@ func (d *dummySectionWriter) Sum(b []byte, size int, span []byte) []byte { } // implements param.SectionWriter -func (d *dummySectionWriter) Reset() { +func (d *dummySectionWriter) Reset(_ context.Context) { d.mu.Lock() defer d.mu.Unlock() d.data = make([]byte, len(d.data)) @@ -85,7 +88,7 @@ func (d *dummySectionWriter) DigestSize() int { func TestDummySectionWriter(t *testing.T) { w := newDummySectionWriter(chunkSize*2, sectionSize) - w.Reset() + w.Reset(context.Background()) data := make([]byte, 32) rand.Seed(23115) @@ -151,7 +154,6 @@ func TestTarget(t *testing.T) { // falls within a particular job's span func TestTargetWithinJob(t *testing.T) { params := newTreeParams(sectionSize, branches, dummyHashFunc) - params.Debug = true index := newJobIndex(9) tgt := newTarget() @@ -308,6 +310,7 @@ func TestJobWriteTwoAndFinish(t *testing.T) { params := newTreeParams(sectionSize*2, branches, dummyHashFunc) jb := newJob(params, tgt, nil, 1, 0) + jb.start() _, data := testutil.SerialData(sectionSize*2, 255, 0) jb.write(0, data[:sectionSize]) jb.write(1, data[:sectionSize]) @@ -342,6 +345,7 @@ func TestGetJobParent(t *testing.T) { params := newTreeParams(sectionSize, branches, dummyHashFunc) jb := newJob(params, tgt, nil, 1, branches*branches) + jb.start() jbp := jb.parent() if jbp == nil { t.Fatalf("parent: nil") @@ -414,7 +418,7 @@ func TestJobWriteFull(t *testing.T) { params := newTreeParams(sectionSize, branches, dummyHashFunc) jb := newJob(params, tgt, nil, 1, 0) - + jb.start() _, data := testutil.SerialData(chunkSize, 255, 0) for i := 0; i < branches; i++ { jb.write(i, data[i*sectionSize:i*sectionSize+sectionSize]) @@ -451,6 +455,7 @@ func TestJobWriteSpan(t *testing.T) { params := newTreeParams(sectionSize, branches, hashFunc) jb := newJob(params, tgt, nil, 1, 0) + jb.start() _, data := testutil.SerialData(chunkSize+sectionSize*2, 255, 0) for i := 0; i < chunkSize; i += sectionSize { @@ -501,6 +506,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { params := newTreeParams(sectionSize, branches, hashFunc) jb := newJob(params, tgt, nil, 1, 0) + jb.start() _, data := 
testutil.SerialData(chunkSize+sectionSize*2, 255, 0) var idxs []int @@ -571,6 +577,7 @@ func TestJobVector(t *testing.T) { dataLength := dataLengths[i] _, data := testutil.SerialData(dataLength, 255, 0) jb := newJob(params, tgt, nil, 1, 0) + jb.start() count := 0 log.Info("test vector", "length", dataLength) for i := 0; i < dataLength; i += chunkSize { diff --git a/file/hasher/param.go b/file/hasher/param.go index be6f0e8b2a..38c3290272 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -1,6 +1,7 @@ package hasher import ( + "context" "sync" "github.com/ethersphere/swarm/param" @@ -16,6 +17,7 @@ type treeParams struct { Debug bool hashFunc func() param.SectionWriter writerPool sync.Pool + ctx context.Context } func newTreeParams(section int, branches int, hashFunc func() param.SectionWriter) *treeParams { @@ -25,9 +27,10 @@ func newTreeParams(section int, branches int, hashFunc func() param.SectionWrite Branches: branches, ChunkSize: section * branches, hashFunc: hashFunc, + ctx: context.Background(), } p.writerPool.New = func() interface{} { - return hashFunc() + return p.hashFunc() } span := 1 @@ -37,3 +40,11 @@ func newTreeParams(section int, branches int, hashFunc func() param.SectionWrite } return p } + +func (p *treeParams) SetContext(ctx context.Context) { + p.ctx = ctx +} + +func (p *treeParams) GetContext() context.Context { + return p.ctx +} diff --git a/file/split_test.go b/file/split_test.go index d42feed073..5a610161f0 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -29,7 +29,8 @@ func TestSplit(t *testing.T) { dataHashFunc := func() *bmt.Hasher { return bmt.New(poolSync) } - h := hasher.New(sectionSize, branches, dataHashFunc, refHashFunc) + h := hasher.New(sectionSize, branches, dataHashFunc) + h.Link(refHashFunc) r, _ := testutil.SerialData(chunkSize, 255, 0) s := NewSplitter(r, h) diff --git a/param/io.go b/param/io.go index e0de482759..4325a216b4 100644 --- a/param/io.go +++ b/param/io.go @@ -1,8 +1,13 @@ package param +import ( + "context" +) + // SectionWriter is an asynchronous segment/section writer interface type SectionWriter interface { - Reset() // standard init to be called before reuse + Link(writerFunc func() SectionWriter) + Reset(ctx context.Context) // standard init to be called before reuse Write(index int, data []byte) // write into section of index Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer SectionSize() int // size of the async section unit to use From 835c4b4104da9430604e8ed45ff7010f67f7dedd Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 01:16:26 +0100 Subject: [PATCH 31/67] file: Add SectionWriter storer interface for chunks --- bmt/bmt.go | 7 ++--- file/hasher/hasher.go | 4 +++ file/hasher/job_test.go | 3 ++ file/store/store.go | 63 +++++++++++++++++++++++++++++++++++++++++ param/io.go | 11 +++---- 5 files changed, 79 insertions(+), 9 deletions(-) create mode 100644 file/store/store.go diff --git a/bmt/bmt.go b/bmt/bmt.go index 4e76b76c84..d72a38093e 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -437,13 +437,12 @@ type AsyncHasher struct { } // Implements param.SectionWriter -func (sw *AsyncHasher) Reset(_ context.Context) { - sw.Hasher.Reset() +func (sw *AsyncHasher) Init(_ context.Context, errFunc func(error)) { } // Implements param.SectionWriter -func (sw *AsyncHasher) Init(_ context.Context) { - +func (sw *AsyncHasher) Reset(_ context.Context) { + sw.Hasher.Reset() } // Implements param.SectionWriter diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index 
0fe141d0fa..53b1d2bd5b 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -41,6 +41,10 @@ func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher) *Hasher { return h } +func (h *Hasher) Init(ctx context.Context, errFunc func(error)) { + h.params.SetContext(ctx) +} + func (h *Hasher) Link(writerFunc func() param.SectionWriter) { h.params.hashFunc = writerFunc h.job.start() diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index 9dcb95af92..d5f7c26622 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -48,6 +48,9 @@ func newDummySectionWriter(cp int, sectionSize int) *dummySectionWriter { } } +func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { +} + func (d *dummySectionWriter) Link(_ func() param.SectionWriter) { } diff --git a/file/store/store.go b/file/store/store.go new file mode 100644 index 0000000000..20f55d63b2 --- /dev/null +++ b/file/store/store.go @@ -0,0 +1,63 @@ +package store + +import ( + "context" + + "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/param" +) + +type FileStore struct { + chunkStore chunk.Store + w param.SectionWriter + ctx context.Context + data [][]byte + errFunc func(error) +} + +func New(chunkStore chunk.Store) *FileStore { + return &FileStore{ + chunkStore: chunkStore, + } +} + +func (f *FileStore) Init(ctx context.Context, errFunc func(error)) { + f.ctx = ctx + f.errFunc = errFunc +} + +func (f *FileStore) Link(writerFunc func() param.SectionWriter) { + f.w = writerFunc() +} + +func (f *FileStore) Reset(ctx context.Context) { + f.ctx = ctx +} + +func (f *FileStore) Write(index int, b []byte) { + f.data = append(f.data, b) +} + +func (f *FileStore) Sum(b []byte, length int, span []byte) []byte { + ref := f.w.Sum(b, length, span) + go func(ref []byte) { + var b []byte + for _, data := range f.data { + b = append(b, data...) 
+ } + ch := chunk.NewChunk(ref, b) + _, err := f.chunkStore.Put(f.ctx, chunk.ModePutUpload, ch) + if err != nil { + f.errFunc(err) + } + }(ref) + return ref +} + +func (f *FileStore) SectionSize() int { + return chunk.DefaultSize +} + +func (f *FileStore) DigestSize() int { + return f.w.DigestSize() +} diff --git a/param/io.go b/param/io.go index 4325a216b4..0b373defc0 100644 --- a/param/io.go +++ b/param/io.go @@ -6,10 +6,11 @@ import ( // SectionWriter is an asynchronous segment/section writer interface type SectionWriter interface { - Link(writerFunc func() SectionWriter) - Reset(ctx context.Context) // standard init to be called before reuse - Write(index int, data []byte) // write into section of index - Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer - SectionSize() int // size of the async section unit to use + Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination + Link(writerFunc func() SectionWriter) // sets the writer the current writer should pipeline to + Reset(ctx context.Context) // standard init to be called before reuse + Write(index int, data []byte) // write into section of index + Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer + SectionSize() int // size of the async section unit to use DigestSize() int } From 4f26df0c3673f2717e75f491966be6e8bce7881b Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 02:01:23 +0100 Subject: [PATCH 32/67] file: Add test for FileStore SectionWriter --- bmt/bmt.go | 10 ++++ file/hasher/hasher.go | 4 +- file/hasher/job.go | 3 +- file/hasher/job_test.go | 4 +- file/hasher/reference.go | 2 +- file/hasher/reference_test.go | 8 +-- file/hasher/util.go | 10 ---- file/store/store.go | 17 ++++++- file/store/store_test.go | 91 +++++++++++++++++++++++++++++++++++ 9 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 file/store/store_test.go diff --git a/bmt/bmt.go b/bmt/bmt.go index d72a38093e..3fc33c57c1 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -19,6 +19,7 @@ package bmt import ( "context" + "encoding/binary" "fmt" "hash" "strings" @@ -701,3 +702,12 @@ func calculateDepthFor(n int) (d int) { } return d + 1 } + +// creates a binary span size representation +// to pass to bmt.SectionWriter +// TODO: move to bmt.SectionWriter, which is the object for which this is actually relevant +func LengthToSpan(length int) []byte { + spanBytes := make([]byte, 8) + binary.LittleEndian.PutUint64(spanBytes, uint64(length)) + return spanBytes +} diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index 53b1d2bd5b..2e4ea4b0ea 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -66,7 +66,7 @@ func (h *Hasher) Write(index int, b []byte) { if err != nil { panic(err) } - span := lengthToSpan(len(b)) + span := bmt.LengthToSpan(len(b)) ref := hasher.Sum(nil) chunk.NewChunk(ref, append(span, b...)) jb.write(i%h.params.Branches, ref) @@ -106,7 +106,7 @@ func (h *Hasher) putHasher(w *bmt.Hasher) { // proxy for sync.Pool func (h *Hasher) getHasher(l int) *bmt.Hasher { - span := lengthToSpan(l) + span := bmt.LengthToSpan(l) hasher := h.hasherPool.Get().(*bmt.Hasher) hasher.ResetWithLength(span) return hasher diff --git a/file/hasher/job.go b/file/hasher/job.go index 8c910ebdbc..245f73ada8 100644 --- a/file/hasher/job.go +++ b/file/hasher/job.go @@ -6,6 +6,7 @@ import ( "sync/atomic" "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/bmt" 
"github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -207,7 +208,7 @@ func (jb *job) sum() { // get the size of the span and execute the hash digest of the content size := jb.size() - span := lengthToSpan(size) + span := bmt.LengthToSpan(size) refSize := jb.count() * jb.params.SectionSize log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) ref := jb.writer.Sum(nil, refSize, span) diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index d5f7c26622..2a6e3cbe1e 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -589,7 +589,7 @@ func TestJobVector(t *testing.T) { ie = dataLength } writeSize := ie - i - span := lengthToSpan(writeSize) + span := bmt.LengthToSpan(writeSize) log.Debug("data write", "i", i, "length", writeSize, "span", span) dataHash.ResetWithLength(span) c, err := dataHash.Write(data[i:ie]) @@ -669,7 +669,7 @@ func benchmarkJob(b *testing.B) { ie = dataLength } writeSize := ie - i - span := lengthToSpan(writeSize) + span := bmt.LengthToSpan(writeSize) dataHash.ResetWithLength(span) c, err := dataHash.Write(data[i:ie]) if err != nil { diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 369d1e66b7..2092a8073c 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -147,7 +147,7 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { // prepare the hasher, // write data since previous hash operation from the current level cursor position // and sum - spanBytes := lengthToSpan(dataUnderSpan) + spanBytes := bmt.LengthToSpan(dataUnderSpan) f.hasher.ResetWithLength(spanBytes) hasherWriteOffset := f.cursors[level+1] * f.params.SectionSize f.hasher.Write(f.buffer[hasherWriteOffset : hasherWriteOffset+hashDataSize]) diff --git a/file/hasher/reference_test.go b/file/hasher/reference_test.go index 537d77be40..c73bc84d69 100644 --- a/file/hasher/reference_test.go +++ b/file/hasher/reference_test.go @@ -31,7 +31,7 @@ func TestManualDanglingChunk(t *testing.T) { // hash the balanced tree portion of the data level and write to level 1 _, levels[0] = testutil.SerialData(chunkSize*branches+chunkSize, 255, 0) - span := lengthToSpan(chunkSize) + span := bmt.LengthToSpan(chunkSize) for i := 0; i < chunkSize*branches; i += chunkSize { h.ResetWithLength(span) h.Write(levels[0][i : i+chunkSize]) @@ -45,7 +45,7 @@ func TestManualDanglingChunk(t *testing.T) { // write the dangling chunk // hash it and write the reference on the second section of level 2 - span = lengthToSpan(chunkSize) + span = bmt.LengthToSpan(chunkSize) h.ResetWithLength(span) h.Write(levels[0][chunkSize*branches:]) copy(levels[2][sectionSize:], h.Sum(nil)) @@ -56,7 +56,7 @@ func TestManualDanglingChunk(t *testing.T) { } // hash the chunk on level 1 and write into the first section of level 2 - span = lengthToSpan(chunkSize * branches) + span = bmt.LengthToSpan(chunkSize * branches) h.ResetWithLength(span) h.Write(levels[1]) copy(levels[2], h.Sum(nil)) @@ -67,7 +67,7 @@ func TestManualDanglingChunk(t *testing.T) { } // hash the two sections on level 2 to obtain the root hash - span = lengthToSpan(chunkSize*branches + chunkSize) + span = bmt.LengthToSpan(chunkSize*branches + chunkSize) h.ResetWithLength(span) h.Write(levels[2]) ref := h.Sum(nil) diff --git a/file/hasher/util.go b/file/hasher/util.go index cf9c27ad8c..b082789ac5 100644 --- a/file/hasher/util.go +++ b/file/hasher/util.go @@ 
-1,19 +1,9 @@ package hasher import ( - "encoding/binary" "math" ) -// creates a binary span size representation -// to pass to bmt.SectionWriter -// TODO: move to bmt.SectionWriter, which is the object for which this is actually relevant -func lengthToSpan(length int) []byte { - spanBytes := make([]byte, 8) - binary.LittleEndian.PutUint64(spanBytes, uint64(length)) - return spanBytes -} - // TODO: use params instead of sectionSize // calculates the section index of the given byte size func dataSizeToSectionIndex(length int, sectionSize int) int { diff --git a/file/store/store.go b/file/store/store.go index 20f55d63b2..8b8b05f73b 100644 --- a/file/store/store.go +++ b/file/store/store.go @@ -7,6 +7,10 @@ import ( "github.com/ethersphere/swarm/param" ) +// FileStore implements param.SectionWriter +// It intercepts data between source and hasher +// and compiles the data with the received hash on sum +// to a chunk to be passed to underlying chunk.Store.Put type FileStore struct { chunkStore chunk.Store w param.SectionWriter @@ -15,33 +19,42 @@ type FileStore struct { errFunc func(error) } +// New creates a new FileStore with the supplied chunk.Store func New(chunkStore chunk.Store) *FileStore { return &FileStore{ chunkStore: chunkStore, } } +// Init implements param.SectionWriter func (f *FileStore) Init(ctx context.Context, errFunc func(error)) { f.ctx = ctx f.errFunc = errFunc } +// Link implements param.SectionWriter func (f *FileStore) Link(writerFunc func() param.SectionWriter) { f.w = writerFunc() } +// Reset implements param.SectionWriter func (f *FileStore) Reset(ctx context.Context) { f.ctx = ctx } +// Write implements param.SectionWriter +// it asynchronously writes to the underlying writer while caching the data slice func (f *FileStore) Write(index int, b []byte) { + go f.w.Write(index, b) f.data = append(f.data, b) } +// Sum implements param.SectionWriter +// calls underlying writer's Sum and sends the result with data as a chunk to chunk.Store func (f *FileStore) Sum(b []byte, length int, span []byte) []byte { ref := f.w.Sum(b, length, span) go func(ref []byte) { - var b []byte + b = span for _, data := range f.data { b = append(b, data...) 
 		}
@@ -54,10 +67,12 @@ func (f *FileStore) Sum(b []byte, length int, span []byte) []byte {
 	return ref
 }
 
+// SectionSize implements param.SectionWriter
 func (f *FileStore) SectionSize() int {
 	return chunk.DefaultSize
 }
 
+// DigestSize implements param.SectionWriter
 func (f *FileStore) DigestSize() int {
 	return f.w.DigestSize()
 }
diff --git a/file/store/store_test.go b/file/store/store_test.go
new file mode 100644
index 0000000000..80da3b3ee5
--- /dev/null
+++ b/file/store/store_test.go
@@ -0,0 +1,91 @@
+package store
+
+import (
+	"bytes"
+	"context"
+	"testing"
+	"time"
+
+	"github.com/ethersphere/swarm/bmt"
+	"github.com/ethersphere/swarm/chunk"
+	"github.com/ethersphere/swarm/param"
+	"github.com/ethersphere/swarm/storage"
+	"github.com/ethersphere/swarm/testutil"
+	"golang.org/x/crypto/sha3"
+)
+
+const (
+	sectionSize = 32
+	branches    = 128
+	chunkSize   = 4096
+)
+
+// wraps storage.FakeChunkStore to intercept incoming chunk
+type testChunkStore struct {
+	*storage.FakeChunkStore
+	chunkC chan<- chunk.Chunk
+}
+
+func newTestChunkStore(chunkC chan<- chunk.Chunk) *testChunkStore {
+	return &testChunkStore{
+		FakeChunkStore: &storage.FakeChunkStore{},
+		chunkC:         chunkC,
+	}
+}
+
+// Put overrides storage.FakeChunkStore.Put
+func (s *testChunkStore) Put(_ context.Context, _ chunk.ModePut, chs ...chunk.Chunk) ([]bool, error) {
+	for _, ch := range chs {
+		s.chunkC <- ch
+	}
+	return s.FakeChunkStore.Put(nil, 0, chs...)
+}
+
+// TestStoreWithHasher writes a single chunk and verifies the asynchronously received chunk
+// through the underlying chunk store
+func TestStoreWithHasher(t *testing.T) {
+	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	hashFunc := func() param.SectionWriter {
+		return bmt.New(pool).NewAsyncWriter(false)
+	}
+
+	// initialize chunk store with channel to intercept chunk
+	chunkC := make(chan chunk.Chunk)
+	store := newTestChunkStore(chunkC)
+
+	// initialize FileStore
+	h := New(store)
+	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
+	defer cancel()
+	h.Init(ctx, nil)
+	h.Link(hashFunc)
+
+	// Write data to Store
+	_, data := testutil.SerialData(chunkSize, 255, 0)
+	span := bmt.LengthToSpan(chunkSize)
+	go func() {
+		for i := 0; i < chunkSize; i += sectionSize {
+			h.Write(i/sectionSize, data[i:i+sectionSize])
+		}
+		h.Sum(nil, chunkSize, span)
+	}()
+
+	// capture chunk and verify contents
+	select {
+	case ch := <-chunkC:
+		if !bytes.Equal(ch.Data()[:8], span) {
+			t.Fatalf("chunk span; expected %x, got %x", span, ch.Data()[:8])
+		}
+		if !bytes.Equal(ch.Data()[8:], data) {
+			t.Fatalf("chunk data; expected %x, got %x", data, ch.Data()[8:])
+		}
+		refHex := ch.Address().Hex()
+		correctRefHex := "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef"
+		if refHex != correctRefHex {
+			t.Fatalf("chunk ref; expected %s, got %s", correctRefHex, refHex)
+		}
+
+	case <-ctx.Done():
+		t.Fatalf("timeout %v", ctx.Err())
+	}
+}

From eaf0af108e421082a58c57b1da49ec6ba183cf65 Mon Sep 17 00:00:00 2001
From: nolash
Date: Wed, 4 Dec 2019 02:22:41 +0100
Subject: [PATCH 33/67] file: Add FileStore integration test with Splitter

---
 file/split_test.go       | 45 ++++++++++++++++++++++++++++++++++
 file/store/store.go      |  4 +++-
 file/store/store_test.go |  4 ++++
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/file/split_test.go b/file/split_test.go
index 5a610161f0..9586111828 100644
--- a/file/split_test.go
+++ b/file/split_test.go
@@ -1,12 +1,16 @@
 package file
 
 import (
+	"context"
 	"testing"
+	"time"
"github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/file/hasher" + "github.com/ethersphere/swarm/file/store" "github.com/ethersphere/swarm/param" + "github.com/ethersphere/swarm/storage" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) @@ -17,6 +21,10 @@ const ( chunkSize = 4096 ) +func init() { + testutil.Init() +} + // TestSplit creates a Splitter with a reader with one chunk of serial data and // a Hasher as the underlying param.SectionWriter // It verifies the returned result @@ -44,3 +52,40 @@ func TestSplit(t *testing.T) { t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) } } + +func TestSplitWithFileStore(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + chunkStore := &storage.FakeChunkStore{} + storeFunc := func() param.SectionWriter { + h := store.New(chunkStore) + h.Init(ctx, func(_ error) {}) + h.Link(refHashFunc) + return h + } + + dataHashFunc := func() *bmt.Hasher { + return bmt.New(poolSync) + } + + h := hasher.New(sectionSize, branches, dataHashFunc) + h.Link(storeFunc) + + r, _ := testutil.SerialData(chunkSize*2, 255, 0) + s := NewSplitter(r, h) + ref, err := s.Split() + if err != nil { + t.Fatal(err) + } + refHex := hexutil.Encode(ref) + correctRefHex := "0x29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9" + if refHex != correctRefHex { + t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) + } +} diff --git a/file/store/store.go b/file/store/store.go index 8b8b05f73b..a63967286e 100644 --- a/file/store/store.go +++ b/file/store/store.go @@ -4,6 +4,7 @@ import ( "context" "github.com/ethersphere/swarm/chunk" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -45,13 +46,14 @@ func (f *FileStore) Reset(ctx context.Context) { // Write implements param.SectionWriter // it asynchronously writes to the underlying writer while caching the data slice func (f *FileStore) Write(index int, b []byte) { - go f.w.Write(index, b) + f.w.Write(index, b) f.data = append(f.data, b) } // Sum implements param.SectionWriter // calls underlying writer's Sum and sends the result with data as a chunk to chunk.Store func (f *FileStore) Sum(b []byte, length int, span []byte) []byte { + log.Trace("filestore put chunk", "ch", span) ref := f.w.Sum(b, length, span) go func(ref []byte) { b = span diff --git a/file/store/store_test.go b/file/store/store_test.go index 80da3b3ee5..e95117d87f 100644 --- a/file/store/store_test.go +++ b/file/store/store_test.go @@ -20,6 +20,10 @@ const ( chunkSize = 4096 ) +func init() { + testutil.Init() +} + // wraps storage.FakeChunkStore to intercept incoming chunk type testChunkStore struct { *storage.FakeChunkStore From e5fd998d59b6d3f9d617c41f7efde287c2044931 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 02:50:49 +0100 Subject: [PATCH 34/67] file, bmt: Wrap bmt.Hasher in param.SectionWriter --- bmt/bmt.go | 4 +++ file/hasher/hasher.go | 61 ++++++++++++++++++++++++++++++-------- file/hasher/hasher_test.go | 24 +++++++-------- file/split_test.go | 3 +- file/store/store.go | 2 +- 5 files changed, 68 insertions(+), 26 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index 3fc33c57c1..4b828b76e6 
100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -292,6 +292,10 @@ func (h *Hasher) BlockSize() int {
 	return 2 * h.pool.SegmentSize
 }
 
+func (h *Hasher) ChunkSize() int {
+	return h.pool.Size
+}
+
 // Sum returns the BMT root hash of the buffer
 // using Sum presupposes sequential synchronous writes (io.Writer interface)
 // hash.Hash interface Sum method appends the byte slice to the underlying
diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go
index 2e4ea4b0ea..a860932912 100644
--- a/file/hasher/hasher.go
+++ b/file/hasher/hasher.go
@@ -9,6 +9,45 @@ import (
 	"github.com/ethersphere/swarm/param"
 )
 
+type BMTSyncSectionWriter struct {
+	hasher *bmt.Hasher
+	data   []byte
+}
+
+func NewBMTSyncSectionWriter(hasher *bmt.Hasher) param.SectionWriter {
+	return &BMTSyncSectionWriter{
+		hasher: hasher,
+	}
+}
+
+func (b *BMTSyncSectionWriter) Init(_ context.Context, errFunc func(error)) {
+}
+
+func (b *BMTSyncSectionWriter) Link(_ func() param.SectionWriter) {
+}
+
+func (b *BMTSyncSectionWriter) Sum(extra []byte, _ int, span []byte) []byte {
+	b.hasher.ResetWithLength(span)
+	b.hasher.Write(b.data)
+	return b.hasher.Sum(extra)
+}
+
+func (b *BMTSyncSectionWriter) Reset(_ context.Context) {
+	b.hasher.Reset()
+}
+
+func (b *BMTSyncSectionWriter) Write(_ int, data []byte) {
+	b.data = data
+}
+
+func (b *BMTSyncSectionWriter) SectionSize() int {
+	return b.hasher.ChunkSize()
+}
+
+func (b *BMTSyncSectionWriter) DigestSize() int {
+	return b.hasher.Size()
+}
+
 // Hasher is a bmt.SectionWriter that executes the file hashing algorithm on arbitrary data
 type Hasher struct {
 	target *target
@@ -25,7 +64,7 @@ type Hasher struct {
 // New creates a new Hasher object using the given sectionSize and branch factor
 // hasherFunc is used to create *bmt.Hashers to hash the incoming data
 // writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs.
It may be pipelined to other components with the same interface -func New(sectionSize int, branches int, hasherFunc func() *bmt.Hasher) *Hasher { +func New(sectionSize int, branches int, hasherFunc func() param.SectionWriter) *Hasher { h := &Hasher{ target: newTarget(), index: newJobIndex(9), @@ -62,12 +101,10 @@ func (h *Hasher) Write(index int, b []byte) { } go func(i int, jb *job) { hasher := h.getHasher(len(b)) - _, err := hasher.Write(b) - if err != nil { - panic(err) - } - span := bmt.LengthToSpan(len(b)) - ref := hasher.Sum(nil) + hasher.Write(0, b) + l := len(b) + span := bmt.LengthToSpan(l) + ref := hasher.Sum(nil, l, span) chunk.NewChunk(ref, append(span, b...)) jb.write(i%h.params.Branches, ref) h.putHasher(hasher) @@ -100,15 +137,15 @@ func (h *Hasher) DigestSize() int { } // proxy for sync.Pool -func (h *Hasher) putHasher(w *bmt.Hasher) { +func (h *Hasher) putHasher(w param.SectionWriter) { h.hasherPool.Put(w) } // proxy for sync.Pool -func (h *Hasher) getHasher(l int) *bmt.Hasher { - span := bmt.LengthToSpan(l) - hasher := h.hasherPool.Get().(*bmt.Hasher) - hasher.ResetWithLength(span) +func (h *Hasher) getHasher(l int) param.SectionWriter { + //span := bmt.LengthToSpan(l) + hasher := h.hasherPool.Get().(param.SectionWriter) + hasher.Reset(h.params.ctx) //WithLength(span) return hasher } diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index f0416137d4..2de783ab20 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -21,8 +21,8 @@ func TestHasherJobTopHash(t *testing.T) { refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) + dataHashFunc := func() param.SectionWriter { + return NewBMTSyncSectionWriter(bmt.New(poolSync)) } _, data := testutil.SerialData(chunkSize*branches, 255, 0) @@ -48,8 +48,8 @@ func TestHasherOneFullChunk(t *testing.T) { refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) + dataHashFunc := func() param.SectionWriter { + return NewBMTSyncSectionWriter(bmt.New(poolSync)) } _, data := testutil.SerialData(chunkSize*branches, 255, 0) @@ -74,8 +74,8 @@ func TestHasherJobChange(t *testing.T) { refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) + dataHashFunc := func() param.SectionWriter { + return NewBMTSyncSectionWriter(bmt.New(poolSync)) } _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) @@ -105,8 +105,8 @@ func TestHasherOneFullLevelOneChunk(t *testing.T) { refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) + dataHashFunc := func() param.SectionWriter { + return NewBMTSyncSectionWriter(bmt.New(poolSync)) } _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) @@ -130,8 +130,8 @@ func TestHasherVector(t *testing.T) { refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) + dataHashFunc := func() param.SectionWriter { + return NewBMTSyncSectionWriter(bmt.New(poolSync)) } var mismatch int @@ -182,8 +182,8 @@ func benchmarkHasher(b *testing.B) { refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := 
func() *bmt.Hasher { - return bmt.New(poolSync) + dataHashFunc := func() param.SectionWriter { + return NewBMTSyncSectionWriter(bmt.New(poolSync)) } _, data := testutil.SerialData(dataLength, 255, 0) diff --git a/file/split_test.go b/file/split_test.go index 9586111828..314ca9bdb2 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -53,7 +53,7 @@ func TestSplit(t *testing.T) { } } -func TestSplitWithFileStore(t *testing.T) { +func TestSplitWithIntermediateFileStore(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() param.SectionWriter { @@ -83,6 +83,7 @@ func TestSplitWithFileStore(t *testing.T) { if err != nil { t.Fatal(err) } + time.Sleep(time.Second) refHex := hexutil.Encode(ref) correctRefHex := "0x29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9" if refHex != correctRefHex { diff --git a/file/store/store.go b/file/store/store.go index a63967286e..a944abcf22 100644 --- a/file/store/store.go +++ b/file/store/store.go @@ -53,7 +53,6 @@ func (f *FileStore) Write(index int, b []byte) { // Sum implements param.SectionWriter // calls underlying writer's Sum and sends the result with data as a chunk to chunk.Store func (f *FileStore) Sum(b []byte, length int, span []byte) []byte { - log.Trace("filestore put chunk", "ch", span) ref := f.w.Sum(b, length, span) go func(ref []byte) { b = span @@ -62,6 +61,7 @@ func (f *FileStore) Sum(b []byte, length int, span []byte) []byte { } ch := chunk.NewChunk(ref, b) _, err := f.chunkStore.Put(f.ctx, chunk.ModePutUpload, ch) + log.Trace("filestore put chunk", "ch", ch) if err != nil { f.errFunc(err) } From a0e9ffdde594452ff7e43c4ef9cc57f5d9e4cb7a Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 03:13:30 +0100 Subject: [PATCH 35/67] file: Add test, chunk.Store sink for both data+intermediate hashers --- file/hasher/hasher.go | 23 ++++++++--- file/hasher/target.go | 8 +++- file/split_test.go | 93 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 112 insertions(+), 12 deletions(-) diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index a860932912..dfee3240dd 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -5,45 +5,53 @@ import ( "sync" "github.com/ethersphere/swarm/bmt" - "github.com/ethersphere/swarm/chunk" "github.com/ethersphere/swarm/param" ) +// BMTSyncSectionWriter is a wrapper for bmt.Hasher to implement the param.SectionWriter interface type BMTSyncSectionWriter struct { hasher *bmt.Hasher data []byte } +// NewBMTSyncSectionWriter creates a new BMTSyncSectionWriter func NewBMTSyncSectionWriter(hasher *bmt.Hasher) param.SectionWriter { return &BMTSyncSectionWriter{ hasher: hasher, } } +// Init implements param.SectionWriter func (b *BMTSyncSectionWriter) Init(_ context.Context, errFunc func(error)) { } +// Link implements param.SectionWriter func (b *BMTSyncSectionWriter) Link(_ func() param.SectionWriter) { } +// Sum implements param.SectionWriter func (b *BMTSyncSectionWriter) Sum(extra []byte, _ int, span []byte) []byte { b.hasher.ResetWithLength(span) b.hasher.Write(b.data) return b.hasher.Sum(extra) } +// Reset implements param.SectionWriter func (b *BMTSyncSectionWriter) Reset(_ context.Context) { b.hasher.Reset() } +// Write implements param.SectionWriter func (b *BMTSyncSectionWriter) Write(_ int, data []byte) { b.data = data } +// SectionSize implements param.SectionWriter func (b *BMTSyncSectionWriter) SectionSize() int { return 
b.hasher.ChunkSize()
 }
 
+// DigestSize implements param.SectionWriter
 func (b *BMTSyncSectionWriter) DigestSize() int {
 	return b.hasher.Size()
 }
@@ -80,16 +88,18 @@ func New(sectionSize int, branches int, hasherFunc func() param.SectionWriter) *
 	return h
 }
 
+// Init implements param.SectionWriter
 func (h *Hasher) Init(ctx context.Context, errFunc func(error)) {
 	h.params.SetContext(ctx)
 }
 
+// Link implements param.SectionWriter
 func (h *Hasher) Link(writerFunc func() param.SectionWriter) {
 	h.params.hashFunc = writerFunc
 	h.job.start()
 }
 
-// Write implements bmt.SectionWriter
+// Write implements param.SectionWriter
 // It is a non-blocking call that hashes a data chunk and passes the resulting reference to the hash job representing
 // the intermediate chunk holding the data references
 // TODO: enforce buffered writes and limits
@@ -104,16 +114,14 @@ func (h *Hasher) Write(index int, b []byte) {
 	hasher.Write(0, b)
 	l := len(b)
 	span := bmt.LengthToSpan(l)
-	ref := hasher.Sum(nil, l, span)
-	chunk.NewChunk(ref, append(span, b...))
-	jb.write(i%h.params.Branches, ref)
+	jb.write(i%h.params.Branches, hasher.Sum(nil, l, span))
 	h.putHasher(hasher)
 }(h.count, h.job)
 h.size += len(b)
 h.count++
 }
 
-// Sum implements bmt.SectionWriter
+// Sum implements param.SectionWriter
 // It is a blocking call that calculates the target level and section index of the received data
 // and alerts hasher jobs the end of write is reached
 // It returns the root hash
@@ -124,14 +132,17 @@ func (h *Hasher) Sum(_ []byte, length int, _ []byte) []byte {
 	return <-h.target.Done()
 }
 
+// Reset implements param.SectionWriter
 func (h *Hasher) Reset(ctx context.Context) {
 	h.params.ctx = ctx
 }
 
+// SectionSize implements param.SectionWriter
 func (h *Hasher) SectionSize() int {
 	return h.params.ChunkSize
 }
 
+// DigestSize implements param.SectionWriter
 func (h *Hasher) DigestSize() int {
 	return h.params.SectionSize
 }
diff --git a/file/hasher/target.go b/file/hasher/target.go
index 8aa655f99d..9d566fa490 100644
--- a/file/hasher/target.go
+++ b/file/hasher/target.go
@@ -1,6 +1,10 @@
 package hasher
 
-import "sync"
+import (
+	"sync"
+
+	"github.com/ethersphere/swarm/log"
+)
 
 // passed to a job to determine at which data lengths and levels a job should terminate
 type target struct {
@@ -27,7 +31,7 @@ func (t *target) Set(size int, sections int, level int) {
 	t.size = int32(size)
 	t.sections = int32(sections)
 	t.level = int32(level)
-	//log.Trace("target set", "size", t.size, "section", t.sections, "level", t.level)
+	log.Trace("target set", "size", t.size, "section", t.sections, "level", t.level)
 	close(t.doneC)
 }
diff --git a/file/split_test.go b/file/split_test.go
index 314ca9bdb2..8e3ce0d3d4 100644
--- a/file/split_test.go
+++ b/file/split_test.go
@@ -34,8 +34,8 @@ func TestSplit(t *testing.T) {
 	refHashFunc := func() param.SectionWriter {
 		return
bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() param.SectionWriter { + return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + chunkStore := &storage.FakeChunkStore{} + storeFunc := func() param.SectionWriter { + h := store.New(chunkStore) + h.Init(ctx, func(_ error) {}) + h.Link(dataHashFunc) + return h + } + + h := hasher.New(sectionSize, branches, storeFunc) + h.Init(ctx, func(error) {}) + h.Link(refHashFunc) + + r, _ := testutil.SerialData(chunkSize, 255, 0) + s := NewSplitter(r, h) + ref, err := s.Split() + if err != nil { + t.Fatal(err) + } + time.Sleep(time.Second) + refHex := hexutil.Encode(ref) + correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" + if refHex != correctRefHex { + t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) + } +} + +// TestSplitWithIntermediateFileStore verifies chunk.Store sink result for intermediate hashing func TestSplitWithIntermediateFileStore(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) @@ -70,8 +110,8 @@ func TestSplitWithIntermediateFileStore(t *testing.T) { return h } - dataHashFunc := func() *bmt.Hasher { - return bmt.New(poolSync) + dataHashFunc := func() param.SectionWriter { + return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) } h := hasher.New(sectionSize, branches, dataHashFunc) @@ -90,3 +130,48 @@ func TestSplitWithIntermediateFileStore(t *testing.T) { t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) } } + +// TestSplitWithBothFileStore verifies chunk.Store sink result for both data and intermediate hashing +func TestSplitWithBothFileStore(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + chunkStore := &storage.FakeChunkStore{} + refStoreFunc := func() param.SectionWriter { + h := store.New(chunkStore) + h.Init(ctx, func(_ error) {}) + h.Link(refHashFunc) + return h + } + + dataHashFunc := func() param.SectionWriter { + return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) + } + dataStoreFunc := func() param.SectionWriter { + h := store.New(chunkStore) + h.Init(ctx, func(_ error) {}) + h.Link(dataHashFunc) + return h + } + + h := hasher.New(sectionSize, branches, dataStoreFunc) + h.Link(refStoreFunc) + + r, _ := testutil.SerialData(chunkSize*128, 255, 0) + s := NewSplitter(r, h) + ref, err := s.Split() + if err != nil { + t.Fatal(err) + } + time.Sleep(time.Second) + refHex := hexutil.Encode(ref) + correctRefHex := "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" + if refHex != correctRefHex { + t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) + } +} From 9dab8dacc97967e240096da73bf3bee16712f85a Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 11:34:06 +0100 Subject: [PATCH 36/67] file: Add encrypt writer stub and cache store as local testutil --- file/encrypt/encrypt.go | 47 ++++++++++++++++++++ file/encrypt/encrypt_test.go | 74 ++++++++++++++++++++++++++++++++ file/testutillocal/cache.go | 51 ++++++++++++++++++++++ file/testutillocal/cache_test.go | 55 
++++++++++++++++++++++++ 4 files changed, 227 insertions(+) create mode 100644 file/encrypt/encrypt.go create mode 100644 file/encrypt/encrypt_test.go create mode 100644 file/testutillocal/cache.go create mode 100644 file/testutillocal/cache_test.go diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go new file mode 100644 index 0000000000..5ddcedf838 --- /dev/null +++ b/file/encrypt/encrypt.go @@ -0,0 +1,47 @@ +package encrypt + +import ( + "context" + + "github.com/ethersphere/swarm/param" + "github.com/ethersphere/swarm/storage/encryption" + "golang.org/x/crypto/sha3" +) + +type Encrypt struct { + e encryption.Encryption + w param.SectionWriter +} + +func New(key []byte, initCtr uint32) *Encrypt { + return &Encrypt{ + e: encryption.New(key, 0, initCtr, sha3.NewLegacyKeccak256), + } +} + +func (e *Encrypt) Init(_ context.Context, errFunc func(error)) { +} + +func (e *Encrypt) Link(writerFunc func() param.SectionWriter) { + e.w = writerFunc() +} + +func (e *Encrypt) Write(index int, b []byte) { + e.w.Write(index, b) +} + +func (e *Encrypt) Reset(ctx context.Context) { + e.w.Reset(ctx) +} + +func (e *Encrypt) Sum(b []byte, length int, span []byte) []byte { + return e.w.Sum(b, length, span) +} + +func (e *Encrypt) DigestSize() int { + return e.w.DigestSize() + encryption.KeyLength +} + +func (e *Encrypt) SectionSize() int { + return e.w.SectionSize() +} diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go new file mode 100644 index 0000000000..d0395af018 --- /dev/null +++ b/file/encrypt/encrypt_test.go @@ -0,0 +1,74 @@ +package encrypt + +import ( + "context" + crand "crypto/rand" + "testing" + "time" + + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file/hasher" + "github.com/ethersphere/swarm/param" + "github.com/ethersphere/swarm/storage/encryption" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +const ( + sectionSize = 32 + branches = 128 + chunkSize = 4096 +) + +func init() { + testutil.Init() +} + +func TestEncryptOneChunk(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() param.SectionWriter { + return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + errFunc := func(error) {} + + cacheFunc := func() params.SectionWriter { + } + + key := make([]byte, encryption.KeyLength) + c, err := crand.Read(key) + if err != nil { + t.Fatal(err) + } + if c != encryption.KeyLength { + t.Fatalf("short read %d", c) + } + encryptFunc := func() param.SectionWriter { + eFunc := New(key, uint32(42)) + eFunc.Init(ctx, errFunc) + eFunc.Link(dataHashFunc) + return eFunc + } + + _, data := testutil.SerialData(chunkSize, 255, 0) + h := hasher.New(sectionSize, branches, encryptFunc) + h.Init(ctx, func(error) {}) + h.Link(refHashFunc) + h.Write(0, data) + ref := h.Sum(nil, 0, nil) + + enc := encryption.New(key, 0, 42, sha3.NewLegacyKeccak256) + cipherText, err := enc.Encrypt(data) + if err != nil { + t.Fatal(err) + } + + t.Logf("cipher: %x - ref: %x", cipherText, ref) + +} diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go new file mode 100644 index 0000000000..c237e6300a --- /dev/null +++ b/file/testutillocal/cache.go @@ -0,0 +1,51 @@ +package testutillocal + +import ( 
+ "context" + + "github.com/ethersphere/swarm/param" +) + +type Cache struct { + data map[int][]byte + w param.SectionWriter +} + +func NewCache() *Cache { + return &Cache{ + data: make(map[int][]byte), + } +} + +func (c *Cache) Init(_ context.Context, _ func(error)) { +} + +func (c *Cache) Link(writeFunc func() param.SectionWriter) { + c.w = writeFunc() +} + +func (c *Cache) Write(index int, b []byte) { + c.data[index] = b + if c.w == nil { + return + } + c.w.Write(index, b) +} + +func (c *Cache) Sum(b []byte, length int, span []byte) []byte { + if c.w == nil { + return nil + } + return c.w.Sum(b, length, span) +} + +func (c *Cache) Reset(ctx context.Context) { + if c.w == nil { + return + } + c.w.Reset(ctx) +} + +func (c *Cache) Get(index int) []byte { + return c.data[index] +} diff --git a/file/testutillocal/cache_test.go b/file/testutillocal/cache_test.go new file mode 100644 index 0000000000..e76190c0f2 --- /dev/null +++ b/file/testutillocal/cache_test.go @@ -0,0 +1,55 @@ +package testutillocal + +import ( + "bytes" + "context" + "testing" + + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file/hasher" + "github.com/ethersphere/swarm/param" + "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" +) + +const ( + sectionSize = 32 + branches = 128 + chunkSize = 4096 +) + +func init() { + testutil.Init() +} + +func TestCache(t *testing.T) { + c := NewCache() + c.Init(context.Background(), func(error) {}) + _, data := testutil.SerialData(chunkSize, 255, 0) + c.Write(0, data) + cachedData := c.Get(0) + if !bytes.Equal(cachedData, data) { + t.Fatalf("cache data; expected %x, got %x", data, cachedData) + } +} + +func TestCacheLink(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + dataHashFunc := func() param.SectionWriter { + return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) + } + + c := NewCache() + c.Init(context.Background(), func(error) {}) + c.Link(dataHashFunc) + _, data := testutil.SerialData(chunkSize, 255, 0) + c.Write(0, data) + span := bmt.LengthToSpan(chunkSize) + ref := c.Sum(nil, chunkSize, span) + refHex := hexutil.Encode(ref) + correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" + if refHex != correctRefHex { + t.Fatalf("cache link; expected %s, got %s", correctRefHex, refHex) + } +} From afc7001d7ba7de23a01f3908d4f262bb9c41eafb Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 11:54:27 +0100 Subject: [PATCH 37/67] file: Test encryption pipeline with one chunk --- file/encrypt/encrypt.go | 13 ++++++++++--- file/encrypt/encrypt_test.go | 19 ++++++++++++++----- file/testutillocal/cache.go | 18 ++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go index 5ddcedf838..c1bb8b552b 100644 --- a/file/encrypt/encrypt.go +++ b/file/encrypt/encrypt.go @@ -9,8 +9,9 @@ import ( ) type Encrypt struct { - e encryption.Encryption - w param.SectionWriter + e encryption.Encryption + w param.SectionWriter + errFunc func(error) } func New(key []byte, initCtr uint32) *Encrypt { @@ -20,6 +21,7 @@ func New(key []byte, initCtr uint32) *Encrypt { } func (e *Encrypt) Init(_ context.Context, errFunc func(error)) { + e.errFunc = errFunc } func (e *Encrypt) Link(writerFunc func() param.SectionWriter) { @@ -27,7 +29,12 @@ func (e *Encrypt) Link(writerFunc func() param.SectionWriter) { } func (e *Encrypt) Write(index int, b []byte) { - e.w.Write(index, 
b) + cipherText, err := e.e.Encrypt(b) + if err != nil { + e.errFunc(err) + return + } + e.w.Write(index, cipherText) } func (e *Encrypt) Reset(ctx context.Context) { diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index d0395af018..31f4defd26 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -1,6 +1,7 @@ package encrypt import ( + "bytes" "context" crand "crypto/rand" "testing" @@ -8,6 +9,8 @@ import ( "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/file/hasher" + "github.com/ethersphere/swarm/file/testutillocal" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/storage/encryption" "github.com/ethersphere/swarm/testutil" @@ -38,7 +41,11 @@ func TestEncryptOneChunk(t *testing.T) { defer cancel() errFunc := func(error) {} - cacheFunc := func() params.SectionWriter { + cache := testutillocal.NewCache() + cache.Init(ctx, errFunc) + cache.Link(dataHashFunc) + cacheFunc := func() param.SectionWriter { + return cache } key := make([]byte, encryption.KeyLength) @@ -52,7 +59,7 @@ func TestEncryptOneChunk(t *testing.T) { encryptFunc := func() param.SectionWriter { eFunc := New(key, uint32(42)) eFunc.Init(ctx, errFunc) - eFunc.Link(dataHashFunc) + eFunc.Link(cacheFunc) return eFunc } @@ -68,7 +75,9 @@ func TestEncryptOneChunk(t *testing.T) { if err != nil { t.Fatal(err) } - - t.Logf("cipher: %x - ref: %x", cipherText, ref) - + cacheText := cache.Get(0) + if !bytes.Equal(cipherText, cacheText) { + log.Trace("data mismatch", "expect", cipherText, "got", cacheText) + t.Fatalf("encrypt onechunk; data mismatch") + } } diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go index c237e6300a..963a10179d 100644 --- a/file/testutillocal/cache.go +++ b/file/testutillocal/cache.go @@ -6,6 +6,10 @@ import ( "github.com/ethersphere/swarm/param" ) +var ( + defaultSectionSize = 32 +) + type Cache struct { data map[int][]byte w param.SectionWriter @@ -46,6 +50,20 @@ func (c *Cache) Reset(ctx context.Context) { c.w.Reset(ctx) } +func (c *Cache) SectionSize() int { + if c.w != nil { + return c.w.SectionSize() + } + return defaultSectionSize +} + +func (c *Cache) DigestSize() int { + if c.w != nil { + return c.w.DigestSize() + } + return defaultSectionSize +} + func (c *Cache) Get(index int) []byte { return c.data[index] } From 3953663d3e55579df3baaa1cf37b48093fae1380 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 13:20:59 +0100 Subject: [PATCH 38/67] file: Add key derivation to filehasher encrypt --- file/encrypt/encrypt.go | 34 ++++++++++++-- file/encrypt/encrypt_test.go | 90 +++++++++++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 6 deletions(-) diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go index c1bb8b552b..c5b68ffc40 100644 --- a/file/encrypt/encrypt.go +++ b/file/encrypt/encrypt.go @@ -2,6 +2,9 @@ package encrypt import ( "context" + crand "crypto/rand" + "fmt" + "hash" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/storage/encryption" @@ -9,15 +12,32 @@ import ( ) type Encrypt struct { + key []byte e encryption.Encryption w param.SectionWriter + keyHash hash.Hash errFunc func(error) } -func New(key []byte, initCtr uint32) *Encrypt { - return &Encrypt{ - e: encryption.New(key, 0, initCtr, sha3.NewLegacyKeccak256), +func New(key []byte, initCtr uint32) (*Encrypt, error) { + e := &Encrypt{ + e: encryption.New(key, 0, initCtr, sha3.NewLegacyKeccak256), + key: make([]byte, encryption.KeyLength), + keyHash: 
param.HashFunc(), } + if key == nil { + e.key = make([]byte, encryption.KeyLength) + c, err := crand.Read(e.key) + if err != nil { + return nil, err + } + if c < encryption.KeyLength { + return nil, fmt.Errorf("short read: %d", c) + } + } else { + copy(e.key, key) + } + return e, nil } func (e *Encrypt) Init(_ context.Context, errFunc func(error)) { @@ -42,7 +62,13 @@ func (e *Encrypt) Reset(ctx context.Context) { } func (e *Encrypt) Sum(b []byte, length int, span []byte) []byte { - return e.w.Sum(b, length, span) + oldKey := make([]byte, 32) + copy(oldKey, e.key) + e.keyHash.Reset() + e.keyHash.Write(e.key) + copy(e.key, e.keyHash.Sum(nil)) + s := e.w.Sum(b, length, span) + return append(oldKey, s...) } func (e *Encrypt) DigestSize() int { diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index 31f4defd26..59bfa2b39b 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/file/hasher" "github.com/ethersphere/swarm/file/testutillocal" @@ -27,6 +28,51 @@ func init() { testutil.Init() } +func TestKey(t *testing.T) { + + e, err := New(nil, 42) + if err != nil { + t.Fatal(err) + } + if e.key == nil { + t.Fatalf("new key nil; expected not nil") + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + errFunc := func(error) {} + cache := testutillocal.NewCache() + cache.Init(ctx, errFunc) + cacheFunc := func() param.SectionWriter { + return cache + } + key := [32]byte{} + key[0] = 0x2a + e, err = New(key[:], 42) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(key[:], e.key) { + t.Fatalf("key seed; expected %x, got %x", key, e.key) + } + + _, data := testutil.SerialData(chunkSize, 255, 0) + e.Link(cacheFunc) + e.Write(0, data) + span := bmt.LengthToSpan(chunkSize) + doubleRef := e.Sum(nil, chunkSize, span) + refKey := doubleRef[:encryption.KeyLength] + if !bytes.Equal(refKey, key[:]) { + t.Fatalf("returned ref key, expected %x, got %x", key, refKey) + } + + correctNextKeyHex := "0xd83b8137defe4bdaf5e1243b3175dc49b0a19c9d1f68044b7bf261db9f006233" + nextKeyHex := hexutil.Encode(e.key) + if nextKeyHex != correctNextKeyHex { + t.Fatalf("key next; expected %s, got %s", correctNextKeyHex, nextKeyHex) + } +} + func TestEncryptOneChunk(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) @@ -57,7 +103,10 @@ func TestEncryptOneChunk(t *testing.T) { t.Fatalf("short read %d", c) } encryptFunc := func() param.SectionWriter { - eFunc := New(key, uint32(42)) + eFunc, err := New(key, uint32(42)) + if err != nil { + t.Fatal(err) + } eFunc.Init(ctx, errFunc) eFunc.Link(cacheFunc) return eFunc @@ -68,7 +117,7 @@ func TestEncryptOneChunk(t *testing.T) { h.Init(ctx, func(error) {}) h.Link(refHashFunc) h.Write(0, data) - ref := h.Sum(nil, 0, nil) + h.Sum(nil, 0, nil) enc := encryption.New(key, 0, 42, sha3.NewLegacyKeccak256) cipherText, err := enc.Encrypt(data) @@ -81,3 +130,40 @@ func TestEncryptOneChunk(t *testing.T) { t.Fatalf("encrypt onechunk; data mismatch") } } + +//func TestEncryptIntermediateChunk(t *testing.T) { +// poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) +// poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) +// refHashFunc := func() param.SectionWriter { +// return 
bmt.New(poolAsync).NewAsyncWriter(false) +// } +// dataHashFunc := func() param.SectionWriter { +// return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) +// } +// +// ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) +// defer cancel() +// errFunc := func(error) {} +// +// cache := testutillocal.NewCache() +// cache.Init(ctx, errFunc) +// cache.Link(dataHashFunc) +// cacheFunc := func() param.SectionWriter { +// return cache +// } +// +// key := make([]byte, encryption.KeyLength) +// c, err := crand.Read(key) +// if err != nil { +// t.Fatal(err) +// } +// if c != encryption.KeyLength { +// t.Fatalf("short read %d", c) +// } +// encryptFunc := func() param.SectionWriter { +// eFunc := New(key, uint32(42)) +// eFunc.Init(ctx, errFunc) +// eFunc.Link(cacheFunc) +// return eFunc +// } +//} From ef446f4146ef60a6874cae770f04e7dde73fe07b Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 15:57:16 +0100 Subject: [PATCH 39/67] file, param: Add test to verify buffer-neutral encryption --- file/encrypt/encrypt_test.go | 102 +++++++++++++++++++++++++------ file/testutillocal/cache.go | 4 ++ file/testutillocal/cache_test.go | 5 ++ param/hash.go | 7 +++ 4 files changed, 98 insertions(+), 20 deletions(-) create mode 100644 param/hash.go diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index 59bfa2b39b..192c8b08a3 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -3,7 +3,6 @@ package encrypt import ( "bytes" "context" - crand "crypto/rand" "testing" "time" @@ -24,6 +23,10 @@ const ( chunkSize = 4096 ) +var ( + testKey = append(make([]byte, encryption.KeyLength-1), byte(0x2a)) +) + func init() { testutil.Init() } @@ -46,14 +49,12 @@ func TestKey(t *testing.T) { cacheFunc := func() param.SectionWriter { return cache } - key := [32]byte{} - key[0] = 0x2a - e, err = New(key[:], 42) + e, err = New(testKey, 42) if err != nil { t.Fatal(err) } - if !bytes.Equal(key[:], e.key) { - t.Fatalf("key seed; expected %x, got %x", key, e.key) + if !bytes.Equal(testKey, e.key) { + t.Fatalf("key seed; expected %x, got %x", testKey, e.key) } _, data := testutil.SerialData(chunkSize, 255, 0) @@ -62,11 +63,11 @@ func TestKey(t *testing.T) { span := bmt.LengthToSpan(chunkSize) doubleRef := e.Sum(nil, chunkSize, span) refKey := doubleRef[:encryption.KeyLength] - if !bytes.Equal(refKey, key[:]) { - t.Fatalf("returned ref key, expected %x, got %x", key, refKey) + if !bytes.Equal(refKey, testKey) { + t.Fatalf("returned ref key, expected %x, got %x", testKey, refKey) } - correctNextKeyHex := "0xd83b8137defe4bdaf5e1243b3175dc49b0a19c9d1f68044b7bf261db9f006233" + correctNextKeyHex := "0xbeced09521047d05b8960b7e7bcc1d1292cf3e4b2a6b63f48335cbde5f7545d2" nextKeyHex := hexutil.Encode(e.key) if nextKeyHex != correctNextKeyHex { t.Fatalf("key next; expected %s, got %s", correctNextKeyHex, nextKeyHex) @@ -94,16 +95,8 @@ func TestEncryptOneChunk(t *testing.T) { return cache } - key := make([]byte, encryption.KeyLength) - c, err := crand.Read(key) - if err != nil { - t.Fatal(err) - } - if c != encryption.KeyLength { - t.Fatalf("short read %d", c) - } encryptFunc := func() param.SectionWriter { - eFunc, err := New(key, uint32(42)) + eFunc, err := New(testKey, uint32(42)) if err != nil { t.Fatal(err) } @@ -117,9 +110,9 @@ func TestEncryptOneChunk(t *testing.T) { h.Init(ctx, func(error) {}) h.Link(refHashFunc) h.Write(0, data) - h.Sum(nil, 0, nil) + doubleRef := h.Sum(nil, 0, nil) - enc := encryption.New(key, 0, 42, sha3.NewLegacyKeccak256) + enc := 
encryption.New(testKey, 0, 42, sha3.NewLegacyKeccak256) cipherText, err := enc.Encrypt(data) if err != nil { t.Fatal(err) @@ -129,6 +122,75 @@ func TestEncryptOneChunk(t *testing.T) { log.Trace("data mismatch", "expect", cipherText, "got", cacheText) t.Fatalf("encrypt onechunk; data mismatch") } + + hc := bmt.New(poolSync) + span := bmt.LengthToSpan(len(cipherText)) + hc.ResetWithLength(span) + hc.Write(cipherText) + cipherRef := hc.Sum(nil) + dataRef := doubleRef[encryption.KeyLength:] + if !bytes.Equal(dataRef, cipherRef) { + t.Fatalf("encrypt ref; expected %x, got %x", cipherRef, dataRef) + } +} + +func TestEncryptChunkWholeAndSections(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() param.SectionWriter { + return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + errFunc := func(error) {} + + cache := testutillocal.NewCache() + cache.Init(ctx, errFunc) + cache.Link(dataHashFunc) + cacheFunc := func() param.SectionWriter { + return cache + } + + e, err := New(testKey, uint32(42)) + if err != nil { + t.Fatal(err) + } + e.Init(ctx, errFunc) + e.Link(cacheFunc) + + _, data := testutil.SerialData(chunkSize, 255, 0) + e.Write(0, data) + span := bmt.LengthToSpan(chunkSize) + e.Sum(nil, chunkSize, span) + + cacheCopy := make([]byte, chunkSize) + copy(cacheCopy, cache.Get(0)) + cache.Delete(0) + + cache.Link(refHashFunc) + e, err = New(testKey, uint32(42)) + if err != nil { + t.Fatal(err) + } + e.Init(ctx, errFunc) + e.Link(cacheFunc) + + for i := 0; i < chunkSize; i += sectionSize { + e.Write(i/sectionSize, data[i:i+sectionSize]) + } + e.Sum(nil, chunkSize, span) + + for i := 0; i < chunkSize; i += sectionSize { + chunked := cacheCopy[i : i+sectionSize] + sectioned := cache.Get(i / sectionSize) + if !bytes.Equal(chunked, sectioned) { + t.Fatalf("encrypt chunk full and section idx %d; expected %x, got %x", i/sectionSize, chunked, sectioned) + } + } } //func TestEncryptIntermediateChunk(t *testing.T) { diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go index 963a10179d..bece850a7b 100644 --- a/file/testutillocal/cache.go +++ b/file/testutillocal/cache.go @@ -67,3 +67,7 @@ func (c *Cache) DigestSize() int { func (c *Cache) Get(index int) []byte { return c.data[index] } + +func (c *Cache) Delete(index int) { + delete(c.data, index) +} diff --git a/file/testutillocal/cache_test.go b/file/testutillocal/cache_test.go index e76190c0f2..043c459eeb 100644 --- a/file/testutillocal/cache_test.go +++ b/file/testutillocal/cache_test.go @@ -52,4 +52,9 @@ func TestCacheLink(t *testing.T) { if refHex != correctRefHex { t.Fatalf("cache link; expected %s, got %s", correctRefHex, refHex) } + + c.Delete(0) + if _, ok := c.data[0]; ok { + t.Fatalf("delete; expected not found") + } } diff --git a/param/hash.go b/param/hash.go new file mode 100644 index 0000000000..114fa0ad6c --- /dev/null +++ b/param/hash.go @@ -0,0 +1,7 @@ +package param + +import "golang.org/x/crypto/sha3" + +var ( + HashFunc = sha3.NewLegacyKeccak256 +) From d0c7833fe8fe14a357bdd7ed631056f364a9606d Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 16:48:54 +0100 Subject: [PATCH 40/67] file: Add first part of intermediate chunk test missing correctness check --- 
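The key handling exercised below behaves as a hash chain: Sum sums the chunk through the downstream writer, replaces e.key with its keccak256 image (param.HashFunc), and returns the key the chunk was summed under prepended to the resulting digest. The following is a minimal, self-contained sketch of just the key derivation, assuming keccak256 and a 32-byte key as in storage/encryption; nextKey and main are illustrative names, not code from this series:

package main

import (
	"fmt"

	"golang.org/x/crypto/sha3"
)

// nextKey returns keccak256(key), the key the next Sum call will use.
func nextKey(key []byte) []byte {
	h := sha3.NewLegacyKeccak256()
	h.Write(key)
	return h.Sum(nil)
}

func main() {
	// mirrors testKey in encrypt_test.go: 31 zero bytes followed by 0x2a
	key := make([]byte, 32)
	key[31] = 0x2a
	for i := 0; i < 3; i++ {
		fmt.Printf("chunk %d summed under key %x\n", i, key)
		key = nextKey(key)
	}
}

Under these assumptions the correctNextKeyHex vector in TestKey is simply keccak256 of testKey, and each reference doubles in size to key plus digest.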
file/encrypt/encrypt.go | 26 ++++++---- file/encrypt/encrypt_test.go | 96 ++++++++++++++++++++++-------------- 2 files changed, 76 insertions(+), 46 deletions(-) diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go index c5b68ffc40..cb6063ff43 100644 --- a/file/encrypt/encrypt.go +++ b/file/encrypt/encrypt.go @@ -6,6 +6,7 @@ import ( "fmt" "hash" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/storage/encryption" "golang.org/x/crypto/sha3" @@ -20,23 +21,24 @@ type Encrypt struct { } func New(key []byte, initCtr uint32) (*Encrypt, error) { - e := &Encrypt{ - e: encryption.New(key, 0, initCtr, sha3.NewLegacyKeccak256), - key: make([]byte, encryption.KeyLength), - keyHash: param.HashFunc(), - } if key == nil { - e.key = make([]byte, encryption.KeyLength) - c, err := crand.Read(e.key) + key = make([]byte, encryption.KeyLength) + c, err := crand.Read(key) if err != nil { return nil, err } if c < encryption.KeyLength { return nil, fmt.Errorf("short read: %d", c) } - } else { - copy(e.key, key) + } else if len(key) != encryption.KeyLength { + return nil, fmt.Errorf("encryption key must be %d bytes", encryption.KeyLength) + } + e := &Encrypt{ + e: encryption.New(key, 0, initCtr, sha3.NewLegacyKeccak256), + key: make([]byte, encryption.KeyLength), + keyHash: param.HashFunc(), } + copy(e.key, key) return e, nil } @@ -58,16 +60,20 @@ func (e *Encrypt) Write(index int, b []byte) { } func (e *Encrypt) Reset(ctx context.Context) { + //e.e.Reset() uncomment when change is made to storage/encryption interface e.w.Reset(ctx) } func (e *Encrypt) Sum(b []byte, length int, span []byte) []byte { + // derive new key oldKey := make([]byte, 32) copy(oldKey, e.key) e.keyHash.Reset() e.keyHash.Write(e.key) - copy(e.key, e.keyHash.Sum(nil)) + newKey := e.keyHash.Sum(nil) + copy(e.key, newKey) s := e.w.Sum(b, length, span) + log.Trace("key", "key", oldKey, "ekey", e.key, "newkey", newKey) return append(oldKey, s...) 
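// NOTE: the value returned above is a "double reference": the key this
// chunk was summed under (oldKey), prepended to the digest from the
// downstream writer. e.key has already been advanced to its keccak256
// image, which is what the next Sum call will record.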
} diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index 192c8b08a3..ffd811ce94 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -193,39 +193,63 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { } } -//func TestEncryptIntermediateChunk(t *testing.T) { -// poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) -// poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) -// refHashFunc := func() param.SectionWriter { -// return bmt.New(poolAsync).NewAsyncWriter(false) -// } -// dataHashFunc := func() param.SectionWriter { -// return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) -// } -// -// ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) -// defer cancel() -// errFunc := func(error) {} -// -// cache := testutillocal.NewCache() -// cache.Init(ctx, errFunc) -// cache.Link(dataHashFunc) -// cacheFunc := func() param.SectionWriter { -// return cache -// } -// -// key := make([]byte, encryption.KeyLength) -// c, err := crand.Read(key) -// if err != nil { -// t.Fatal(err) -// } -// if c != encryption.KeyLength { -// t.Fatalf("short read %d", c) -// } -// encryptFunc := func() param.SectionWriter { -// eFunc := New(key, uint32(42)) -// eFunc.Init(ctx, errFunc) -// eFunc.Link(cacheFunc) -// return eFunc -// } -//} +func TestEncryptIntermediateChunk(t *testing.T) { + poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + dataHashFunc := func() param.SectionWriter { + return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) + defer cancel() + errFunc := func(err error) { + log.Error("filehasher pipeline error", "err", err) + cancel() + } + + cache := testutillocal.NewCache() + cache.Init(ctx, errFunc) + cache.Link(refHashFunc) + cacheFunc := func() param.SectionWriter { + return cache + } + + encryptRefFunc := func() param.SectionWriter { + eFunc, err := New(testKey, uint32(42)) + if err != nil { + t.Fatal(err) + } + eFunc.Init(ctx, errFunc) + eFunc.Link(cacheFunc) + return eFunc + } + + encryptDataFunc := func() param.SectionWriter { + eFunc, err := New(nil, uint32(42)) + if err != nil { + t.Fatal(err) + } + eFunc.Init(ctx, errFunc) + eFunc.Link(dataHashFunc) + return eFunc + } + + h := hasher.New(sectionSize, branches, encryptDataFunc) + h.Link(encryptRefFunc) + + _, data := testutil.SerialData(chunkSize*branches, 255, 0) + for i := 0; i < chunkSize*branches; i += chunkSize { + h.Write(i/chunkSize, data[i:i+chunkSize]) + } + span := bmt.LengthToSpan(chunkSize * branches) + ref := h.Sum(nil, chunkSize*branches, span) + select { + case <-ctx.Done(): + t.Fatalf("ctx done: %v", ctx.Err()) + default: + } + t.Logf("%x", ref) +} From fef9be36094b59e58efcf3eb167a21aa5d0a1c00 Mon Sep 17 00:00:00 2001 From: nolash Date: Wed, 4 Dec 2019 16:50:14 +0100 Subject: [PATCH 41/67] file: Add Encryption.Reset to file/encrypt Reset --- file/encrypt/encrypt.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go index cb6063ff43..633f4ac847 100644 --- a/file/encrypt/encrypt.go +++ b/file/encrypt/encrypt.go @@ -60,7 +60,7 @@ func (e *Encrypt) Write(index int, b []byte) { } func (e *Encrypt) Reset(ctx context.Context) { - //e.e.Reset() 
uncomment when change is made to storage/encryption interface + e.e.Reset() e.w.Reset(ctx) } From 77d25a366e22892a8a970bccd64da71a9d764dc0 Mon Sep 17 00:00:00 2001 From: nolash Date: Fri, 6 Dec 2019 12:33:13 +0100 Subject: [PATCH 42/67] file: Add multi-section write capability to job --- file/hasher/job.go | 17 ++++++++++++++--- file/hasher/job_test.go | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/file/hasher/job.go b/file/hasher/job.go index 245f73ada8..4963c16ee1 100644 --- a/file/hasher/job.go +++ b/file/hasher/job.go @@ -143,18 +143,29 @@ OUTER: select { // enter here if new data is written to the job + // TODO: Error if calculated write count exceed chunk case entry := <-jb.writeC: + + // split the contents to fit the underlying SectionWriter + entrySections := len(entry.data) / jb.writer.SectionSize() jb.mu.Lock() endCount := int(jb.endCount) - processCount++ + processCount += entrySections jb.mu.Unlock() if entry.index == 0 { jb.firstSectionData = entry.data } - log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) + log.Trace("job entry", "datasection", jb.dataSection, "num sections", entrySections, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) // TODO: this write is superfluous when the received data is the root hash - jb.writer.Write(entry.index, entry.data) + var offset int + for i := 0; i < entrySections; i++ { + idx := entry.index + i + data := entry.data[offset : offset+sectionSize] + log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index+i, "data", hexutil.Encode(data)) + jb.writer.Write(idx, data) + offset += sectionSize + } // since newcount is incremented above it can only equal endcount if this has been set in the case below, // which means data write has been completed diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index 2a6e3cbe1e..b628f77981 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -561,6 +561,39 @@ func TestJobWriteSpanShuffle(t *testing.T) { } } +func TestJobWriteDoubleSection(t *testing.T) { + //poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + //dataHash := bmt.New(poolSync) + writeSize := sectionSize * 2 + dummyHashLongSectionFunc := func() param.SectionWriter { + return newDummySectionWriter(chunkSize*branches, sectionSize) + } + params := newTreeParams(sectionSize, branches, dummyHashLongSectionFunc) + + tgt := newTarget() + jb := newJob(params, tgt, nil, 1, 0) + jb.start() + _, data := testutil.SerialData(chunkSize, 255, 0) + + for i := 0; i < chunkSize; i += writeSize { + jb.write(i/writeSize, data[i:i+writeSize]) + } + tgt.Set(chunkSize, branches, 2) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + select { + case refLong := <-tgt.Done(): + refLongHex := hexutil.Encode(refLong) + correctRefLongHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470" + if refLongHex != correctRefLongHex { + t.Fatalf("section long: expected %s, got %s", correctRefLongHex, refLongHex) + } + case <-ctx.Done(): + t.Fatalf("timeout: %v", ctx.Err()) + } + +} + // TestVectors executes the barebones functionality of the hasher // and verifies against source of truth results generated from the 
reference hasher // for the same data From 35b91aea5ee73cde21caed0165e9dec2e3c04a0c Mon Sep 17 00:00:00 2001 From: nolash Date: Fri, 6 Dec 2019 14:48:05 +0100 Subject: [PATCH 43/67] file, bmt, param: Add dynamic branch/section write and tests encrypt and split now fail; the endcount calculation needs revisiting --- bmt/bmt.go | 35 +++++++--- file/encrypt/encrypt.go | 10 ++- file/hasher/common_test.go | 2 +- file/hasher/hasher.go | 11 +++ file/hasher/job.go | 11 +-- file/hasher/job_test.go | 134 ++++++++++++++++++++++++++---------- file/store/store.go | 5 ++ file/testutillocal/cache.go | 8 +++ param/io.go | 1 + 9 files changed, 164 insertions(+), 53 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index 4b828b76e6..5235322f74 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -296,6 +296,10 @@ func (h *Hasher) ChunkSize() int { return h.pool.Size } +func (h *Hasher) Count() int { + return h.pool.SegmentCount +} + // Sum returns the BMT root hash of the buffer // using Sum presupposes sequential synchronous writes (io.Writer interface) // hash.Hash interface Sum method appends the byte slice to the underlying @@ -403,19 +407,25 @@ func (h *Hasher) releaseTree() { } // NewAsyncWriter extends Hasher with an interface for concurrent segment/section writes +// TODO: Instead of being set explicitly, the double segment size should be dynamic and chunked internally. Otherwise we either have to keep separate bmt hasher generator functions for different purposes in the same instance, or cope with the added complexity of generator functions that receive parameters func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher { secsize := h.pool.SegmentSize if double { secsize *= 2 } + seccount := h.pool.SegmentCount + if double { + seccount /= 2 + } write := func(i int, section []byte, final bool) { h.writeSection(i, section, double, final) } return &AsyncHasher{ - Hasher: h, - double: double, - secsize: secsize, - write: write, + Hasher: h, + double: double, + secsize: secsize, + seccount: seccount, + write: write, } } @@ -434,11 +444,12 @@ func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher { // * it will not leak processes if not all sections are written but it blocks // and keeps the resource which can be released calling Reset() type AsyncHasher struct { - *Hasher // extends the Hasher - mtx sync.Mutex // to lock the cursor access - double bool // whether to use double segments (call Hasher.writeSection) - secsize int // size of base section (size of hash or double) - write func(i int, section []byte, final bool) + *Hasher // extends the Hasher + mtx sync.Mutex // to lock the cursor access + double bool // whether to use double segments (call Hasher.writeSection) + secsize int // size of base section (size of hash or double) + seccount int // base section count + write func(i int, section []byte, final bool) } // Implements param.SectionWriter @@ -466,6 +477,12 @@ func (sw *AsyncHasher) DigestSize() int { return sw.secsize } +// Branches returns the branching factor, which is equivalent to the section count of the BMT input +// Implements param.SectionWriter +func (sw *AsyncHasher) Branches() int { + return sw.seccount +} + // Write writes the i-th section of the BMT base // this function can and is meant to be called concurrently // it sets max segment threadsafely diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go index 633f4ac847..324d09afc9 100644 --- a/file/encrypt/encrypt.go +++ b/file/encrypt/encrypt.go @@ -66,7 +66,7 @@ func (e *Encrypt) Reset(ctx context.Context) { func (e *Encrypt) Sum(b 
[]byte, length int, span []byte) []byte { // derive new key - oldKey := make([]byte, 32) + oldKey := make([]byte, encryption.KeyLength) copy(oldKey, e.key) e.keyHash.Reset() e.keyHash.Write(e.key) @@ -77,10 +77,18 @@ func (e *Encrypt) Sum(b []byte, length int, span []byte) []byte { return append(oldKey, s...) } +// DigestSize implements param.SectionWriter +// TODO: cache these calculations func (e *Encrypt) DigestSize() int { return e.w.DigestSize() + encryption.KeyLength } +// SectionSize implements param.SectionWriter func (e *Encrypt) SectionSize() int { return e.w.SectionSize() } + +// Branches implements param.SectionWriter +func (e *Encrypt) Branches() int { + return e.w.Branches() / (e.DigestSize() / e.w.SectionSize()) +} diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index bad3556420..f7eac6219c 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -58,7 +58,7 @@ var ( } start = 0 - end = len(dataLengths) + end = 14 //len(dataLengths) ) func init() { diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index dfee3240dd..7649e6d568 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -56,6 +56,11 @@ func (b *BMTSyncSectionWriter) DigestSize() int { return b.hasher.Size() } +// Branches implements param.SectionWriter +func (b *BMTSyncSectionWriter) Branches() int { + return b.hasher.Count() +} + // Hasher is a bmt.SectionWriter that executes the file hashing algorithm on arbitary data type Hasher struct { target *target @@ -72,6 +77,7 @@ type Hasher struct { // New creates a new Hasher object using the given sectionSize and branch factor // hasherFunc is used to create *bmt.Hashers to hash the incoming data // writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs. 
It may be pipelined to other components with the same interface +// TODO: sectionSize and branches should be inferred from underlying writer, not shared across job and hasher func New(sectionSize int, branches int, hasherFunc func() param.SectionWriter) *Hasher { h := &Hasher{ target: newTarget(), @@ -147,6 +153,11 @@ func (h *Hasher) DigestSize() int { return h.params.SectionSize } +// Branches implements param.SectionWriter +func (h *Hasher) Branches() int { + return h.params.Branches +} + // proxy for sync.Pool func (h *Hasher) putHasher(w param.SectionWriter) { h.hasherPool.Put(w) diff --git a/file/hasher/job.go b/file/hasher/job.go index 4963c16ee1..ad5f09ca78 100644 --- a/file/hasher/job.go +++ b/file/hasher/job.go @@ -150,21 +150,22 @@ OUTER: entrySections := len(entry.data) / jb.writer.SectionSize() jb.mu.Lock() endCount := int(jb.endCount) + oldProcessCount := processCount processCount += entrySections jb.mu.Unlock() if entry.index == 0 { jb.firstSectionData = entry.data } - log.Trace("job entry", "datasection", jb.dataSection, "num sections", entrySections, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) + log.Trace("job entry", "datasection", jb.dataSection, "num sections", entrySections, "level", jb.level, "processCount", oldProcessCount, "endcount", endCount, "index", entry.index, "data", hexutil.Encode(entry.data)) // TODO: this write is superfluous when the received data is the root hash var offset int for i := 0; i < entrySections; i++ { idx := entry.index + i - data := entry.data[offset : offset+sectionSize] - log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", processCount, "endcount", endCount, "index", entry.index+i, "data", hexutil.Encode(data)) + data := entry.data[offset : offset+jb.writer.SectionSize()] + log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", oldProcessCount+i, "endcount", endCount, "index", entry.index+i, "data", hexutil.Encode(data)) jb.writer.Write(idx, data) - offset += sectionSize + offset += jb.writer.SectionSize() } // since newcount is incremented above it can only equal endcount if this has been set in the case below, @@ -174,7 +175,7 @@ OUTER: log.Trace("quitting writec - endcount", "c", processCount, "level", jb.level) break OUTER } - if processCount == jb.params.Branches { + if processCount == jb.writer.Branches() { log.Trace("quitting writec - branches") break OUTER } diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index b628f77981..069ddbd08c 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -20,9 +20,13 @@ import ( "golang.org/x/crypto/sha3" ) +const ( + zeroHex = "0000000000000000000000000000000000000000000000000000000000000000" +) + var ( dummyHashFunc = func() param.SectionWriter { - return newDummySectionWriter(chunkSize*branches, sectionSize) + return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) } // placeholder for cases where a hasher is not necessary noHashFunc = func() param.SectionWriter { @@ -34,17 +38,26 @@ var ( // for later inspection // TODO: see if this can be replaced with the fake hasher from storage module type dummySectionWriter struct { - data []byte sectionSize int + digestSize int + branches int + data []byte + digest []byte + size int + summed bool writer hash.Hash mu sync.Mutex + wg sync.WaitGroup } -func newDummySectionWriter(cp int, sectionSize int) *dummySectionWriter { +func 
newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int) *dummySectionWriter { return &dummySectionWriter{ - data: make([]byte, cp), sectionSize: sectionSize, + digestSize: digestSize, + branches: branches, + data: make([]byte, cp), writer: sha3.NewLegacyKeccak256(), + digest: make([]byte, digestSize), } } @@ -55,18 +68,45 @@ func (d *dummySectionWriter) Link(_ func() param.SectionWriter) { } // implements param.SectionWriter -// BUG: not actually writing to hasher func (d *dummySectionWriter) Write(index int, data []byte) { d.mu.Lock() - defer d.mu.Unlock() - copy(d.data[index*sectionSize:], data) + copy(d.data[index*d.sectionSize:], data) + d.size += len(data) + log.Trace("dummywriter", "index", index, "size", d.size, "threshold", d.sectionSize*d.branches) + if d.isFull() { + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } } // implements param.SectionWriter -func (d *dummySectionWriter) Sum(b []byte, size int, span []byte) []byte { +func (d *dummySectionWriter) Sum(_ []byte, size int, _ []byte) []byte { + log.Trace("dummy Sumcall", "size", size) + d.mu.Lock() + if !d.summed { + d.size = size + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } + return d.digest +} + +func (d *dummySectionWriter) sum() { d.mu.Lock() defer d.mu.Unlock() - return d.writer.Sum(b) + for i := 0; i < d.size; i += d.writer.Size() { + sectionData := d.data[i : i+d.writer.Size()] + log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size) + d.writer.Write(sectionData) + } + copy(d.digest, d.writer.Sum(nil)) + log.Trace("dummy sum result", "ref", hexutil.Encode(d.digest)) } // implements param.SectionWriter @@ -74,6 +114,9 @@ func (d *dummySectionWriter) Reset(_ context.Context) { d.mu.Lock() defer d.mu.Unlock() d.data = make([]byte, len(d.data)) + d.digest = make([]byte, d.digestSize) + d.size = 0 + d.summed = false d.writer.Reset() } @@ -87,31 +130,48 @@ func (d *dummySectionWriter) DigestSize() int { return d.sectionSize } +// implements param.SectionWriter +func (d *dummySectionWriter) Branches() int { + return d.branches +} + +func (d *dummySectionWriter) isFull() bool { + return d.size == d.sectionSize*d.branches +} + // TestDummySectionWriter func TestDummySectionWriter(t *testing.T) { - w := newDummySectionWriter(chunkSize*2, sectionSize) + w := newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) w.Reset(context.Background()) - data := make([]byte, 32) - rand.Seed(23115) - c, err := rand.Read(data) - if err != nil { - t.Fatal(err) + _, data := testutil.SerialData(sectionSize*2, 255, 0) + + w.Write(branches, data[:sectionSize]) + w.Write(branches+1, data[sectionSize:]) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) } - if c < 32 { - t.Fatalf("short read %d", c) + + correctDigestHex := "0xfbc16f6db3534b456cb257d00148127f69909000c89f8ce5bc6183493ef01da1" + digest := w.Sum(nil, chunkSize*2, nil) + digestHex := hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) } - w.Write(branches, data) - if !bytes.Equal(w.data[chunkSize:chunkSize+32], data) { - t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+32], data) + w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) + w.Reset(context.Background()) + 
w.Write(branches/2, data) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) } - correctDigest := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470" - digest := w.Sum(nil, chunkSize*2, nil) - if hexutil.Encode(digest) != correctDigest { - t.Fatalf("Digest: expected %s, got %x", correctDigest, digest) + correctDigestHex += zeroHex + digest = w.Sum(nil, chunkSize*2, nil) + digestHex = hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) } } @@ -310,23 +370,23 @@ func TestGetJobNext(t *testing.T) { func TestJobWriteTwoAndFinish(t *testing.T) { tgt := newTarget() - params := newTreeParams(sectionSize*2, branches, dummyHashFunc) + params := newTreeParams(sectionSize, branches, dummyHashFunc) jb := newJob(params, tgt, nil, 1, 0) jb.start() _, data := testutil.SerialData(sectionSize*2, 255, 0) jb.write(0, data[:sectionSize]) - jb.write(1, data[:sectionSize]) + jb.write(1, data[sectionSize:]) finalSize := chunkSize * 2 finalSection := dataSizeToSectionIndex(finalSize, sectionSize) - tgt.Set(finalSize, finalSection, 2) + tgt.Set(finalSize, finalSection-1, 2) ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*199) defer cancel() select { case ref := <-tgt.Done(): - correctRefHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470" + correctRefHex := "0x002030bde3d4cf89919649775cd71875c4d0ab1708a380e03fefc3a28aa24831" refHex := hexutil.Encode(ref) if refHex != correctRefHex { t.Fatalf("job write full: expected %s, got %s", correctRefHex, refHex) @@ -399,7 +459,7 @@ func TestWriteParentSection(t *testing.T) { if jbnp.count() != 1 { t.Fatalf("parent count: expected %d, got %d", 1, jbnp.count()) } - correctRefHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470" + correctRefHex := "0x002030bde3d4cf89919649775cd71875c4d0ab1708a380e03fefc3a28aa24831" // extract data in section 2 from the writer // TODO: overload writer to provide a get method to extract data to improve clarity @@ -423,16 +483,16 @@ func TestJobWriteFull(t *testing.T) { jb := newJob(params, tgt, nil, 1, 0) jb.start() _, data := testutil.SerialData(chunkSize, 255, 0) - for i := 0; i < branches; i++ { - jb.write(i, data[i*sectionSize:i*sectionSize+sectionSize]) + for i := 0; i < chunkSize; i += sectionSize { + jb.write(i/sectionSize, data[i:i+sectionSize]) } tgt.Set(chunkSize, branches, 2) - correctRefHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470" - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) defer cancel() select { case ref := <-tgt.Done(): + correctRefHex := "0x8ace4673563b86281778b943aa60481fc4ede9f238dd98f1b3a5df4cb54ee79b" refHex := hexutil.Encode(ref) if refHex != correctRefHex { t.Fatalf("job write full: expected %s, got %s", correctRefHex, refHex) @@ -566,7 +626,7 @@ func TestJobWriteDoubleSection(t *testing.T) { //dataHash := bmt.New(poolSync) writeSize := sectionSize * 2 dummyHashLongSectionFunc := func() param.SectionWriter { - return newDummySectionWriter(chunkSize*branches, sectionSize) + return newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2) } params := newTreeParams(sectionSize, branches, dummyHashLongSectionFunc) @@ -578,13 +638,13 @@ func 
TestJobWriteDoubleSection(t *testing.T) { for i := 0; i < chunkSize; i += writeSize { jb.write(i/writeSize, data[i:i+writeSize]) } - tgt.Set(chunkSize, branches, 2) - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + tgt.Set(chunkSize, branches/2-1, 2) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*10) defer cancel() select { case refLong := <-tgt.Done(): refLongHex := hexutil.Encode(refLong) - correctRefLongHex := "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470" + correctRefLongHex := "0x8ace4673563b86281778b943aa60481fc4ede9f238dd98f1b3a5df4cb54ee79b" + zeroHex if refLongHex != correctRefLongHex { t.Fatalf("section long: expected %s, got %s", correctRefLongHex, refLongHex) } diff --git a/file/store/store.go b/file/store/store.go index a944abcf22..0612737641 100644 --- a/file/store/store.go +++ b/file/store/store.go @@ -78,3 +78,8 @@ func (f *FileStore) SectionSize() int { func (f *FileStore) DigestSize() int { return f.w.DigestSize() } + +// Branches implements param.SectionWriter +func (f *FileStore) Branches() int { + return f.w.Branches() +} diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go index bece850a7b..78c4829271 100644 --- a/file/testutillocal/cache.go +++ b/file/testutillocal/cache.go @@ -8,6 +8,7 @@ import ( var ( defaultSectionSize = 32 + defaultBranches = 128 ) type Cache struct { @@ -64,6 +65,13 @@ func (c *Cache) DigestSize() int { return defaultSectionSize } +func (c *Cache) Branches() int { + if c.w != nil { + return c.w.Branches() + } + return defaultBranches +} + func (c *Cache) Get(index int) []byte { return c.data[index] } diff --git a/param/io.go b/param/io.go index 0b373defc0..485e5c0f58 100644 --- a/param/io.go +++ b/param/io.go @@ -13,4 +13,5 @@ type SectionWriter interface { Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer SectionSize() int // size of the async section unit to use DigestSize() int + Branches() int } From 3132569470ecb8c5c317bd6bae8b7d14223ce12b Mon Sep 17 00:00:00 2001 From: nolash Date: Fri, 6 Dec 2019 15:57:06 +0100 Subject: [PATCH 44/67] file: Add test for endcount calculation with differing sectionsize/branches Test fails; implementation not yet updated --- file/hasher/common_test.go | 162 +++++++++++++++++++++++++++++++ file/hasher/job_test.go | 191 +++++++------------------------------ 2 files changed, 194 insertions(+), 159 deletions(-) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index f7eac6219c..62080fd6f2 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -1,7 +1,17 @@ package hasher import ( + "bytes" + "context" + "hash" + "sync" + "testing" + + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" + "golang.org/x/crypto/sha3" ) const ( @@ -61,6 +71,158 @@ var ( end = 14 //len(dataLengths) ) +var ( + dummyHashFunc = func() param.SectionWriter { + return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) + } + + // placeholder for cases where a hasher is not necessary + noHashFunc = func() param.SectionWriter { + return nil + } +) + func init() { testutil.Init() } + +// simple param.SectionWriter hasher that keeps the data written to it +// for later inspection +// TODO: see if this can be replaced with the fake hasher from storage module +type dummySectionWriter struct { + sectionSize int + digestSize int + branches int + 
data []byte + digest []byte + size int + summed bool + writer hash.Hash + mu sync.Mutex + wg sync.WaitGroup +} + +func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int) *dummySectionWriter { + return &dummySectionWriter{ + sectionSize: sectionSize, + digestSize: digestSize, + branches: branches, + data: make([]byte, cp), + writer: sha3.NewLegacyKeccak256(), + digest: make([]byte, digestSize), + } +} + +func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { +} + +func (d *dummySectionWriter) Link(_ func() param.SectionWriter) { +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Write(index int, data []byte) { + d.mu.Lock() + copy(d.data[index*d.sectionSize:], data) + d.size += len(data) + log.Trace("dummywriter", "index", index, "size", d.size, "threshold", d.sectionSize*d.branches) + if d.isFull() { + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Sum(_ []byte, size int, _ []byte) []byte { + log.Trace("dummy Sumcall", "size", size) + d.mu.Lock() + if !d.summed { + d.size = size + d.summed = true + d.mu.Unlock() + d.sum() + } else { + d.mu.Unlock() + } + return d.digest +} + +func (d *dummySectionWriter) sum() { + d.mu.Lock() + defer d.mu.Unlock() + for i := 0; i < d.size; i += d.writer.Size() { + sectionData := d.data[i : i+d.writer.Size()] + log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size) + d.writer.Write(sectionData) + } + copy(d.digest, d.writer.Sum(nil)) + log.Trace("dummy sum result", "ref", hexutil.Encode(d.digest)) +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Reset(_ context.Context) { + d.mu.Lock() + defer d.mu.Unlock() + d.data = make([]byte, len(d.data)) + d.digest = make([]byte, d.digestSize) + d.size = 0 + d.summed = false + d.writer.Reset() +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SectionSize() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) DigestSize() int { + return d.sectionSize +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Branches() int { + return d.branches +} + +func (d *dummySectionWriter) isFull() bool { + return d.size == d.sectionSize*d.branches +} + +// TestDummySectionWriter +func TestDummySectionWriter(t *testing.T) { + + w := newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) + w.Reset(context.Background()) + + _, data := testutil.SerialData(sectionSize*2, 255, 0) + + w.Write(branches, data[:sectionSize]) + w.Write(branches+1, data[sectionSize:]) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) + } + + correctDigestHex := "0xfbc16f6db3534b456cb257d00148127f69909000c89f8ce5bc6183493ef01da1" + digest := w.Sum(nil, chunkSize*2, nil) + digestHex := hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) + } + + w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) + w.Reset(context.Background()) + w.Write(branches/2, data) + if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { + t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) + } + + correctDigestHex += zeroHex + digest = w.Sum(nil, chunkSize*2, 
nil) + digestHex = hexutil.Encode(digest) + if digestHex != correctDigestHex { + t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) + } +} diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index 069ddbd08c..837f542e03 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -1,14 +1,11 @@ package hasher import ( - "bytes" "context" "fmt" - "hash" "math/rand" "strconv" "strings" - "sync" "testing" "time" @@ -24,157 +21,6 @@ const ( zeroHex = "0000000000000000000000000000000000000000000000000000000000000000" ) -var ( - dummyHashFunc = func() param.SectionWriter { - return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) - } - // placeholder for cases where a hasher is not necessary - noHashFunc = func() param.SectionWriter { - return nil - } -) - -// simple param.SectionWriter hasher that keeps the data written to it -// for later inspection -// TODO: see if this can be replaced with the fake hasher from storage module -type dummySectionWriter struct { - sectionSize int - digestSize int - branches int - data []byte - digest []byte - size int - summed bool - writer hash.Hash - mu sync.Mutex - wg sync.WaitGroup -} - -func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int) *dummySectionWriter { - return &dummySectionWriter{ - sectionSize: sectionSize, - digestSize: digestSize, - branches: branches, - data: make([]byte, cp), - writer: sha3.NewLegacyKeccak256(), - digest: make([]byte, digestSize), - } -} - -func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { -} - -func (d *dummySectionWriter) Link(_ func() param.SectionWriter) { -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Write(index int, data []byte) { - d.mu.Lock() - copy(d.data[index*d.sectionSize:], data) - d.size += len(data) - log.Trace("dummywriter", "index", index, "size", d.size, "threshold", d.sectionSize*d.branches) - if d.isFull() { - d.summed = true - d.mu.Unlock() - d.sum() - } else { - d.mu.Unlock() - } -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Sum(_ []byte, size int, _ []byte) []byte { - log.Trace("dummy Sumcall", "size", size) - d.mu.Lock() - if !d.summed { - d.size = size - d.summed = true - d.mu.Unlock() - d.sum() - } else { - d.mu.Unlock() - } - return d.digest -} - -func (d *dummySectionWriter) sum() { - d.mu.Lock() - defer d.mu.Unlock() - for i := 0; i < d.size; i += d.writer.Size() { - sectionData := d.data[i : i+d.writer.Size()] - log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size) - d.writer.Write(sectionData) - } - copy(d.digest, d.writer.Sum(nil)) - log.Trace("dummy sum result", "ref", hexutil.Encode(d.digest)) -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Reset(_ context.Context) { - d.mu.Lock() - defer d.mu.Unlock() - d.data = make([]byte, len(d.data)) - d.digest = make([]byte, d.digestSize) - d.size = 0 - d.summed = false - d.writer.Reset() -} - -// implements param.SectionWriter -func (d *dummySectionWriter) SectionSize() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) DigestSize() int { - return d.sectionSize -} - -// implements param.SectionWriter -func (d *dummySectionWriter) Branches() int { - return d.branches -} - -func (d *dummySectionWriter) isFull() bool { - return d.size == d.sectionSize*d.branches -} - -// TestDummySectionWriter -func TestDummySectionWriter(t *testing.T) { - - w := 
newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) - w.Reset(context.Background()) - - _, data := testutil.SerialData(sectionSize*2, 255, 0) - - w.Write(branches, data[:sectionSize]) - w.Write(branches+1, data[sectionSize:]) - if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { - t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) - } - - correctDigestHex := "0xfbc16f6db3534b456cb257d00148127f69909000c89f8ce5bc6183493ef01da1" - digest := w.Sum(nil, chunkSize*2, nil) - digestHex := hexutil.Encode(digest) - if digestHex != correctDigestHex { - t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) - } - - w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) - w.Reset(context.Background()) - w.Write(branches/2, data) - if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { - t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) - } - - correctDigestHex += zeroHex - digest = w.Sum(nil, chunkSize*2, nil) - digestHex = hexutil.Encode(digest) - if digestHex != correctDigestHex { - t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) - } -} - // TestTreeParams verifies that params are set correctly by the param constructor func TestTreeParams(t *testing.T) { @@ -213,9 +59,9 @@ func TestTarget(t *testing.T) { } } -// TestTargetWithinJob verifies the calculation of whether a final data section index -// falls within a particular job's span -func TestTargetWithinJob(t *testing.T) { +// TestTargetWithinJobDefault verifies the calculation of whether a final data section index +// falls within a particular job's span without regard to differing SectionSize +func TestTargetWithinJobDefault(t *testing.T) { params := newTreeParams(sectionSize, branches, dummyHashFunc) index := newJobIndex(9) tgt := newTarget() @@ -235,6 +81,33 @@ func TestTargetWithinJob(t *testing.T) { } } +// TestTargetWithinJobDifferentSections does the same as TestTargetWithinJobDefault but +// with SectionSize/Branches settings differing between client target and underlying writer +func TestTargetWithinJobDifferentSections(t *testing.T) { + dummyHashDoubleFunc := func() param.SectionWriter { + return newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2) + } + params := newTreeParams(sectionSize, branches, dummyHashDoubleFunc) + index := newJobIndex(9) + tgt := newTarget() + + //jb := newJob(params, tgt, index, 1, branches*branches) + jb := newJob(params, tgt, index, 1, 0) + defer jb.destroy() + + //finalSize := chunkSize*branches + chunkSize*2 + finalSize := chunkSize + finalCount := dataSizeToSectionCount(finalSize, sectionSize) + log.Trace("within test", "size", finalSize, "count", finalCount) + c, ok := jb.targetWithinJob(finalCount - 1) + if !ok { + t.Fatalf("target %d within %d: expected true", finalCount, jb.level) + } + if c != 1 { + t.Fatalf("target %d within %d: expected %d, got %d", finalCount, jb.level, 1, c) + } +} + // TestNewJob verifies that a job is initialized with the correct values func TestNewJob(t *testing.T) { @@ -625,10 +498,10 @@ func TestJobWriteDoubleSection(t *testing.T) { //poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) //dataHash := bmt.New(poolSync) writeSize := sectionSize * 2 - dummyHashLongSectionFunc := func() param.SectionWriter { + dummyHashDoubleFunc := func() param.SectionWriter { return 
newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2) } - params := newTreeParams(sectionSize, branches, dummyHashLongSectionFunc) + params := newTreeParams(sectionSize, branches, dummyHashDoubleFunc) tgt := newTarget() jb := newJob(params, tgt, nil, 1, 0) From d78326095df43fdd0b075d0a72f18af6a96e20b3 Mon Sep 17 00:00:00 2001 From: nolash Date: Fri, 6 Dec 2019 21:34:39 +0100 Subject: [PATCH 45/67] file: Use same bmt for data and intermediate --- file/hasher/common_test.go | 17 +++++- file/hasher/hasher.go | 121 ++++++------------------------------- file/hasher/hasher_test.go | 69 +++++++++------------ file/hasher/job.go | 6 +- file/hasher/job_test.go | 54 ++++++++--------- file/hasher/param.go | 22 +++++-- file/hasher/reference.go | 2 +- file/hasher/util_test.go | 4 +- 8 files changed, 116 insertions(+), 179 deletions(-) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index 62080fd6f2..5f00c5d35d 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -80,6 +80,10 @@ var ( noHashFunc = func() param.SectionWriter { return nil } + + logErrFunc = func(err error) { + log.Error("SectionWriter pipeline error", "err", err) + } ) func init() { @@ -124,7 +128,7 @@ func (d *dummySectionWriter) Write(index int, data []byte) { d.mu.Lock() copy(d.data[index*d.sectionSize:], data) d.size += len(data) - log.Trace("dummywriter", "index", index, "size", d.size, "threshold", d.sectionSize*d.branches) + log.Trace("dummywriter write", "index", index, "size", d.size, "threshold", d.sectionSize*d.branches) if d.isFull() { d.summed = true d.mu.Unlock() @@ -134,6 +138,17 @@ func (d *dummySectionWriter) Write(index int, data []byte) { } } +// implements param.SectionWriter +func (d *dummySectionWriter) WriteAll(data []byte) { + d.mu.Lock() + copy(d.data, data) + d.size += len(data) + d.mu.Unlock() + log.Trace("dummywriter writeall", "size", d.size, "threshold", d.sectionSize*d.branches) + d.summed = true + d.sum() +} + // implements param.SectionWriter func (d *dummySectionWriter) Sum(_ []byte, size int, _ []byte) []byte { log.Trace("dummy Sumcall", "size", size) diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index 7649e6d568..2b664c9b2a 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -2,106 +2,39 @@ package hasher import ( "context" - "sync" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/param" ) -// BMTSyncSectionWriter is a wrapper for bmt.Hasher to implement the param.SectionWriter interface -type BMTSyncSectionWriter struct { - hasher *bmt.Hasher - data []byte -} - -// NewBMTSyncSectionWriter creates a new BMTSyncSectionWriter -func NewBMTSyncSectionWriter(hasher *bmt.Hasher) param.SectionWriter { - return &BMTSyncSectionWriter{ - hasher: hasher, - } -} - -// Init implements param.SectionWriter -func (b *BMTSyncSectionWriter) Init(_ context.Context, errFunc func(error)) { -} - -// Link implements param.SectionWriter -func (b *BMTSyncSectionWriter) Link(_ func() param.SectionWriter) { -} - -// Sum implements param.SectionWriter -func (b *BMTSyncSectionWriter) Sum(extra []byte, _ int, span []byte) []byte { - b.hasher.ResetWithLength(span) - b.hasher.Write(b.data) - return b.hasher.Sum(extra) -} - -// Reset implements param.SectionWriter -func (b *BMTSyncSectionWriter) Reset(_ context.Context) { - b.hasher.Reset() -} - -// Write implements param.SectionWriter -func (b *BMTSyncSectionWriter) Write(_ int, data []byte) { - b.data = data -} - -// SectionSize implements param.SectionWriter -func (b 
*BMTSyncSectionWriter) SectionSize() int { - return b.hasher.ChunkSize() -} - -// DigestSize implements param.SectionWriter -func (b *BMTSyncSectionWriter) DigestSize() int { - return b.hasher.Size() -} - -// Branches implements param.SectionWriter -func (b *BMTSyncSectionWriter) Branches() int { - return b.hasher.Count() -} - // Hasher is a bmt.SectionWriter that executes the file hashing algorithm on arbitrary data type Hasher struct { target *target params *treeParams index *jobIndex - job *job // current level 1 job being written to - writerPool sync.Pool - hasherPool sync.Pool - size int - count int + job *job // current level 1 job being written to + size int + count int } // New creates a new Hasher object using the given sectionSize and branch factor // hasherFunc is used to create *bmt.Hashers to hash the incoming data // writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs. It may be pipelined to other components with the same interface // TODO: sectionSize and branches should be inferred from underlying writer, not shared across job and hasher -func New(sectionSize int, branches int, hasherFunc func() param.SectionWriter) *Hasher { - h := &Hasher{ +func New(hasherFunc func() param.SectionWriter) *Hasher { + hs := &Hasher{ target: newTarget(), index: newJobIndex(9), } - h.params = newTreeParams(sectionSize, branches, h.getWriter) - h.writerPool.New = func() interface{} { - return h.params.hashFunc() - } - h.hasherPool.New = func() interface{} { - return hasherFunc() - } - h.job = newJob(h.params, h.target, h.index, 1, 0) - return h + hs.params = newTreeParams(hasherFunc) + hs.job = newJob(hs.params, hs.target, hs.index, 1, 0) + return hs } // Init implements param.SectionWriter func (h *Hasher) Init(ctx context.Context, errFunc func(error)) { h.params.SetContext(ctx) -} - -// Link implements param.SectionWriter -func (h *Hasher) Link(writerFunc func() param.SectionWriter) { - h.params.hashFunc = writerFunc h.job.start() } @@ -116,12 +49,20 @@ func (h *Hasher) Write(index int, b []byte) { h.job = h.job.Next() } go func(i int, jb *job) { - hasher := h.getHasher(len(b)) - hasher.Write(0, b) + hasher := h.params.GetWriter() l := len(b) + for i := 0; i < len(b); i += hasher.SectionSize() { + var sl int + if l-i < hasher.SectionSize() { + sl = l - i + } else { + sl = hasher.SectionSize() + } + hasher.Write(i/hasher.SectionSize(), b[i:i+sl]) + } span := bmt.LengthToSpan(l) jb.write(i%h.params.Branches, hasher.Sum(nil, l, span)) - h.putHasher(hasher) + h.params.PutWriter(hasher) }(h.count, h.job) h.size += len(b) h.count++ @@ -157,27 +98,3 @@ func (h *Hasher) DigestSize() int { return h.params.SectionSize } -// Branches implements param.SectionWriter -func (h *Hasher) Branches() int { - return h.params.Branches -} - -// proxy for sync.Pool -func (h *Hasher) putHasher(w param.SectionWriter) { - h.hasherPool.Put(w) -} - -// proxy for sync.Pool -func (h *Hasher) getHasher(l int) param.SectionWriter { - //span := bmt.LengthToSpan(l) - hasher := h.hasherPool.Get().(param.SectionWriter) - hasher.Reset(h.params.ctx) //WithLength(span) - return hasher -} - -// proxy for sync.Pool -func (h *Hasher) putWriter(w param.SectionWriter) { - w.Reset(h.params.ctx) - h.writerPool.Put(w) -} - -// proxy for sync.Pool -func (h *Hasher) getWriter() param.SectionWriter { - return h.writerPool.Get().(param.SectionWriter) -} diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index 2de783ab20..c6c3a183bb 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -1,6 +1,7 @@ package hasher import ( + "context" "fmt" 
"strconv" "strings" @@ -16,18 +17,16 @@ import ( // TestHasherJobTopHash verifies that the top hash on the first level is correctly set even though the Hasher writes asynchronously to the underlying job func TestHasherJobTopHash(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return NewBMTSyncSectionWriter(bmt.New(poolSync)) - } _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc) - h.Link(refHashFunc) + h := New(refHashFunc) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -43,18 +42,16 @@ func TestHasherJobTopHash(t *testing.T) { // TestHasherOneFullChunk verifies the result of writing a single data chunk to Hasher func TestHasherOneFullChunk(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return NewBMTSyncSectionWriter(bmt.New(poolSync)) - } _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc) - h.Link(refHashFunc) + h := New(refHashFunc) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -69,18 +66,16 @@ func TestHasherOneFullChunk(t *testing.T) { // TestHasherOneFullChunk verifies that Hasher creates new jobs on branch thresholds func TestHasherJobChange(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return NewBMTSyncSectionWriter(bmt.New(poolSync)) - } _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc) - h.Link(refHashFunc) + h := New(refHashFunc) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + h.Init(ctx, logErrFunc) jobs := make(map[string]int) for i := 0; i < chunkSize*branches*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -100,18 +95,16 @@ func TestHasherJobChange(t *testing.T) { // TestHasherONeFullLevelOneChunk verifies the result of writing branches times data chunks to Hasher func TestHasherOneFullLevelOneChunk(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - 
return NewBMTSyncSectionWriter(bmt.New(poolSync)) - } _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) - h := New(sectionSize, branches, dataHashFunc) - h.Link(refHashFunc) + h := New(refHashFunc) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches*branches; i += chunkSize { h.Write(i, data[i:i+chunkSize]) @@ -125,21 +118,19 @@ func TestHasherOneFullLevelOneChunk(t *testing.T) { } func TestHasherVector(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return NewBMTSyncSectionWriter(bmt.New(poolSync)) - } var mismatch int for i, dataLength := range dataLengths { log.Info("hashervector start", "i", i, "l", dataLength) eq := true - h := New(sectionSize, branches, dataHashFunc) - h.Link(refHashFunc) + h := New(refHashFunc) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + h.Init(ctx, logErrFunc) _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < dataLength; j += chunkSize { size := chunkSize @@ -177,19 +168,17 @@ func benchmarkHasher(b *testing.B) { } dataLength := int(dataLengthParam) - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return NewBMTSyncSectionWriter(bmt.New(poolSync)) - } _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < b.N; j++ { - h := New(sectionSize, branches, dataHashFunc) - h.Link(refHashFunc) + h := New(refHashFunc) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + h.Init(ctx, logErrFunc) for i := 0; i < dataLength; i += chunkSize { size := chunkSize if dataLength-i < chunkSize { diff --git a/file/hasher/job.go b/file/hasher/job.go index ad5f09ca78..9d91ab572c 100644 --- a/file/hasher/job.go +++ b/file/hasher/job.go @@ -67,7 +67,7 @@ func newJob(params *treeParams, tgt *target, jobIndex *jobIndex, lvl int, dataSe } func (jb *job) start() { - jb.writer = jb.params.hashFunc() + jb.writer = jb.params.GetWriter() go jb.process() } @@ -132,6 +132,8 @@ func (jb *job) write(index int, data []byte) { // - data write is finalized and targetcount is reached on a subsequent job write func (jb *job) process() { + log.Trace("starting job process", "level", jb.level, "sec", jb.dataSection) + var processCount int defer jb.destroy() @@ -317,7 +319,7 @@ func (jb *job) Next() *job { // cleans up the job; reset hasher and remove pointer to job from index func (jb *job) destroy() { if jb.writer != nil { - jb.writer.Reset(jb.params.GetContext()) + jb.params.PutWriter(jb.writer) } jb.index.Delete(jb) } diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index 837f542e03..c761c601ec 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -24,7 +24,7 @@ const ( // TestTreeParams verifies that params are set correctly by the param constructor func TestTreeParams(t *testing.T) { - params := newTreeParams(sectionSize, branches, noHashFunc) + params := newTreeParams(dummyHashFunc) if 
params.SectionSize != 32 {
 		t.Fatalf("section: expected %d, got %d", sectionSize, params.SectionSize)
@@ -59,10 +59,10 @@ func TestTarget(t *testing.T) {
 	}
 }
 
-// TestTargetWithinJobDefault verifies the calculation of whether a final data section index
+// TestJobTargetWithinDefault verifies the calculation of whether a final data section index
 // falls within a particular job's span without regard to differing SectionSize
-func TestTargetWithinJobDefault(t *testing.T) {
-	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+func TestJobTargetWithinDefault(t *testing.T) {
+	params := newTreeParams(dummyHashFunc)
 	index := newJobIndex(9)
 
 	tgt := newTarget()
@@ -81,13 +81,13 @@ func TestTargetWithinJobDefault(t *testing.T) {
 	}
 }
 
-// TestTargetWithinJobDifferentSections does the same as TestTargetWithinJobDefault but
+// TestJobTargetWithinDifferentSections does the same as TestJobTargetWithinDefault but
 // with SectionSize/Branches settings differing between client target and underlying writer
-func TestTargetWithinJobDifferentSections(t *testing.T) {
+func TestJobTargetWithinDifferentSections(t *testing.T) {
 	dummyHashDoubleFunc := func() param.SectionWriter {
 		return newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2)
 	}
-	params := newTreeParams(sectionSize, branches, dummyHashDoubleFunc)
+	params := newTreeParams(dummyHashDoubleFunc)
 	index := newJobIndex(9)
 
 	tgt := newTarget()
@@ -111,7 +111,7 @@ func TestTargetWithinJobDifferentSections(t *testing.T) {
 
 // TestNewJob verifies that a job is initialized with the correct values
 func TestNewJob(t *testing.T) {
-	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params := newTreeParams(dummyHashFunc)
 	params.Debug = true
 
 	tgt := newTarget()
@@ -130,7 +130,7 @@ func TestNewJob(t *testing.T) {
 // under a particular level reference
 // it tests both a balanced and an unbalanced tree
 func TestJobSize(t *testing.T) {
-	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params := newTreeParams(dummyHashFunc)
 	params.Debug = true
 	index := newJobIndex(9)
 
@@ -166,7 +166,7 @@
 // a data section index is within a level's span is correct
 func TestJobTarget(t *testing.T) {
 	tgt := newTarget()
-	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params := newTreeParams(dummyHashFunc)
 	params.Debug = true
 	index := newJobIndex(9)
 
@@ -204,7 +204,7 @@
 // and removes it on job destruction
 func TestJobIndex(t *testing.T) {
 	tgt := newTarget()
-	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params := newTreeParams(dummyHashFunc)
 	jb := newJob(params, tgt, nil, 1, branches)
 	jobIndex := jb.index
@@ -218,11 +218,11 @@ func TestJobIndex(t *testing.T) {
 	}
 }
 
-// TestGetJobNext verifies that the new job constructed through the job.Next() method
+// TestJobGetNext verifies that the new job constructed through the job.Next() method
 // has the correct level and data section index
-func TestGetJobNext(t *testing.T) {
+func TestJobGetNext(t *testing.T) {
 	tgt := newTarget()
-	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params := newTreeParams(dummyHashFunc)
 	params.Debug = true
 	jb := newJob(params, tgt, nil, 1, branches*branches)
@@ -243,7 +243,7 @@ func TestGetJobNext(t *testing.T) {
 
 func TestJobWriteTwoAndFinish(t *testing.T) {
 	tgt := newTarget()
-	params := newTreeParams(sectionSize, branches, dummyHashFunc)
+	params := newTreeParams(dummyHashFunc)
 	jb := newJob(params, tgt, nil, 1, 0)
 	jb.start()
@@ -273,12
+273,12 @@ func TestJobWriteTwoAndFinish(t *testing.T) { } } -// TestGetJobParent verifies that the parent returned from two jobs' parent() calls +// TestJobGetParent verifies that the parent returned from two jobs' parent() calls // that are within the same span as the parent chunk of references is the same // BUG: not guaranteed to return same parent when run with eg -count 100 -func TestGetJobParent(t *testing.T) { +func TestJobGetParent(t *testing.T) { tgt := newTarget() - params := newTreeParams(sectionSize, branches, dummyHashFunc) + params := newTreeParams(dummyHashFunc) jb := newJob(params, tgt, nil, 1, branches*branches) jb.start() @@ -304,11 +304,11 @@ func TestGetJobParent(t *testing.T) { } } -// TestWriteParentSection verifies that a data write translates to a write +// TestJobWriteParentSection verifies that a data write translates to a write // in the correct section of its parent -func TestWriteParentSection(t *testing.T) { +func TestJobWriteParentSection(t *testing.T) { tgt := newTarget() - params := newTreeParams(sectionSize, branches, dummyHashFunc) + params := newTreeParams(dummyHashFunc) index := newJobIndex(9) jb := newJob(params, tgt, index, 1, 0) @@ -351,7 +351,7 @@ func TestWriteParentSection(t *testing.T) { func TestJobWriteFull(t *testing.T) { tgt := newTarget() - params := newTreeParams(sectionSize, branches, dummyHashFunc) + params := newTreeParams(dummyHashFunc) jb := newJob(params, tgt, nil, 1, 0) jb.start() @@ -388,7 +388,7 @@ func TestJobWriteSpan(t *testing.T) { hashFunc := func() param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } - params := newTreeParams(sectionSize, branches, hashFunc) + params := newTreeParams(hashFunc) jb := newJob(params, tgt, nil, 1, 0) jb.start() @@ -439,7 +439,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { hashFunc := func() param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } - params := newTreeParams(sectionSize, branches, hashFunc) + params := newTreeParams(hashFunc) jb := newJob(params, tgt, nil, 1, 0) jb.start() @@ -501,7 +501,7 @@ func TestJobWriteDoubleSection(t *testing.T) { dummyHashDoubleFunc := func() param.SectionWriter { return newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2) } - params := newTreeParams(sectionSize, branches, dummyHashDoubleFunc) + params := newTreeParams(dummyHashDoubleFunc) tgt := newTarget() jb := newJob(params, tgt, nil, 1, 0) @@ -538,7 +538,7 @@ func TestJobVector(t *testing.T) { return bmt.New(poolAsync).NewAsyncWriter(false) } dataHash := bmt.New(poolSync) - params := newTreeParams(sectionSize, branches, refHashFunc) + params := newTreeParams(refHashFunc) var mismatch int for i := start; i < end; i++ { @@ -621,7 +621,7 @@ func benchmarkJob(b *testing.B) { return bmt.New(poolAsync).NewAsyncWriter(false) } dataHash := bmt.New(poolSync) - treeParams := newTreeParams(sectionSize, branches, refHashFunc) + treeParams := newTreeParams(refHashFunc) _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < b.N; j++ { diff --git a/file/hasher/param.go b/file/hasher/param.go index 38c3290272..5bd22977d0 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -4,6 +4,7 @@ import ( "context" "sync" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -20,15 +21,18 @@ type treeParams struct { ctx context.Context } -func newTreeParams(section int, branches int, hashFunc func() param.SectionWriter) *treeParams { +func newTreeParams(hashFunc func() param.SectionWriter) *treeParams { + h := hashFunc() p := 
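
Dropping the explicit section and branch arguments means the constructor now derives every dimension from one probe instance of the writer. The arithmetic it relies on is small enough to state outright; with the 32-byte sections and branch factor 128 used throughout this series (assumed values, as reported by a writer's SectionSize() and Branches()):

package main

import "fmt"

func main() {
	sectionSize := 32 // what the probe writer's SectionSize() would report
	branches := 128   // what its Branches() would report

	// one chunk holds exactly `branches` section-sized references
	chunkSize := sectionSize * branches
	fmt.Println(chunkSize) // 4096
}
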
&treeParams{ - SectionSize: section, - Branches: branches, - ChunkSize: section * branches, + SectionSize: h.SectionSize(), + Branches: h.Branches(), + ChunkSize: h.SectionSize() * h.Branches(), hashFunc: hashFunc, ctx: context.Background(), } + h.Reset(p.ctx) + log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize) p.writerPool.New = func() interface{} { return p.hashFunc() } @@ -48,3 +52,13 @@ func (p *treeParams) SetContext(ctx context.Context) { func (p *treeParams) GetContext() context.Context { return p.ctx } + +func (p *treeParams) PutWriter(w param.SectionWriter) { + w.Reset(p.ctx) + p.writerPool.Put(w) + +} + +func (p *treeParams) GetWriter() param.SectionWriter { + return p.writerPool.Get().(param.SectionWriter) +} diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 2092a8073c..2c35d5e088 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -27,7 +27,7 @@ type ReferenceFileHasher struct { // the section count will be the Size() of the hasher func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { f := &ReferenceFileHasher{ - params: newTreeParams(hasher.Size(), branches, nil), + params: newTreeParams(dummyHashFunc), hasher: hasher, chunkSize: branches * hasher.Size(), } diff --git a/file/hasher/util_test.go b/file/hasher/util_test.go index bea855f576..f5678b9a38 100644 --- a/file/hasher/util_test.go +++ b/file/hasher/util_test.go @@ -35,7 +35,7 @@ func TestDataSizeToSectionIndex(t *testing.T) { // TestsDataSectionToLevelSection verifies dataSectionToLevelSection func TestDataSectionToLevelSection(t *testing.T) { - params := newTreeParams(sectionSize, branches, nil) + params := newTreeParams(dummyHashFunc) sections := []int{0, branches - 1, branches, branches + 1, branches * 2, branches*2 + 1, branches * branches} levels := []int{1, 2} expects := []int{ @@ -58,7 +58,7 @@ func TestDataSectionToLevelSection(t *testing.T) { // TestDataSectionToLevelBoundary verifies dataSectionToLevelBoundary func TestDataSectionToLevelBoundary(t *testing.T) { - params := newTreeParams(sectionSize, branches, nil) + params := newTreeParams(dummyHashFunc) size := chunkSize*branches + chunkSize*2 section := dataSizeToSectionIndex(size, sectionSize) lvl := 1 From e778505466383280cd95300d29eb4f52392c2f4f Mon Sep 17 00:00:00 2001 From: nolash Date: Fri, 6 Dec 2019 22:40:05 +0100 Subject: [PATCH 46/67] file, param, bmt: Amend split and store after bmt change --- bmt/bmt.go | 12 +++++++ file/hasher/hasher.go | 12 ++----- file/hasher/hasher_test.go | 4 +-- file/hasher/reference.go | 8 ++++- file/split.go | 4 ++- file/split_test.go | 73 ++++++++++++++------------------------ file/store/store.go | 10 ++---- file/store/store_test.go | 7 ++-- param/io.go | 1 - 9 files changed, 60 insertions(+), 71 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index 5235322f74..214be0afe5 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -450,6 +450,7 @@ type AsyncHasher struct { secsize int // size of base section (size of hash or double) seccount int // base section count write func(i int, section []byte, final bool) + all bool // if all written in one go } // Implements param.SectionWriter @@ -458,6 +459,7 @@ func (sw *AsyncHasher) Init(_ context.Context, errFunc func(error)) { // Implements param.SectionWriter func (sw *AsyncHasher) Reset(_ context.Context) { + sw.all = false sw.Hasher.Reset() } @@ -487,6 +489,13 @@ func (sw *AsyncHasher) Branches() int { // this function can and is meant to be called 
concurrently // it sets max segment threadsafely func (sw *AsyncHasher) Write(i int, section []byte) { + if i < 0 { + span := LengthToSpan(len(section)) + sw.Hasher.ResetWithLength(span) + sw.Hasher.Write(section) + sw.all = true + return + } sw.mtx.Lock() defer sw.mtx.Unlock() t := sw.getTree() @@ -528,6 +537,9 @@ func (sw *AsyncHasher) Write(i int, section []byte) { // meta: metadata to hash together with BMT root for the final digest // e.g., span for protection against existential forgery func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { + if sw.all { + return sw.Hasher.Sum(nil) + } sw.mtx.Lock() t := sw.getTree() if length == 0 { diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index 2b664c9b2a..e7fe54d196 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -4,6 +4,7 @@ import ( "context" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -50,16 +51,9 @@ func (h *Hasher) Write(index int, b []byte) { } go func(i int, jb *job) { hasher := h.params.GetWriter() + hasher.Write(-1, b) l := len(b) - for i := 0; i < len(b); i += hasher.SectionSize() { - var sl int - if l-i < hasher.SectionSize() { - sl = l - i - } else { - sl = hasher.SectionSize() - } - hasher.Write(i/hasher.SectionSize(), b[i:i+sl]) - } + log.Trace("data write", "count", i, "size", l) span := bmt.LengthToSpan(l) jb.write(i%h.params.Branches, hasher.Sum(nil, l, span)) h.params.PutWriter(hasher) diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index c6c3a183bb..964a7f82e4 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -95,7 +95,7 @@ func TestHasherJobChange(t *testing.T) { // TestHasherONeFullLevelOneChunk verifies the result of writing branches times data chunks to Hasher func TestHasherOneFullLevelOneChunk(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128*128) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -118,7 +118,7 @@ func TestHasherOneFullLevelOneChunk(t *testing.T) { } func TestHasherVector(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128*128) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 2c35d5e088..54562ea47b 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -6,6 +6,8 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/param" + "golang.org/x/crypto/sha3" ) // ReferenceFileHasher is a non-performant source of truth implementation for the file hashing algorithm used in Swarm @@ -26,8 +28,12 @@ type ReferenceFileHasher struct { // NewReferenceFileHasher creates a new file hasher with the supplied branch factor // the section count will be the Size() of the hasher func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) + refHashFunc := func() param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } f := &ReferenceFileHasher{ - params: newTreeParams(dummyHashFunc), + params: 
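
With the negative-index convention added in this patch, a caller can hand the AsyncHasher a whole buffer at once instead of feeding it section by section; the writer derives the span itself and Sum short-circuits to the plain BMT digest. A usage sketch, assuming the patched bmt package from this series:

package main

import (
	"fmt"

	"github.com/ethersphere/swarm/bmt"
	"golang.org/x/crypto/sha3"
)

func main() {
	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize)
	w := bmt.New(pool).NewAsyncWriter(false)

	data := make([]byte, 4096)
	w.Write(-1, data) // index < 0: hash the whole buffer in one call

	// mirrors the pattern now used in Hasher.Write; on the whole-buffer
	// path the span was already derived inside Write
	span := bmt.LengthToSpan(len(data))
	fmt.Printf("%x\n", w.Sum(nil, len(data), span))
}
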
newTreeParams(refHashFunc),
 		hasher:    hasher,
 		chunkSize: branches * hasher.Size(),
 	}
diff --git a/file/split.go b/file/split.go
index 28b82d0783..21882a4a4f 100644
--- a/file/split.go
+++ b/file/split.go
@@ -3,6 +3,7 @@ package file
 import (
 	"io"
 
+	"github.com/ethersphere/swarm/log"
 	"github.com/ethersphere/swarm/param"
 )
 
@@ -37,9 +38,10 @@ func (s *Splitter) Split() ([]byte, error) {
 			}
 			return nil, err
 		}
+		log.Trace("split read", "c", c, "wc", wc, "l", l)
 		s.w.Write(wc, d)
 		wc++
 		l += c
 	}
-	return s.w.Sum(nil, l, nil), nil
+	return s.w.Sum(nil, 0, nil), nil
 }
diff --git a/file/split_test.go b/file/split_test.go
index 8e3ce0d3d4..6c5452474d 100644
--- a/file/split_test.go
+++ b/file/split_test.go
@@ -9,6 +9,7 @@ import (
 	"github.com/ethersphere/swarm/bmt"
 	"github.com/ethersphere/swarm/file/hasher"
 	"github.com/ethersphere/swarm/file/store"
+	"github.com/ethersphere/swarm/log"
 	"github.com/ethersphere/swarm/param"
 	"github.com/ethersphere/swarm/storage"
 	"github.com/ethersphere/swarm/testutil"
@@ -25,20 +26,24 @@ func init() {
 	testutil.Init()
 }
 
+var (
+	errFunc = func(err error) {
+		log.Error("split writer pipeline error", "err", err)
+	}
+)
+
 // TestSplit creates a Splitter with a reader with one chunk of serial data and
 // a Hasher as the underlying param.SectionWriter
 // It verifies the returned result
 func TestSplit(t *testing.T) {
-	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
-	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128)
 	refHashFunc := func() param.SectionWriter {
 		return bmt.New(poolAsync).NewAsyncWriter(false)
 	}
-	dataHashFunc := func() param.SectionWriter {
-		return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync))
-	}
-	h := hasher.New(sectionSize, branches, dataHashFunc)
-	h.Link(refHashFunc)
+	h := hasher.New(refHashFunc)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	h.Init(ctx, errFunc)
 
 	r, _ := testutil.SerialData(chunkSize, 255, 0)
 	s := NewSplitter(r, h)
@@ -55,28 +60,22 @@
 
 // TestSplitWithDataFileStore verifies chunk.Store sink result for data hashing
 func TestSplitWithDataFileStore(t *testing.T) {
-	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
-	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
+	poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128)
 	refHashFunc := func() param.SectionWriter {
 		return bmt.New(poolAsync).NewAsyncWriter(false)
 	}
-	dataHashFunc := func() param.SectionWriter {
-		return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync))
-	}
 
 	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
 	defer cancel()
 	chunkStore := &storage.FakeChunkStore{}
 	storeFunc := func() param.SectionWriter {
-		h := store.New(chunkStore)
-		h.Init(ctx, func(_ error) {})
-		h.Link(dataHashFunc)
+		h := store.New(chunkStore, refHashFunc)
+		h.Init(ctx, errFunc)
 		return h
 	}
 
-	h := hasher.New(sectionSize, branches, storeFunc)
-	h.Init(ctx, func(error) {})
-	h.Link(refHashFunc)
+	h := hasher.New(storeFunc)
+	h.Init(ctx, errFunc)
 
 	r, _ := testutil.SerialData(chunkSize, 255, 0)
 	s := NewSplitter(r, h)
@@ -94,8 +93,7 @@
 
 // TestSplitWithIntermediateFileStore verifies chunk.Store sink result for intermediate hashing
 func TestSplitWithIntermediateFileStore(t *testing.T) {
-	poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize)
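
Returning Sum(nil, 0, nil) here leans on the writer to track how much was written; the Splitter itself only counts chunks. End to end, the split-and-hash flow at this point in the series looks roughly like this (a sketch, assuming NewSplitter is exported exactly as the tests use it):

package main

import (
	"bytes"
	"context"
	"fmt"

	"github.com/ethersphere/swarm/bmt"
	"github.com/ethersphere/swarm/file"
	"github.com/ethersphere/swarm/file/hasher"
	"github.com/ethersphere/swarm/param"
	"golang.org/x/crypto/sha3"
)

func main() {
	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize)
	refHashFunc := func() param.SectionWriter {
		return bmt.New(pool).NewAsyncWriter(false)
	}

	h := hasher.New(refHashFunc)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	h.Init(ctx, func(err error) { fmt.Println("pipeline error:", err) })

	// one zero-filled chunk in, one 32-byte reference out
	s := file.NewSplitter(bytes.NewReader(make([]byte, 4096)), h)
	ref, err := s.Split()
	if err != nil {
		panic(err)
	}
	fmt.Printf("%x\n", ref)
}

-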
poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -104,18 +102,13 @@ func TestSplitWithIntermediateFileStore(t *testing.T) { defer cancel() chunkStore := &storage.FakeChunkStore{} storeFunc := func() param.SectionWriter { - h := store.New(chunkStore) - h.Init(ctx, func(_ error) {}) - h.Link(refHashFunc) + h := store.New(chunkStore, refHashFunc) + h.Init(ctx, errFunc) return h } - dataHashFunc := func() param.SectionWriter { - return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) - } - - h := hasher.New(sectionSize, branches, dataHashFunc) - h.Link(storeFunc) + h := hasher.New(storeFunc) + h.Init(ctx, errFunc) r, _ := testutil.SerialData(chunkSize*2, 255, 0) s := NewSplitter(r, h) @@ -133,8 +126,7 @@ func TestSplitWithIntermediateFileStore(t *testing.T) { // TestSplitWithBothFileStore verifies chunk.Store sink result for both data and intermediate hashing func TestSplitWithBothFileStore(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -143,24 +135,13 @@ func TestSplitWithBothFileStore(t *testing.T) { defer cancel() chunkStore := &storage.FakeChunkStore{} refStoreFunc := func() param.SectionWriter { - h := store.New(chunkStore) - h.Init(ctx, func(_ error) {}) - h.Link(refHashFunc) - return h - } - - dataHashFunc := func() param.SectionWriter { - return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) - } - dataStoreFunc := func() param.SectionWriter { - h := store.New(chunkStore) - h.Init(ctx, func(_ error) {}) - h.Link(dataHashFunc) + h := store.New(chunkStore, refHashFunc) + h.Init(ctx, errFunc) return h } - h := hasher.New(sectionSize, branches, dataStoreFunc) - h.Link(refStoreFunc) + h := hasher.New(refStoreFunc) + h.Init(ctx, errFunc) r, _ := testutil.SerialData(chunkSize*128, 255, 0) s := NewSplitter(r, h) diff --git a/file/store/store.go b/file/store/store.go index 0612737641..4eddee2494 100644 --- a/file/store/store.go +++ b/file/store/store.go @@ -21,9 +21,10 @@ type FileStore struct { } // New creates a new FileStore with the supplied chunk.Store -func New(chunkStore chunk.Store) *FileStore { +func New(chunkStore chunk.Store, writerFunc func() param.SectionWriter) *FileStore { return &FileStore{ chunkStore: chunkStore, + w: writerFunc(), } } @@ -33,11 +34,6 @@ func (f *FileStore) Init(ctx context.Context, errFunc func(error)) { f.errFunc = errFunc } -// Link implements param.SectionWriter -func (f *FileStore) Link(writerFunc func() param.SectionWriter) { - f.w = writerFunc() -} - // Reset implements param.SectionWriter func (f *FileStore) Reset(ctx context.Context) { f.ctx = ctx @@ -71,7 +67,7 @@ func (f *FileStore) Sum(b []byte, length int, span []byte) []byte { // SectionSize implements param.SectionWriter func (f *FileStore) SectionSize() int { - return chunk.DefaultSize + return f.w.SectionSize() } // DigestSize implements param.SectionWriter diff --git a/file/store/store_test.go b/file/store/store_test.go index e95117d87f..427358672e 100644 --- a/file/store/store_test.go +++ b/file/store/store_test.go @@ -48,7 +48,7 @@ func (s *testChunkStore) Put(_ context.Context, 
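
Passing the writer factory to store.New at construction removes the separate Link step: every reference the store stage produces is hashed by its own writer and the resulting chunk is handed to the chunk store. A wiring sketch adapted from the tests in this patch (FakeChunkStore simply swallows the chunks; the timeout value is illustrative):

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/ethersphere/swarm/bmt"
	"github.com/ethersphere/swarm/file/hasher"
	"github.com/ethersphere/swarm/file/store"
	"github.com/ethersphere/swarm/param"
	"github.com/ethersphere/swarm/storage"
	"golang.org/x/crypto/sha3"
)

func main() {
	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize)
	refHashFunc := func() param.SectionWriter {
		return bmt.New(pool).NewAsyncWriter(false)
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	chunkStore := &storage.FakeChunkStore{}
	storeFunc := func() param.SectionWriter {
		s := store.New(chunkStore, refHashFunc) // hash func supplied up front
		s.Init(ctx, func(error) {})
		return s
	}

	h := hasher.New(storeFunc)
	h.Init(ctx, func(error) {})
	h.Write(0, make([]byte, 4096))
	fmt.Printf("%x\n", h.Sum(nil, 0, nil))
}
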
_ chunk.ModePut, chs ...chunk.Ch // TestStoreWithHasher writes a single chunk and verifies the asynchronusly received chunk // through the underlying chunk store func TestStoreWithHasher(t *testing.T) { - pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) hashFunc := func() param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } @@ -58,11 +58,10 @@ func TestStoreWithHasher(t *testing.T) { store := newTestChunkStore(chunkC) // initialize FileStore - h := New(store) - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + h := New(store, hashFunc) + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) defer cancel() h.Init(ctx, nil) - h.Link(hashFunc) // Write data to Store _, data := testutil.SerialData(chunkSize, 255, 0) diff --git a/param/io.go b/param/io.go index 485e5c0f58..7bf2005e37 100644 --- a/param/io.go +++ b/param/io.go @@ -7,7 +7,6 @@ import ( // SectionWriter is an asynchronous segment/section writer interface type SectionWriter interface { Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination - Link(writerFunc func() SectionWriter) // sets the writer the current writer should pipeline to Reset(ctx context.Context) // standard init to be called before reuse Write(index int, data []byte) // write into section of index Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer From 5dc2d853b47874e94b301c03b0ce6473090f1bb5 Mon Sep 17 00:00:00 2001 From: nolash Date: Fri, 6 Dec 2019 22:55:01 +0100 Subject: [PATCH 47/67] file: Amend encrypt to compile, fix benchmarks --- file/encrypt/encrypt.go | 7 ++-- file/encrypt/encrypt_test.go | 65 ++++++++++++++---------------------- file/hasher/hasher_test.go | 2 +- file/hasher/job_test.go | 1 + 4 files changed, 29 insertions(+), 46 deletions(-) diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go index 324d09afc9..2df68670f7 100644 --- a/file/encrypt/encrypt.go +++ b/file/encrypt/encrypt.go @@ -20,7 +20,7 @@ type Encrypt struct { errFunc func(error) } -func New(key []byte, initCtr uint32) (*Encrypt, error) { +func New(key []byte, initCtr uint32, hashFunc func() param.SectionWriter) (*Encrypt, error) { if key == nil { key = make([]byte, encryption.KeyLength) c, err := crand.Read(key) @@ -37,6 +37,7 @@ func New(key []byte, initCtr uint32) (*Encrypt, error) { e: encryption.New(key, 0, initCtr, sha3.NewLegacyKeccak256), key: make([]byte, encryption.KeyLength), keyHash: param.HashFunc(), + w: hashFunc(), } copy(e.key, key) return e, nil @@ -46,10 +47,6 @@ func (e *Encrypt) Init(_ context.Context, errFunc func(error)) { e.errFunc = errFunc } -func (e *Encrypt) Link(writerFunc func() param.SectionWriter) { - e.w = writerFunc() -} - func (e *Encrypt) Write(index int, b []byte) { cipherText, err := e.e.Encrypt(b) if err != nil { diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index ffd811ce94..14956f4d18 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -32,8 +32,12 @@ func init() { } func TestKey(t *testing.T) { + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func() param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } - e, err := New(nil, 42) + e, err := New(nil, 42, refHashFunc) if err != nil { t.Fatal(err) } @@ -49,7 +53,7 @@ func TestKey(t *testing.T) { cacheFunc 
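
With Link gone from the interface, a pipeline component receives everything it needs through its constructor, and implementing param.SectionWriter becomes mechanical. A minimal stand-alone writer satisfying the interface as it reads after this patch (DigestSize and Branches are assumed to belong to it, as their use elsewhere in the series suggests):

package main

import (
	"context"
	"crypto/sha256"
	"fmt"
)

// flatWriter hashes whatever it is given with sha256 and ignores sectioning.
type flatWriter struct{ sum []byte }

func (f *flatWriter) Init(_ context.Context, _ func(error)) {}
func (f *flatWriter) Reset(_ context.Context)               { f.sum = nil }

func (f *flatWriter) Write(_ int, data []byte) {
	d := sha256.Sum256(data)
	f.sum = d[:]
}

func (f *flatWriter) Sum(b []byte, _ int, _ []byte) []byte { return append(b, f.sum...) }
func (f *flatWriter) SectionSize() int                     { return 32 }
func (f *flatWriter) DigestSize() int                      { return 32 }
func (f *flatWriter) Branches() int                        { return 128 }

func main() {
	w := &flatWriter{}
	w.Write(0, []byte("section"))
	fmt.Printf("%x\n", w.Sum(nil, 0, nil))
}
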
:= func() param.SectionWriter { return cache } - e, err = New(testKey, 42) + e, err = New(testKey, 42, cacheFunc) if err != nil { t.Fatal(err) } @@ -58,7 +62,6 @@ func TestKey(t *testing.T) { } _, data := testutil.SerialData(chunkSize, 255, 0) - e.Link(cacheFunc) e.Write(0, data) span := bmt.LengthToSpan(chunkSize) doubleRef := e.Sum(nil, chunkSize, span) @@ -75,14 +78,10 @@ func TestKey(t *testing.T) { } func TestEncryptOneChunk(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) - } ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) defer cancel() @@ -90,25 +89,23 @@ func TestEncryptOneChunk(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Link(dataHashFunc) + cache.Link(refHashFunc) cacheFunc := func() param.SectionWriter { return cache } encryptFunc := func() param.SectionWriter { - eFunc, err := New(testKey, uint32(42)) + eFunc, err := New(testKey, uint32(42), cacheFunc) if err != nil { t.Fatal(err) } eFunc.Init(ctx, errFunc) - eFunc.Link(cacheFunc) return eFunc } _, data := testutil.SerialData(chunkSize, 255, 0) - h := hasher.New(sectionSize, branches, encryptFunc) + h := hasher.New(encryptFunc) h.Init(ctx, func(error) {}) - h.Link(refHashFunc) h.Write(0, data) doubleRef := h.Sum(nil, 0, nil) @@ -123,7 +120,7 @@ func TestEncryptOneChunk(t *testing.T) { t.Fatalf("encrypt onechunk; data mismatch") } - hc := bmt.New(poolSync) + hc := bmt.New(poolAsync) span := bmt.LengthToSpan(len(cipherText)) hc.ResetWithLength(span) hc.Write(cipherText) @@ -135,14 +132,10 @@ func TestEncryptOneChunk(t *testing.T) { } func TestEncryptChunkWholeAndSections(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) - } ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) defer cancel() @@ -150,17 +143,16 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Link(dataHashFunc) + cache.Link(refHashFunc) cacheFunc := func() param.SectionWriter { return cache } - e, err := New(testKey, uint32(42)) + e, err := New(testKey, uint32(42), cacheFunc) if err != nil { t.Fatal(err) } e.Init(ctx, errFunc) - e.Link(cacheFunc) _, data := testutil.SerialData(chunkSize, 255, 0) e.Write(0, data) @@ -171,13 +163,12 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { copy(cacheCopy, cache.Get(0)) cache.Delete(0) - cache.Link(refHashFunc) - e, err = New(testKey, uint32(42)) + //cache.Link(refHashFunc) + e, err = New(testKey, uint32(42), cacheFunc) if err != nil { t.Fatal(err) } e.Init(ctx, errFunc) - e.Link(cacheFunc) for i := 0; i < chunkSize; i += sectionSize { e.Write(i/sectionSize, data[i:i+sectionSize]) @@ -194,14 +185,10 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { } func TestEncryptIntermediateChunk(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := 
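
The TestKey flow above leans on New's nil-key path: when no key is supplied, one is drawn from crypto/rand, so every unkeyed pipeline encrypts under a fresh key. That guard, isolated (keyLength stands in for encryption.KeyLength):

package main

import (
	crand "crypto/rand"
	"errors"
	"fmt"
)

const keyLength = 32 // stand-in for encryption.KeyLength

func orRandomKey(key []byte) ([]byte, error) {
	if key != nil {
		return key, nil
	}
	key = make([]byte, keyLength)
	c, err := crand.Read(key)
	if err != nil {
		return nil, err
	}
	if c < keyLength {
		return nil, errors.New("short read from crypto/rand")
	}
	return key, nil
}

func main() {
	key, err := orRandomKey(nil)
	if err != nil {
		panic(err)
	}
	fmt.Printf("fresh %d-byte key: %x\n", len(key), key)
}
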
bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } - dataHashFunc := func() param.SectionWriter { - return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) - } ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) defer cancel() @@ -218,27 +205,25 @@ func TestEncryptIntermediateChunk(t *testing.T) { } encryptRefFunc := func() param.SectionWriter { - eFunc, err := New(testKey, uint32(42)) + eFunc, err := New(testKey, uint32(42), cacheFunc) if err != nil { t.Fatal(err) } eFunc.Init(ctx, errFunc) - eFunc.Link(cacheFunc) return eFunc } - encryptDataFunc := func() param.SectionWriter { - eFunc, err := New(nil, uint32(42)) - if err != nil { - t.Fatal(err) - } - eFunc.Init(ctx, errFunc) - eFunc.Link(dataHashFunc) - return eFunc - } + // encryptDataFunc := func() param.SectionWriter { + // eFunc, err := New(nil, uint32(42)) + // if err != nil { + // t.Fatal(err) + // } + // eFunc.Init(ctx, errFunc) + // eFunc.Link(dataHashFunc) + // return eFunc + // } - h := hasher.New(sectionSize, branches, encryptDataFunc) - h.Link(encryptRefFunc) + h := hasher.New(encryptRefFunc) _, data := testutil.SerialData(chunkSize*branches, 255, 0) for i := 0; i < chunkSize*branches; i += chunkSize { diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index 964a7f82e4..910068f355 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -168,7 +168,7 @@ func benchmarkHasher(b *testing.B) { } dataLength := int(dataLengthParam) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128*128) refHashFunc := func() param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index c761c601ec..70c805dc6a 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -627,6 +627,7 @@ func benchmarkJob(b *testing.B) { for j := 0; j < b.N; j++ { tgt := newTarget() jb := newJob(treeParams, tgt, nil, 1, 0) + jb.start() count := 0 //log.Info("test vector", "length", dataLength) for i := 0; i < dataLength; i += chunkSize { From 57c79ed44dddb89d5b04c8cb1a59c2f44c20015b Mon Sep 17 00:00:00 2001 From: nolash Date: Fri, 6 Dec 2019 23:53:24 +0100 Subject: [PATCH 48/67] file, bmt, param: Add Connect method to IF to set underlying writer --- bmt/bmt.go | 5 +++- file/encrypt/encrypt.go | 9 ++++++-- file/encrypt/encrypt_test.go | 39 ++++++++++++-------------------- file/hasher/common_test.go | 19 ++++------------ file/hasher/hasher.go | 24 +++++++++++++------- file/hasher/hasher_test.go | 12 +++++----- file/hasher/job_test.go | 12 +++++----- file/hasher/param.go | 15 ++++++------ file/hasher/reference.go | 3 ++- file/split_test.go | 14 ++++++------ file/store/store.go | 12 +++++++--- file/store/store_test.go | 2 +- file/testutillocal/cache.go | 5 ++-- file/testutillocal/cache_test.go | 11 ++++----- param/io.go | 3 +++ 15 files changed, 96 insertions(+), 89 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index 214be0afe5..687a680c89 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -26,6 +26,7 @@ import ( "sync" "sync/atomic" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -464,7 +465,9 @@ func (sw *AsyncHasher) Reset(_ context.Context) { } // Implements param.SectionWriter -func (sw *AsyncHasher) Link(_ func() param.SectionWriter) { +func (sw *AsyncHasher) Connect(_ 
param.SectionWriterFunc) param.SectionWriter { + log.Warn("Asynchasher does not currently support SectionWriter chaining") + return sw } // SectionSize returns the size of async section unit to use diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go index 2df68670f7..94768a054e 100644 --- a/file/encrypt/encrypt.go +++ b/file/encrypt/encrypt.go @@ -20,7 +20,7 @@ type Encrypt struct { errFunc func(error) } -func New(key []byte, initCtr uint32, hashFunc func() param.SectionWriter) (*Encrypt, error) { +func New(key []byte, initCtr uint32, hashFunc param.SectionWriterFunc) (*Encrypt, error) { if key == nil { key = make([]byte, encryption.KeyLength) c, err := crand.Read(key) @@ -37,12 +37,17 @@ func New(key []byte, initCtr uint32, hashFunc func() param.SectionWriter) (*Encr e: encryption.New(key, 0, initCtr, sha3.NewLegacyKeccak256), key: make([]byte, encryption.KeyLength), keyHash: param.HashFunc(), - w: hashFunc(), } copy(e.key, key) return e, nil } +func (e *Encrypt) Connect(hashFunc param.SectionWriterFunc) param.SectionWriter { + e.w = hashFunc(nil) + return e + +} + func (e *Encrypt) Init(_ context.Context, errFunc func(error)) { e.errFunc = errFunc } diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index 14956f4d18..2838066124 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -33,7 +33,7 @@ func init() { func TestKey(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -50,7 +50,7 @@ func TestKey(t *testing.T) { errFunc := func(error) {} cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cacheFunc := func() param.SectionWriter { + cacheFunc := func(_ context.Context) param.SectionWriter { return cache } e, err = New(testKey, 42, cacheFunc) @@ -60,6 +60,7 @@ func TestKey(t *testing.T) { if !bytes.Equal(testKey, e.key) { t.Fatalf("key seed; expected %x, got %x", testKey, e.key) } + e.Connect(cacheFunc) _, data := testutil.SerialData(chunkSize, 255, 0) e.Write(0, data) @@ -79,7 +80,7 @@ func TestKey(t *testing.T) { func TestEncryptOneChunk(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -89,16 +90,17 @@ func TestEncryptOneChunk(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Link(refHashFunc) - cacheFunc := func() param.SectionWriter { + cache.Connect(refHashFunc) + cacheFunc := func(_ context.Context) param.SectionWriter { return cache } - encryptFunc := func() param.SectionWriter { + encryptFunc := func(_ context.Context) param.SectionWriter { eFunc, err := New(testKey, uint32(42), cacheFunc) if err != nil { t.Fatal(err) } + eFunc.Connect(cacheFunc) eFunc.Init(ctx, errFunc) return eFunc } @@ -133,7 +135,7 @@ func TestEncryptOneChunk(t *testing.T) { func TestEncryptChunkWholeAndSections(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -143,8 +145,8 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - 
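
The Connect method introduced by this patch gives every component the same three-step lifecycle: construct, Connect the downstream writer factory, then Init with context and error callback. Wired together for one encrypted chunk, mirroring the TestKey changes above (the all-zero 32-byte key is illustrative only):

package main

import (
	"context"
	"fmt"

	"github.com/ethersphere/swarm/bmt"
	"github.com/ethersphere/swarm/file/encrypt"
	"github.com/ethersphere/swarm/file/testutillocal"
	"github.com/ethersphere/swarm/param"
	"golang.org/x/crypto/sha3"
)

func main() {
	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize)
	hashFunc := func(_ context.Context) param.SectionWriter {
		return bmt.New(pool).NewAsyncWriter(false)
	}

	ctx := context.Background()
	errFunc := func(err error) { fmt.Println("pipeline error:", err) }

	cache := testutillocal.NewCache()
	cache.Init(ctx, errFunc)
	cache.Connect(hashFunc)
	cacheFunc := func(_ context.Context) param.SectionWriter { return cache }

	e, err := encrypt.New(make([]byte, 32), 42, cacheFunc) // illustrative zero key
	if err != nil {
		panic(err)
	}
	e.Connect(cacheFunc)
	e.Init(ctx, errFunc)

	data := make([]byte, 4096)
	e.Write(0, data)
	span := bmt.LengthToSpan(len(data))
	fmt.Printf("%x\n", e.Sum(nil, len(data), span))
}
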
cache.Link(refHashFunc) - cacheFunc := func() param.SectionWriter { + cache.Connect(refHashFunc) + cacheFunc := func(_ context.Context) param.SectionWriter { return cache } @@ -163,7 +165,6 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { copy(cacheCopy, cache.Get(0)) cache.Delete(0) - //cache.Link(refHashFunc) e, err = New(testKey, uint32(42), cacheFunc) if err != nil { t.Fatal(err) @@ -186,7 +187,7 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { func TestEncryptIntermediateChunk(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -199,12 +200,12 @@ func TestEncryptIntermediateChunk(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Link(refHashFunc) - cacheFunc := func() param.SectionWriter { + cache.Connect(refHashFunc) + cacheFunc := func(_ context.Context) param.SectionWriter { return cache } - encryptRefFunc := func() param.SectionWriter { + encryptRefFunc := func(_ context.Context) param.SectionWriter { eFunc, err := New(testKey, uint32(42), cacheFunc) if err != nil { t.Fatal(err) @@ -213,16 +214,6 @@ func TestEncryptIntermediateChunk(t *testing.T) { return eFunc } - // encryptDataFunc := func() param.SectionWriter { - // eFunc, err := New(nil, uint32(42)) - // if err != nil { - // t.Fatal(err) - // } - // eFunc.Init(ctx, errFunc) - // eFunc.Link(dataHashFunc) - // return eFunc - // } - h := hasher.New(encryptRefFunc) _, data := testutil.SerialData(chunkSize*branches, 255, 0) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index 5f00c5d35d..9a5498dd25 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -72,12 +72,12 @@ var ( ) var ( - dummyHashFunc = func() param.SectionWriter { + dummyHashFunc = func(_ context.Context) param.SectionWriter { return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) } // placeholder for cases where a hasher is not necessary - noHashFunc = func() param.SectionWriter { + noHashFunc = func(_ context.Context) param.SectionWriter { return nil } @@ -120,7 +120,9 @@ func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { } -func (d *dummySectionWriter) Link(_ func() param.SectionWriter) { +func (d *dummySectionWriter) Connect(_ param.SectionWriterFunc) param.SectionWriter { + log.Error("dummySectionWriter does not support SectionWriter chaining") + return d } // implements param.SectionWriter @@ -138,17 +140,6 @@ func (d *dummySectionWriter) Write(index int, data []byte) { } } -// implements param.SectionWriter -func (d *dummySectionWriter) WriteAll(data []byte) { - d.mu.Lock() - copy(d.data, data) - d.size += len(data) - d.mu.Unlock() - log.Trace("dummywriter writeall", "size", d.size, "threshold", d.sectionSize*d.branches) - d.summed = true - d.sum() -} - // implements param.SectionWriter func (d *dummySectionWriter) Sum(_ []byte, size int, _ []byte) []byte { log.Trace("dummy Sumcall", "size", size) diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index e7fe54d196..db02c0d455 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -10,9 +10,11 @@ import ( // Hasher is a bmt.SectionWriter that executes the file hashing algorithm on arbitary data type Hasher struct { - target *target - params *treeParams - index *jobIndex + 
target *target + params *treeParams + index *jobIndex + errFunc func(error) + ctx context.Context job *job // current level 1 job being written to size int @@ -23,18 +25,24 @@ type Hasher struct { // hasherFunc is used to create *bmt.Hashers to hash the incoming data // writerFunc is used as the underlying bmt.SectionWriter for the asynchronous hasher jobs. It may be pipelined to other components with the same interface // TODO: sectionSize and branches should be inferred from underlying writer, not shared across job and hasher -func New(hasherFunc func() param.SectionWriter) *Hasher { - hs := &Hasher{ +func New(hashFunc param.SectionWriterFunc) *Hasher { + h := &Hasher{ target: newTarget(), index: newJobIndex(9), + params: newTreeParams(hashFunc), } - hs.params = newTreeParams(hasherFunc) - hs.job = newJob(hs.params, hs.target, hs.index, 1, 0) - return hs + h.job = newJob(h.params, h.target, h.index, 1, 0) + return h +} + +func (h *Hasher) Connect(hashFunc param.SectionWriterFunc) param.SectionWriter { + h.params = newTreeParams(hashFunc) + return h } // Init implements param.SectionWriter func (h *Hasher) Init(ctx context.Context, errFunc func(error)) { + h.errFunc = errFunc h.params.SetContext(ctx) h.job.start() } diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index 910068f355..8b0212a685 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -18,7 +18,7 @@ import ( // TestHasherJobTopHash verifies that the top hash on the first level is correctly set even though the Hasher writes asynchronously to the underlying job func TestHasherJobTopHash(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -43,7 +43,7 @@ func TestHasherJobTopHash(t *testing.T) { // TestHasherOneFullChunk verifies the result of writing a single data chunk to Hasher func TestHasherOneFullChunk(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -67,7 +67,7 @@ func TestHasherOneFullChunk(t *testing.T) { // TestHasherOneFullChunk verifies that Hasher creates new jobs on branch thresholds func TestHasherJobChange(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -96,7 +96,7 @@ func TestHasherJobChange(t *testing.T) { // TestHasherONeFullLevelOneChunk verifies the result of writing branches times data chunks to Hasher func TestHasherOneFullLevelOneChunk(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -119,7 +119,7 @@ func TestHasherOneFullLevelOneChunk(t *testing.T) { func TestHasherVector(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } @@ -169,7 +169,7 @@ 
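
Seen from the caller's side, the reworked Hasher keeps a four-call surface whatever writer backs it: construct with a SectionWriterFunc, Init, Write chunk-sized slices keyed by byte offset, and Sum for the root reference. In outline (adapted from the tests in this file; length 0 in Sum defers to the hasher's own byte count):

package main

import (
	"context"
	"fmt"

	"github.com/ethersphere/swarm/bmt"
	"github.com/ethersphere/swarm/file/hasher"
	"github.com/ethersphere/swarm/param"
	"golang.org/x/crypto/sha3"
)

func main() {
	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize)
	hashFunc := func(_ context.Context) param.SectionWriter {
		return bmt.New(pool).NewAsyncWriter(false)
	}

	h := hasher.New(hashFunc)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	h.Init(ctx, func(err error) { fmt.Println("hasher error:", err) })

	data := make([]byte, 8192)
	for i := 0; i < len(data); i += 4096 {
		h.Write(i, data[i:i+4096]) // index: byte offset of the chunk
	}
	fmt.Printf("%x\n", h.Sum(nil, 0, nil))
}
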
func benchmarkHasher(b *testing.B) { dataLength := int(dataLengthParam) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } _, data := testutil.SerialData(dataLength, 255, 0) diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index 70c805dc6a..b01853140d 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -84,7 +84,7 @@ func TestJobTargetWithinDefault(t *testing.T) { // TestJobTargetWithinDifferentSections does the same as TestTargetWithinJobDefault but // with SectionSize/Branches settings differeing between client target and underlying writer func TestJobTargetWithinDifferentSections(t *testing.T) { - dummyHashDoubleFunc := func() param.SectionWriter { + dummyHashDoubleFunc := func(_ context.Context) param.SectionWriter { return newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2) } params := newTreeParams(dummyHashDoubleFunc) @@ -385,7 +385,7 @@ func TestJobWriteSpan(t *testing.T) { tgt := newTarget() pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - hashFunc := func() param.SectionWriter { + hashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } params := newTreeParams(hashFunc) @@ -436,7 +436,7 @@ func TestJobWriteSpanShuffle(t *testing.T) { tgt := newTarget() pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - hashFunc := func() param.SectionWriter { + hashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } params := newTreeParams(hashFunc) @@ -498,7 +498,7 @@ func TestJobWriteDoubleSection(t *testing.T) { //poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) //dataHash := bmt.New(poolSync) writeSize := sectionSize * 2 - dummyHashDoubleFunc := func() param.SectionWriter { + dummyHashDoubleFunc := func(_ context.Context) param.SectionWriter { return newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2) } params := newTreeParams(dummyHashDoubleFunc) @@ -534,7 +534,7 @@ func TestJobWriteDoubleSection(t *testing.T) { func TestJobVector(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } dataHash := bmt.New(poolSync) @@ -617,7 +617,7 @@ func benchmarkJob(b *testing.B) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } dataHash := bmt.New(poolSync) diff --git a/file/hasher/param.go b/file/hasher/param.go index 5bd22977d0..3648f11625 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -16,27 +16,27 @@ type treeParams struct { ChunkSize int Spans []int Debug bool - hashFunc func() param.SectionWriter + hashFunc param.SectionWriterFunc writerPool sync.Pool ctx context.Context } -func newTreeParams(hashFunc func() param.SectionWriter) *treeParams { +func newTreeParams(hashFunc param.SectionWriterFunc) *treeParams { - h := 
hashFunc() + h := hashFunc(context.Background()) p := &treeParams{ SectionSize: h.SectionSize(), Branches: h.Branches(), ChunkSize: h.SectionSize() * h.Branches(), hashFunc: hashFunc, - ctx: context.Background(), } - h.Reset(p.ctx) + h.Reset(context.Background()) log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize) p.writerPool.New = func() interface{} { - return p.hashFunc() + hf := p.hashFunc(p.ctx) + log.Trace("param new hasher", "h", hf) + return hf } - span := 1 for i := 0; i < 9; i++ { p.Spans = append(p.Spans, span) @@ -56,7 +56,6 @@ func (p *treeParams) GetContext() context.Context { func (p *treeParams) PutWriter(w param.SectionWriter) { w.Reset(p.ctx) p.writerPool.Put(w) - } func (p *treeParams) GetWriter() param.SectionWriter { diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 54562ea47b..890b37515b 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -1,6 +1,7 @@ package hasher import ( + "context" "io" "github.com/ethereum/go-ethereum/common/hexutil" @@ -29,7 +30,7 @@ type ReferenceFileHasher struct { // the section count will be the Size() of the hasher func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } f := &ReferenceFileHasher{ diff --git a/file/split_test.go b/file/split_test.go index 6c5452474d..c0370e3a9d 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -37,7 +37,7 @@ var ( // It verifies the returned result func TestSplit(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } h := hasher.New(refHashFunc) @@ -61,14 +61,14 @@ func TestSplit(t *testing.T) { // TestSplitWithDataFileStore verifies chunk.Store sink result for data hashing func TestSplitWithDataFileStore(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) defer cancel() chunkStore := &storage.FakeChunkStore{} - storeFunc := func() param.SectionWriter { + storeFunc := func(_ context.Context) param.SectionWriter { h := store.New(chunkStore, refHashFunc) h.Init(ctx, errFunc) return h @@ -94,14 +94,14 @@ func TestSplitWithDataFileStore(t *testing.T) { // TestSplitWithIntermediateFileStore verifies chunk.Store sink result for intermediate hashing func TestSplitWithIntermediateFileStore(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) defer cancel() chunkStore := &storage.FakeChunkStore{} - storeFunc := func() param.SectionWriter { + storeFunc := func(_ context.Context) param.SectionWriter { h := store.New(chunkStore, refHashFunc) h.Init(ctx, errFunc) return h @@ -127,14 +127,14 @@ func 
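
The Spans table built in this constructor is nothing more than successive powers of the branch factor: Spans[l] is the number of data sections a single reference at level l covers, so with 128 branches and 32-byte sections one level-2 reference spans 16384 sections, that is 512 KiB of data. The same computation in isolation:

package main

import "fmt"

func main() {
	branches := 128
	sectionSize := 32

	spans := make([]int, 0, 9)
	span := 1
	for i := 0; i < 9; i++ {
		spans = append(spans, span) // spans[i] == branches^i
		span *= branches
	}
	fmt.Println(spans[:3])              // [1 128 16384]
	fmt.Println(spans[2] * sectionSize) // 524288 bytes covered per level-2 reference
}
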
TestSplitWithIntermediateFileStore(t *testing.T) { // TestSplitWithBothFileStore verifies chunk.Store sink result for both data and intermediate hashing func TestSplitWithBothFileStore(t *testing.T) { poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func() param.SectionWriter { + refHashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(poolAsync).NewAsyncWriter(false) } ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) defer cancel() chunkStore := &storage.FakeChunkStore{} - refStoreFunc := func() param.SectionWriter { + refStoreFunc := func(_ context.Context) param.SectionWriter { h := store.New(chunkStore, refHashFunc) h.Init(ctx, errFunc) return h diff --git a/file/store/store.go b/file/store/store.go index 4eddee2494..f149ef1547 100644 --- a/file/store/store.go +++ b/file/store/store.go @@ -21,11 +21,17 @@ type FileStore struct { } // New creates a new FileStore with the supplied chunk.Store -func New(chunkStore chunk.Store, writerFunc func() param.SectionWriter) *FileStore { - return &FileStore{ +func New(chunkStore chunk.Store, writerFunc param.SectionWriterFunc) *FileStore { + f := &FileStore{ chunkStore: chunkStore, - w: writerFunc(), } + f.w = writerFunc(f.ctx) + return f +} + +func (f *FileStore) Connect(hashFunc param.SectionWriterFunc) param.SectionWriter { + f.w = hashFunc(f.ctx) + return f } // Init implements param.SectionWriter diff --git a/file/store/store_test.go b/file/store/store_test.go index 427358672e..9b14d81022 100644 --- a/file/store/store_test.go +++ b/file/store/store_test.go @@ -49,7 +49,7 @@ func (s *testChunkStore) Put(_ context.Context, _ chunk.ModePut, chs ...chunk.Ch // through the underlying chunk store func TestStoreWithHasher(t *testing.T) { pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - hashFunc := func() param.SectionWriter { + hashFunc := func(_ context.Context) param.SectionWriter { return bmt.New(pool).NewAsyncWriter(false) } diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go index 78c4829271..f302fc9f1c 100644 --- a/file/testutillocal/cache.go +++ b/file/testutillocal/cache.go @@ -25,8 +25,9 @@ func NewCache() *Cache { func (c *Cache) Init(_ context.Context, _ func(error)) { } -func (c *Cache) Link(writeFunc func() param.SectionWriter) { - c.w = writeFunc() +func (c *Cache) Connect(writeFunc param.SectionWriterFunc) param.SectionWriter { + c.w = writeFunc(nil) + return c } func (c *Cache) Write(index int, b []byte) { diff --git a/file/testutillocal/cache_test.go b/file/testutillocal/cache_test.go index 043c459eeb..f4f19414ca 100644 --- a/file/testutillocal/cache_test.go +++ b/file/testutillocal/cache_test.go @@ -7,7 +7,6 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" - "github.com/ethersphere/swarm/file/hasher" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" @@ -35,16 +34,16 @@ func TestCache(t *testing.T) { } func TestCacheLink(t *testing.T) { - poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - dataHashFunc := func() param.SectionWriter { - return hasher.NewBMTSyncSectionWriter(bmt.New(poolSync)) + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + refHashFunc := func(_ context.Context) param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) } c := NewCache() c.Init(context.Background(), func(error) {}) - c.Link(dataHashFunc) + 
c.Connect(refHashFunc) _, data := testutil.SerialData(chunkSize, 255, 0) - c.Write(0, data) + c.Write(-1, data) span := bmt.LengthToSpan(chunkSize) ref := c.Sum(nil, chunkSize, span) refHex := hexutil.Encode(ref) diff --git a/param/io.go b/param/io.go index 7bf2005e37..ff80c8854e 100644 --- a/param/io.go +++ b/param/io.go @@ -4,8 +4,11 @@ import ( "context" ) +type SectionWriterFunc func(ctx context.Context) SectionWriter + // SectionWriter is an asynchronous segment/section writer interface type SectionWriter interface { + Connect(hashFunc SectionWriterFunc) SectionWriter Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination Reset(ctx context.Context) // standard init to be called before reuse Write(index int, data []byte) // write into section of index From edd6f6e52c00e6bd3f0e63a4fc3add465f8fa9e0 Mon Sep 17 00:00:00 2001 From: nolash Date: Sat, 7 Dec 2019 00:07:44 +0100 Subject: [PATCH 49/67] file: Simplify code by adding function for BMT hashfunc create --- file/encrypt/encrypt_test.go | 33 +++++-------- file/hasher/hasher_test.go | 46 +++++------------ file/hasher/job_test.go | 27 +++------- file/hasher/reference.go | 11 ++--- file/split_test.go | 84 +++----------------------------- file/store/store_test.go | 9 ++-- file/testutillocal/cache_test.go | 11 ++--- file/testutillocal/hash.go | 24 +++++++++ 8 files changed, 71 insertions(+), 174 deletions(-) create mode 100644 file/testutillocal/hash.go diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index 2838066124..68a1c4cac3 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -32,12 +32,10 @@ func init() { } func TestKey(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - e, err := New(nil, 42, refHashFunc) + hashFunc := testutillocal.NewBMTHasherFunc(0) + + e, err := New(nil, 42, hashFunc) if err != nil { t.Fatal(err) } @@ -79,10 +77,8 @@ func TestKey(t *testing.T) { } func TestEncryptOneChunk(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + + hashFunc := testutillocal.NewBMTHasherFunc(128 * 128) ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) defer cancel() @@ -90,7 +86,7 @@ func TestEncryptOneChunk(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Connect(refHashFunc) + cache.Connect(hashFunc) cacheFunc := func(_ context.Context) param.SectionWriter { return cache } @@ -122,7 +118,8 @@ func TestEncryptOneChunk(t *testing.T) { t.Fatalf("encrypt onechunk; data mismatch") } - hc := bmt.New(poolAsync) + bmtTreePool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + hc := bmt.New(bmtTreePool) span := bmt.LengthToSpan(len(cipherText)) hc.ResetWithLength(span) hc.Write(cipherText) @@ -134,10 +131,7 @@ func TestEncryptOneChunk(t *testing.T) { } func TestEncryptChunkWholeAndSections(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(128 * 128) ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) 
defer cancel() @@ -145,7 +139,7 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Connect(refHashFunc) + cache.Connect(hashFunc) cacheFunc := func(_ context.Context) param.SectionWriter { return cache } @@ -186,10 +180,7 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { } func TestEncryptIntermediateChunk(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(128 * 128) ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100) defer cancel() @@ -200,7 +191,7 @@ func TestEncryptIntermediateChunk(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Connect(refHashFunc) + cache.Connect(hashFunc) cacheFunc := func(_ context.Context) param.SectionWriter { return cache } diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index 8b0212a685..dbdca5a552 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -8,22 +8,17 @@ import ( "testing" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file/testutillocal" "github.com/ethersphere/swarm/log" - "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" - "golang.org/x/crypto/sha3" ) // TestHasherJobTopHash verifies that the top hash on the first level is correctly set even though the Hasher writes asynchronously to the underlying job func TestHasherJobTopHash(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(0) _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(refHashFunc) + h := New(hashFunc) ctx, cancel := context.WithCancel(context.Background()) defer cancel() h.Init(ctx, logErrFunc) @@ -42,13 +37,10 @@ func TestHasherJobTopHash(t *testing.T) { // TestHasherOneFullChunk verifies the result of writing a single data chunk to Hasher func TestHasherOneFullChunk(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(0) _, data := testutil.SerialData(chunkSize*branches, 255, 0) - h := New(refHashFunc) + h := New(hashFunc) ctx, cancel := context.WithCancel(context.Background()) defer cancel() h.Init(ctx, logErrFunc) @@ -66,13 +58,10 @@ func TestHasherOneFullChunk(t *testing.T) { // TestHasherOneFullChunk verifies that Hasher creates new jobs on branch thresholds func TestHasherJobChange(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(0) _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) - h := New(refHashFunc) + h := New(hashFunc) ctx, cancel := context.WithCancel(context.Background()) defer cancel() h.Init(ctx, logErrFunc) @@ -95,13 +84,10 @@ func TestHasherJobChange(t *testing.T) { // TestHasherONeFullLevelOneChunk verifies the result of writing 
branches times data chunks to Hasher func TestHasherOneFullLevelOneChunk(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(128) _, data := testutil.SerialData(chunkSize*branches*branches, 255, 0) - h := New(refHashFunc) + h := New(hashFunc) ctx, cancel := context.WithCancel(context.Background()) defer cancel() h.Init(ctx, logErrFunc) @@ -118,16 +104,13 @@ func TestHasherOneFullLevelOneChunk(t *testing.T) { } func TestHasherVector(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(128) var mismatch int for i, dataLength := range dataLengths { log.Info("hashervector start", "i", i, "l", dataLength) eq := true - h := New(refHashFunc) + h := New(hashFunc) ctx, cancel := context.WithCancel(context.Background()) defer cancel() h.Init(ctx, logErrFunc) @@ -168,14 +151,11 @@ func benchmarkHasher(b *testing.B) { } dataLength := int(dataLengthParam) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(128) _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < b.N; j++ { - h := New(refHashFunc) + h := New(hashFunc) ctx, cancel := context.WithCancel(context.Background()) defer cancel() h.Init(ctx, logErrFunc) diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index b01853140d..1d09fd5cab 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -11,6 +11,7 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file/testutillocal" "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" @@ -384,10 +385,7 @@ func TestJobWriteFull(t *testing.T) { func TestJobWriteSpan(t *testing.T) { tgt := newTarget() - pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - hashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(pool).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(0) params := newTreeParams(hashFunc) jb := newJob(params, tgt, nil, 1, 0) @@ -435,10 +433,7 @@ func TestJobWriteSpan(t *testing.T) { func TestJobWriteSpanShuffle(t *testing.T) { tgt := newTarget() - pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - hashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(pool).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(0) params := newTreeParams(hashFunc) jb := newJob(params, tgt, nil, 1, 0) @@ -495,8 +490,6 @@ func TestJobWriteSpanShuffle(t *testing.T) { } func TestJobWriteDoubleSection(t *testing.T) { - //poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - //dataHash := bmt.New(poolSync) writeSize := sectionSize * 2 dummyHashDoubleFunc := func(_ context.Context) param.SectionWriter { return newDummySectionWriter(chunkSize, sectionSize*2, sectionSize*2, branches/2) @@ -533,12 +526,9 @@ func TestJobWriteDoubleSection(t *testing.T) { // TODO: vet dynamically against the 
referencefilehasher instead of expect vector func TestJobVector(t *testing.T) { poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } dataHash := bmt.New(poolSync) - params := newTreeParams(refHashFunc) + hashFunc := testutillocal.NewBMTHasherFunc(0) + params := newTreeParams(hashFunc) var mismatch int for i := start; i < end; i++ { @@ -616,12 +606,9 @@ func benchmarkJob(b *testing.B) { dataLength := int(dataLengthParam) poolSync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } dataHash := bmt.New(poolSync) - treeParams := newTreeParams(refHashFunc) + hashFunc := testutillocal.NewBMTHasherFunc(0) + treeParams := newTreeParams(hashFunc) _, data := testutil.SerialData(dataLength, 255, 0) for j := 0; j < b.N; j++ { diff --git a/file/hasher/reference.go b/file/hasher/reference.go index 890b37515b..b80d5b4fc9 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -1,14 +1,12 @@ package hasher import ( - "context" "io" "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/file/testutillocal" "github.com/ethersphere/swarm/log" - "github.com/ethersphere/swarm/param" - "golang.org/x/crypto/sha3" ) // ReferenceFileHasher is a non-performant source of truth implementation for the file hashing algorithm used in Swarm @@ -29,12 +27,9 @@ type ReferenceFileHasher struct { // NewReferenceFileHasher creates a new file hasher with the supplied branch factor // the section count will be the Size() of the hasher func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(128) f := &ReferenceFileHasher{ - params: newTreeParams(refHashFunc), + params: newTreeParams(hashFunc), hasher: hasher, chunkSize: branches * hasher.Size(), } diff --git a/file/split_test.go b/file/split_test.go index c0370e3a9d..b7ffd043b3 100644 --- a/file/split_test.go +++ b/file/split_test.go @@ -6,14 +6,13 @@ import ( "time" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/file/hasher" "github.com/ethersphere/swarm/file/store" + "github.com/ethersphere/swarm/file/testutillocal" "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/storage" "github.com/ethersphere/swarm/testutil" - "golang.org/x/crypto/sha3" ) const ( @@ -36,11 +35,9 @@ var ( // a Hasher as the underlying param.SectionWriter // It verifies the returned result func TestSplit(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - h := hasher.New(refHashFunc) + + hashFunc := testutillocal.NewBMTHasherFunc(0) + h := hasher.New(hashFunc) ctx, cancel := context.WithCancel(context.Background()) defer cancel() h.Init(ctx, errFunc) @@ 
-60,16 +57,13 @@ func TestSplit(t *testing.T) { // TestSplitWithDataFileStore verifies chunk.Store sink result for data hashing func TestSplitWithDataFileStore(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + hashFunc := testutillocal.NewBMTHasherFunc(128) ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) defer cancel() chunkStore := &storage.FakeChunkStore{} storeFunc := func(_ context.Context) param.SectionWriter { - h := store.New(chunkStore, refHashFunc) + h := store.New(chunkStore, hashFunc) h.Init(ctx, errFunc) return h } @@ -90,69 +84,3 @@ func TestSplitWithDataFileStore(t *testing.T) { t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) } } - -// TestSplitWithIntermediateFileStore verifies chunk.Store sink result for intermediate hashing -func TestSplitWithIntermediateFileStore(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) - defer cancel() - chunkStore := &storage.FakeChunkStore{} - storeFunc := func(_ context.Context) param.SectionWriter { - h := store.New(chunkStore, refHashFunc) - h.Init(ctx, errFunc) - return h - } - - h := hasher.New(storeFunc) - h.Init(ctx, errFunc) - - r, _ := testutil.SerialData(chunkSize*2, 255, 0) - s := NewSplitter(r, h) - ref, err := s.Split() - if err != nil { - t.Fatal(err) - } - time.Sleep(time.Second) - refHex := hexutil.Encode(ref) - correctRefHex := "0x29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9" - if refHex != correctRefHex { - t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) - } -} - -// TestSplitWithBothFileStore verifies chunk.Store sink result for both data and intermediate hashing -func TestSplitWithBothFileStore(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } - - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) - defer cancel() - chunkStore := &storage.FakeChunkStore{} - refStoreFunc := func(_ context.Context) param.SectionWriter { - h := store.New(chunkStore, refHashFunc) - h.Init(ctx, errFunc) - return h - } - - h := hasher.New(refStoreFunc) - h.Init(ctx, errFunc) - - r, _ := testutil.SerialData(chunkSize*128, 255, 0) - s := NewSplitter(r, h) - ref, err := s.Split() - if err != nil { - t.Fatal(err) - } - time.Sleep(time.Second) - refHex := hexutil.Encode(ref) - correctRefHex := "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" - if refHex != correctRefHex { - t.Fatalf("split, expected %s, got %s", correctRefHex, refHex) - } -} diff --git a/file/store/store_test.go b/file/store/store_test.go index 9b14d81022..359cd29d37 100644 --- a/file/store/store_test.go +++ b/file/store/store_test.go @@ -8,10 +8,9 @@ import ( "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/chunk" - "github.com/ethersphere/swarm/param" + "github.com/ethersphere/swarm/file/testutillocal" "github.com/ethersphere/swarm/storage" "github.com/ethersphere/swarm/testutil" - "golang.org/x/crypto/sha3" ) const ( @@ -48,10 +47,8 @@ func (s *testChunkStore) Put(_ 
context.Context, _ chunk.ModePut, chs ...chunk.Ch // TestStoreWithHasher writes a single chunk and verifies the asynchronously received chunk // through the underlying chunk store func TestStoreWithHasher(t *testing.T) { - pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize*128) - hashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(pool).NewAsyncWriter(false) - } + + hashFunc := testutillocal.NewBMTHasherFunc(128) // initialize chunk store with channel to intercept chunk chunkC := make(chan chunk.Chunk) diff --git a/file/testutillocal/cache_test.go b/file/testutillocal/cache_test.go index f4f19414ca..c17fd2c878 100644 --- a/file/testutillocal/cache_test.go +++ b/file/testutillocal/cache_test.go @@ -7,14 +7,11 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" - "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" - "golang.org/x/crypto/sha3" ) const ( sectionSize = 32 - branches = 128 chunkSize = 4096 ) @@ -34,14 +31,12 @@ func TestCache(t *testing.T) { } func TestCacheLink(t *testing.T) { - poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - refHashFunc := func(_ context.Context) param.SectionWriter { - return bmt.New(poolAsync).NewAsyncWriter(false) - } + + hashFunc := NewBMTHasherFunc(0) c := NewCache() c.Init(context.Background(), func(error) {}) - c.Connect(refHashFunc) + c.Connect(hashFunc) _, data := testutil.SerialData(chunkSize, 255, 0) c.Write(-1, data) span := bmt.LengthToSpan(chunkSize) diff --git a/file/testutillocal/hash.go b/file/testutillocal/hash.go new file mode 100644 index 0000000000..d30e513f37 --- /dev/null +++ b/file/testutillocal/hash.go @@ -0,0 +1,24 @@ +package testutillocal + +import ( + "context" + + "github.com/ethersphere/swarm/bmt" + "github.com/ethersphere/swarm/param" + "golang.org/x/crypto/sha3" +) + +var ( + branches = 128 +) + +func NewBMTHasherFunc(poolSize int) param.SectionWriterFunc { + if poolSize == 0 { + poolSize = bmt.PoolSize + } + poolAsync := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, poolSize) + refHashFunc := func(_ context.Context) param.SectionWriter { + return bmt.New(poolAsync).NewAsyncWriter(false) + } + return refHashFunc +} From 888d38f8ae50c715ce8f13afb7c41eebeced2921 Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 12:24:38 +0100 Subject: [PATCH 50/67] file: Extract generate span sizes to separate function --- file/hasher/param.go | 6 +----- file/hasher/util.go | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/file/hasher/param.go b/file/hasher/param.go index 3648f11625..eca97e2db9 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -37,11 +37,7 @@ func newTreeParams(hashFunc param.SectionWriterFunc) *treeParams { log.Trace("param new hasher", "h", hf) return hf } - span := 1 - for i := 0; i < 9; i++ { - p.Spans = append(p.Spans, span) - span *= p.Branches - } + p.Spans = generateSpanSizes(p.branches, 9) return p } diff --git a/file/hasher/util.go b/file/hasher/util.go index b082789ac5..f0e9aef4e5 100644 --- a/file/hasher/util.go +++ b/file/hasher/util.go @@ -4,13 +4,23 @@ import ( "math" ) -// TODO: use params instead of sectionSize +// TODO: level 0 should be SectionSize() not Branches() +// generates a dictionary of maximum span lengths per level represented by one SectionSize() of data +func generateSpanSizes(branches int, levels int) int { + spans := make([]int, levels) + span := 1 + for i := 0; i < 9; i++ { + spans =
append(spans, span) + span *= p.Branches + } + return spans +} + // calculates the section index of the given byte size func dataSizeToSectionIndex(length int, sectionSize int) int { return (length - 1) / sectionSize } -// TODO: use params instead of sectionSize // calculates the section count of the given byte size func dataSizeToSectionCount(length int, sectionSize int) int { return dataSizeToSectionIndex(length, sectionSize) + 1 From f790e339c850fd2ff56071b3b0944eb7bf9d472a Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 14:50:34 +0100 Subject: [PATCH 51/67] file, bmt: Simplify ReferenceHasher --- bmt/bmt.go | 17 +++ file/hasher/common_test.go | 2 +- file/hasher/param.go | 2 +- file/hasher/reference.go | 245 +++++++++++++++------------------- file/hasher/reference_test.go | 36 +++-- file/hasher/util.go | 6 +- 6 files changed, 154 insertions(+), 154 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index 687a680c89..e5c2acf10e 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -284,10 +284,15 @@ func newTree(segmentSize, depth int, hashfunc func() hash.Hash) *tree { // methods needed to implement hash.Hash // Size returns the size +// TODO: Remove func (h *Hasher) Size() int { return h.pool.SegmentSize } +func (h *Hasher) SectionSize() int { + return h.pool.SegmentSize +} + // BlockSize returns the block size func (h *Hasher) BlockSize() int { return 2 * h.pool.SegmentSize @@ -297,10 +302,22 @@ func (h *Hasher) ChunkSize() int { return h.pool.Size } +// TODO: remove func (h *Hasher) Count() int { return h.pool.SegmentCount } +func (h *Hasher) Branches() int { + return h.pool.SegmentCount +} + +func (h *Hasher) DigestSize() int { + return h.pool.SegmentSize +} + +func (h *Hasher) Init(_ context.Context, _ func(error)) { +} + // Sum returns the BMT root hash of the buffer // using Sum presupposes sequential synchronous writes (io.Writer interface) // hash.Hash interface Sum method appends the byte slice to the underlying diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index 9a5498dd25..74fdba0ace 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -68,7 +68,7 @@ var ( } start = 0 - end = 14 //len(dataLengths) + end = len(dataLengths) ) var ( diff --git a/file/hasher/param.go b/file/hasher/param.go index eca97e2db9..591c921ef6 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -37,7 +37,7 @@ func newTreeParams(hashFunc param.SectionWriterFunc) *treeParams { log.Trace("param new hasher", "h", hf) return hf } - p.Spans = generateSpanSizes(p.branches, 9) + p.Spans = generateSpanSizes(p.Branches, 9) return p } diff --git a/file/hasher/reference.go b/file/hasher/reference.go index b80d5b4fc9..09088ef72b 100644 --- a/file/hasher/reference.go +++ b/file/hasher/reference.go @@ -1,167 +1,140 @@ package hasher import ( - "io" + "context" - "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" - "github.com/ethersphere/swarm/file/testutillocal" "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/param" ) -// ReferenceFileHasher is a non-performant source of truth implementation for the file hashing algorithm used in Swarm -// the aim of its design is that is should be easy to understand -// TODO: bmt.Hasher should instead be passed as hash.Hash and ResetWithLength() should be abolished -type ReferenceFileHasher struct { - params *treeParams - hasher *bmt.Hasher // synchronous hasher - chunkSize int // cached chunk size, equals branches * sectionSize - buffer []byte // keeps intermediate chunks 
during hashing - cursors []int // write cursors in sectionSize units for each tree level - totalBytes int // total data bytes to be written - totalLevel int // total number of levels in tree. (level 0 is the data level) - writeByteCount int // amount of bytes currently written - writeCount int // amount of sections currently written +type BMTHasherSectionWriter struct { + *bmt.Hasher } -// NewReferenceFileHasher creates a new file hasher with the supplied branch factor -// the section count will be the Size() of the hasher -func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher { - hashFunc := testutillocal.NewBMTHasherFunc(128) - f := &ReferenceFileHasher{ - params: newTreeParams(hashFunc), - hasher: hasher, - chunkSize: branches * hasher.Size(), - } - return f +func (b *BMTHasherSectionWriter) Write(_ int, data []byte) { + b.Hasher.Write(data) +} + +func (b *BMTHasherSectionWriter) Sum(data []byte, _ int, _ []byte) []byte { + return b.Hasher.Sum(data) +} + +func (b *BMTHasherSectionWriter) Connect(_ param.SectionWriterFunc) param.SectionWriter { + return b } -// Hash executes l reads of up to sectionSize bytes from r -// and performs the filehashing algorithm on the data -// it returns the root hash -func (f *ReferenceFileHasher) Hash(r io.Reader, l int) []byte { +func (b *BMTHasherSectionWriter) Reset(_ context.Context) { + b.Hasher.Reset() +} - f.totalBytes = l - f.totalLevel = getLevelsFromLength(l, f.params.SectionSize, f.params.Branches) + 1 - log.Trace("Starting reference file hasher", "levels", f.totalLevel, "length", f.totalBytes, "b", f.params.Branches, "s", f.params.SectionSize) +// ReferenceHasher is the source-of-truth implementation of the swarm file hashing algorithm +type ReferenceHasher struct { + params *treeParams + cursors []int // section write position, indexed per level + length int // number of bytes written to the data level of the hasher + buffer []byte // keeps data and hashes, indexed by cursors + counts []int // number of sums performed, indexed per level + hasher *bmt.Hasher // underlying hasher +} - // prepare a buffer for intermediate the chunks - bufLen := f.params.SectionSize - for i := 1; i < f.totalLevel; i++ { - bufLen *= f.params.Branches +// NewReferenceHasher constructs and returns a new ReferenceHasher +func NewReferenceHasher(params *treeParams) *ReferenceHasher { + // TODO: remove when bmt interface is amended + h := params.GetWriter().(*BMTHasherSectionWriter).Hasher + return &ReferenceHasher{ + params: params, + cursors: make([]int, 9), + counts: make([]int, 9), + buffer: make([]byte, params.ChunkSize*9), + hasher: h, } - f.buffer = make([]byte, bufLen) - f.cursors = make([]int, f.totalLevel) - - var res bool - for !res { - - // read a data section into input copy buffer - input := make([]byte, f.params.SectionSize) - c, err := r.Read(input) - log.Trace("read", "bytes", c, "total read", f.writeByteCount) - if err != nil { - if err == io.EOF { - panic("EOF") - } else { - panic(err) - } - } +} - // read only up to the announced length, since we dimensioned buffer and level count accordingly - readSize := f.params.SectionSize - remainingBytes := f.totalBytes - f.writeByteCount - if remainingBytes <= f.params.SectionSize { - readSize = remainingBytes - input = input[:remainingBytes] - res = true +// Hash computes and returns the root hash of arbitrary data +func (r *ReferenceHasher) Hash(data []byte) []byte { + l := r.params.ChunkSize + for i := 0; i < len(data); i += r.params.ChunkSize { + if len(data)-i < 
r.params.ChunkSize { + l = len(data) - i } - f.writeByteCount += readSize - f.write(input, 0, res) + r.update(0, data[i:i+l]) } - if f.cursors[f.totalLevel-1] != 0 { - panic("totallevel cursor misaligned") + for i := 0; i < 9; i++ { + log.Trace("cursor", "lvl", i, "pos", r.cursors[i]) } - return f.buffer[0:f.params.SectionSize] + return r.digest() } -// performs recursive hashing on complete batches or data end -func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { +// write to the data buffer on the specified level +// calls sum if chunk boundary is reached and recursively calls this function for the next level with the acquired bmt hash +// adjusts cursors accordingly +func (r *ReferenceHasher) update(lvl int, data []byte) { + if lvl == 0 { + r.length += len(data) + } + copy(r.buffer[r.cursors[lvl]:r.cursors[lvl]+len(data)], data) + r.cursors[lvl] += len(data) + if r.cursors[lvl]-r.cursors[lvl+1] == r.params.ChunkSize { + ref := r.sum(lvl) + r.update(lvl+1, ref) + r.cursors[lvl] = r.cursors[lvl+1] + } +} - log.Trace("write", "level", level, "bytes", len(b), "total written", f.writeByteCount, "end", end, "data", hexutil.Encode(b)) +// calculates and returns the bmt sum of the last written data on the level +func (r *ReferenceHasher) sum(lvl int) []byte { + r.counts[lvl]++ + spanSize := r.params.Spans[lvl] * r.params.ChunkSize + span := (r.length-1)%spanSize + 1 + spanBytes := bmt.LengthToSpan(span) - // copy data from input copy buffer to current position of corresponding level in intermediate chunk buffer - copy(f.buffer[f.cursors[level]*f.params.SectionSize:], b) - for i, l := range f.cursors { - log.Trace("cursor", "level", i, "position", l) - } + toSumSize := r.cursors[lvl] - r.cursors[lvl+1] - // if we are at the tree root the result will be in the first sectionSize bytes of the buffer. 
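The new update/sum pair above replaces the removed write recursion shown here, and its only subtle part is the cursor arithmetic. The following standalone sketch (ours, not part of the patch; sha256 stands in for the BMT sum and all names are illustrative) models just that bookkeeping: level lvl accumulates between cursors[lvl+1] and cursors[lvl], and a full chunk is summed, pushed one level up and its buffer space reclaimed:

package main

import (
	"crypto/sha256"
	"fmt"
)

const (
	chunkSize = 4096
	maxLevels = 9
)

// cursorModel isolates the buffer bookkeeping of ReferenceHasher.update:
// the bytes of level lvl that have not been summed yet live between
// cursors[lvl+1] and cursors[lvl] in the shared buffer
type cursorModel struct {
	cursors []int
	buffer  []byte
}

func (c *cursorModel) update(lvl int, data []byte) {
	copy(c.buffer[c.cursors[lvl]:], data)
	c.cursors[lvl] += len(data)
	if c.cursors[lvl]-c.cursors[lvl+1] == chunkSize {
		// a full chunk accumulated on this level: sum it (sha256 standing in
		// for the BMT hash) and push the digest one level up
		ref := sha256.Sum256(c.buffer[c.cursors[lvl+1]:c.cursors[lvl]])
		c.update(lvl+1, ref[:])
		// rewind to the parent cursor, reclaiming the summed region
		c.cursors[lvl] = c.cursors[lvl+1]
	}
}

func main() {
	c := &cursorModel{
		cursors: make([]int, maxLevels+1),
		buffer:  make([]byte, chunkSize*maxLevels),
	}
	data := make([]byte, chunkSize*2)
	for i := 0; i < len(data); i += chunkSize {
		c.update(0, data[i:i+chunkSize])
	}
	fmt.Printf("level 1 holds %d unsummed digest bytes\n", c.cursors[1]-c.cursors[2]) // 64
}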
- // the true bool return will bubble up to the data write frame in the call stack and terminate the loop - //if level == len(f.cursors)-1 { - if level == f.totalLevel-1 { - return true - } + r.hasher.ResetWithLength(spanBytes) + r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize]) + ref := r.hasher.Sum(nil) + return ref +} - // if we are at the end of the write, AND - // if the offset of a chunk reference is the same one level up, THEN - // we have a "dangling chunk" and we merely pass it to the next level - if end && level > 0 && f.cursors[level] == f.cursors[level+1] { - res := f.write(b, level+1, end) - return res +// called after all data has been written +// sums the final chunks of each level +// skips intermediate levels that end on span boundary +func (r *ReferenceHasher) digest() []byte { + + // if we did not end on a chunk boundary, the last chunk hasn't been hashed + // we need to do this first + if r.length%r.params.ChunkSize != 0 { + ref := r.sum(0) + copy(r.buffer[r.cursors[1]:], ref) + r.cursors[1] += len(ref) + r.cursors[0] = r.cursors[1] } - // we've written to the buffer a particular level - // so we increment the cursor of that level - f.cursors[level]++ - - // hash the intermediate chunk buffer data for this level if: - // - the difference of cursors between this level and the one above equals the branch factor (equals one full chunk of data) - // - end is set - // the resulting digest will be written to the corresponding section of the level above - var res bool - if f.cursors[level]-f.cursors[level+1] == f.params.Branches || end { - - // calculate the actual data under this span - // if we're at end, the span is given by the period of the potential span - // if not, it will be the full span (since we then must have full chunk writes in the levels below) - var dataUnderSpan int - span := f.params.Spans[level] * f.params.ChunkSize - if end { - dataUnderSpan = (f.totalBytes-1)%span + 1 - } else { - dataUnderSpan = span - } - - // calculate the data in this chunk (the data to be hashed) - // on level 0 it is merely the actual spanned data - // on levels above data level, we get number of sections the data equals, and divide by the level span - var hashDataSize int - if level == 0 { - hashDataSize = dataUnderSpan - } else { - dataSectionCount := dataSizeToSectionCount(dataUnderSpan, f.params.SectionSize) - // TODO: this is the same as dataSectionToLevelSection, but without wrap to 0 on end boundary. 
Inspect whether the function should be amended, and necessary changes made to Hasher - levelSectionCount := (dataSectionCount-1)/f.params.Spans[level] + 1 - hashDataSize = levelSectionCount * f.params.SectionSize + // calculate the total number of levels needed to represent the data (including the data level) + targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches) + + // sum every intermediate level and write to the level above it + for i := 1; i < targetLevel; i++ { + + // if the tree is balanced or if there is a single reference outside a balanced tree on this level + // don't hash it again but pass it on to the next level + if r.counts[i] > 0 { + // TODO: simplify if possible + if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 { + log.Trace("skip") + r.cursors[i+1] = r.cursors[i] + r.cursors[i] = r.cursors[i-1] + continue + } } - // prepare the hasher, - // write data since previous hash operation from the current level cursor position - // and sum - spanBytes := bmt.LengthToSpan(dataUnderSpan) - f.hasher.ResetWithLength(spanBytes) - hasherWriteOffset := f.cursors[level+1] * f.params.SectionSize - f.hasher.Write(f.buffer[hasherWriteOffset : hasherWriteOffset+hashDataSize]) - hashResult := f.hasher.Sum(nil) - log.Debug("summed", "level", level, "cursor", f.cursors[level], "parent cursor", f.cursors[level+1], "span", spanBytes, "digest", hexutil.Encode(hashResult)) - - // write the digest to the current cursor position of the next level - // note the f.write() call will move the next level's cursor according to the write and possible hash operation - res = f.write(hashResult, level+1, end) - - // recycle buffer space from the threshold of just written hash - f.cursors[level] = f.cursors[level+1] + ref := r.sum(i) + copy(r.buffer[r.cursors[i+1]:], ref) + r.cursors[i+1] += len(ref) + r.cursors[i] = r.cursors[i+1] } - return res + + // the first section of the buffer will hold the root hash + return r.buffer[:r.params.SectionSize] } diff --git a/file/hasher/reference_test.go b/file/hasher/reference_test.go index c73bc84d69..27909326ef 100644 --- a/file/hasher/reference_test.go +++ b/file/hasher/reference_test.go @@ -1,6 +1,7 @@ package hasher import ( + "context" "fmt" "strconv" "strings" @@ -9,6 +10,7 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" + "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" ) @@ -84,16 +86,21 @@ func TestManualDanglingChunk(t *testing.T) { // the "expected" array in common_test.go is generated by this implementation, and test failure due to // result mismatch is nothing else than an indication that something has changed in the reference filehasher // or the underlying hashing algorithm -func TestReferenceFileHasherVector(t *testing.T) { - pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - h := bmt.New(pool) +func TestReferenceHasherVector(t *testing.T) { + + hashFunc := func(_ context.Context) param.SectionWriter { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + h := bmt.New(pool) + return &BMTHasherSectionWriter{Hasher: h} + } + params := newTreeParams(hashFunc) var mismatch int for i := start; i < end; i++ { dataLength := dataLengths[i] log.Info("start", "i", i, "len", dataLength) - fh := NewReferenceFileHasher(h, branches) - r, data := testutil.SerialData(dataLength, 255, 0) - refHash := fh.Hash(r, len(data)) + rh := 
NewReferenceHasher(params) + _, data := testutil.SerialData(dataLength, 255, 0) + refHash := rh.Hash(data) eq := true if expected[i] != fmt.Sprintf("%x", refHash) { mismatch++ @@ -114,19 +121,22 @@ func BenchmarkReferenceHasher(b *testing.B) { } } -func benchmarkReferenceFileHasher(b *testing.B) { - params := strings.Split(b.Name(), "/") - dataLength, err := strconv.ParseInt(params[1], 10, 64) +func benchmarkReferenceHasher(b *testing.B) { + benchParams := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(benchParams[1], 10, 64) if err != nil { b.Fatal(err) } - pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - log.Trace("running reference bench", "l", dataLength) + hashFunc := func(_ context.Context) param.SectionWriter { + pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) + h := bmt.New(pool) + return &BMTHasherSectionWriter{Hasher: h} + } + params := newTreeParams(hashFunc) b.ResetTimer() for i := 0; i < b.N; i++ { r, data := testutil.SerialData(int(dataLength), 255, 0) - h := bmt.New(pool) - fh := NewReferenceFileHasher(h, branches) + fh := NewReferenceFileHasher(params) fh.Hash(r, len(data)) } } diff --git a/file/hasher/util.go b/file/hasher/util.go index f0e9aef4e5..8dd8b4a27f 100644 --- a/file/hasher/util.go +++ b/file/hasher/util.go @@ -6,12 +6,12 @@ import ( // TODO: level 0 should be SectionSize() not Branches() // generates a dictionary of maximum span lengths per level represented by one SectionSize() of data -func generateSpanSizes(branches int, levels int) int { +func generateSpanSizes(branches int, levels int) []int { spans := make([]int, levels) span := 1 for i := 0; i < 9; i++ { - spans = append(spans, span) - span *= p.Branches + spans[i] = span + span *= branches } return spans } From 1c877e406d0bd92952c61dd0f931c9ca102e00b2 Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 14:53:44 +0100 Subject: [PATCH 52/67] file: Fix wrong call in ReferenceHasher benchmark --- file/hasher/reference_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/file/hasher/reference_test.go b/file/hasher/reference_test.go index 27909326ef..37b83e7a6a 100644 --- a/file/hasher/reference_test.go +++ b/file/hasher/reference_test.go @@ -117,7 +117,7 @@ func TestReferenceHasherVector(t *testing.T) { // it will be vastly inefficient func BenchmarkReferenceHasher(b *testing.B) { for i := start; i < end; i++ { - b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkReferenceFileHasher) + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkReferenceHasher) } } @@ -135,8 +135,8 @@ func benchmarkReferenceHasher(b *testing.B) { params := newTreeParams(hashFunc) b.ResetTimer() for i := 0; i < b.N; i++ { - r, data := testutil.SerialData(int(dataLength), 255, 0) - fh := NewReferenceFileHasher(params) - fh.Hash(r, len(data)) + _, data := testutil.SerialData(int(dataLength), 255, 0) + fh := NewReferenceHasher(params) + fh.Hash(data) } } From 83edc4b14ea081d23b4312922a721112583c1c28 Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 17:02:17 +0100 Subject: [PATCH 53/67] bmt, param: Use hash.Hash for SectionWriter, implement in bmt.Hasher --- bmt/bmt.go | 79 +++++++++++++++++++++++++++++++---------------------- param/io.go | 23 ++++++++++++---- 2 files changed, 65 insertions(+), 37 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index e5c2acf10e..76b735416c 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -281,41 +281,43 @@ func newTree(segmentSize, depth int, hashfunc func() hash.Hash) *tree { } } -// methods 
needed to implement hash.Hash - -// Size returns the size -// TODO: Remove -func (h *Hasher) Size() int { - return h.pool.SegmentSize -} - +// Implements param.SectionWriter func (h *Hasher) SectionSize() int { return h.pool.SegmentSize } -// BlockSize returns the block size -func (h *Hasher) BlockSize() int { - return 2 * h.pool.SegmentSize +func (h *Hasher) SetLength(length int) { + span := LengthToSpan(length) + h.getTree().span = span } -func (h *Hasher) ChunkSize() int { - return h.pool.Size -} +//func (h *Hasher) Count() int { +// return h.pool.SegmentCount +//} -// TODO: remove -func (h *Hasher) Count() int { +// Implements param.SectionWriter +func (h *Hasher) Branches() int { return h.pool.SegmentCount } -func (h *Hasher) Branches() int { - return h.pool.SegmentCount +// Implements param.SectionWriter +func (h *Hasher) Init(_ context.Context, _ func(error)) { } -func (h *Hasher) DigestSize() int { +// Size returns the digest size +// Implements hash.Hash in param.SectionWriter +func (h *Hasher) Size() int { return h.pool.SegmentSize } -func (h *Hasher) Init(_ context.Context, _ func(error)) { +func (h *Hasher) Seek(offset int64, whence int) (int64, error) { + return 0, nil +} + +// BlockSize returns the block size +// Implements hash.Hash in param.SectionWriter +func (h *Hasher) BlockSize() int { + return 2 * h.pool.SegmentSize } // Sum returns the BMT root hash of the buffer @@ -323,6 +325,7 @@ func (h *Hasher) Init(_ context.Context, _ func(error)) { // hash.Hash interface Sum method appends the byte slice to the underlying // data before it calculates and returns the hash of the chunk // caller must make sure Sum is not called concurrently with Write, writeSection +// Implements hash.Hash in param.SectionWriter func (h *Hasher) Sum(b []byte) (s []byte) { t := h.getTree() // write the last section with final flag set to true @@ -339,10 +342,9 @@ func (h *Hasher) Sum(b []byte) (s []byte) { return doSum(h.pool.hasher(), b, span, s) } -// methods needed to implement the SwarmHash and the io.Writer interfaces - // Write calls sequentially add to the buffer to be hashed, // with every full segment calls writeSection in a go routine +// Implements hash.Hash (io.Writer) in param.SectionWriter func (h *Hasher) Write(b []byte) (int, error) { l := len(b) if l == 0 || l > h.pool.Size { @@ -389,6 +391,7 @@ func (h *Hasher) Write(b []byte) (int, error) { } // Reset needs to be called before writing to the hasher +// Implements hash.Hash in param.SectionWriter func (h *Hasher) Reset() { h.releaseTree() } @@ -398,10 +401,10 @@ func (h *Hasher) Reset() { // ResetWithLength needs to be called before writing to the hasher // the argument is supposed to be the byte slice binary representation of // the length of the data subsumed under the hash, i.e., span -func (h *Hasher) ResetWithLength(span []byte) { - h.Reset() - h.getTree().span = span -} +//func (h *Hasher) ResetWithLength(span []byte) { +// h.Reset() +// h.getTree().span = span +//} // releaseTree gives back the Tree to the pool whereby it unlocks // it resets tree, segment and index @@ -476,13 +479,13 @@ func (sw *AsyncHasher) Init(_ context.Context, errFunc func(error)) { } // Implements param.SectionWriter -func (sw *AsyncHasher) Reset(_ context.Context) { +func (sw *AsyncHasher) Reset() { sw.all = false sw.Hasher.Reset() } // Implements param.SectionWriter -func (sw *AsyncHasher) Connect(_ param.SectionWriterFunc) param.SectionWriter { +func (sw *AsyncHasher) SetWriter(_ param.SectionWriterFunc) param.SectionWriter { 
log.Warn("Asynchasher does not currently support SectionWriter chaining") return sw } @@ -508,10 +511,17 @@ func (sw *AsyncHasher) Branches() int { // Write writes the i-th section of the BMT base // this function can and is meant to be called concurrently // it sets max segment threadsafely -func (sw *AsyncHasher) Write(i int, section []byte) { +func (sw *AsyncHasher) Write(section []byte) (int, error) { + sw.writeSection(0, section) + return 0, nil +} + +func (sw *AsyncHasher) writeSection(i int, section []byte) { if i < 0 { - span := LengthToSpan(len(section)) - sw.Hasher.ResetWithLength(span) + //span := LengthToSpan(len(section)) + //sw.Hasher.ResetWithLength(span) + sw.Hasher.Reset() + sw.Hasher.SetLength(len(section)) sw.Hasher.Write(section) sw.all = true return @@ -556,7 +566,12 @@ func (sw *AsyncHasher) Write(i int, section []byte) { // length: known length of the input (unsafe; undefined if out of range) // meta: metadata to hash together with BMT root for the final digest // e.g., span for protection against existential forgery -func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { + +func (sw *AsyncHasher) Sum(b []byte) []byte { + return sw.sum(b, 0, nil) +} + +func (sw *AsyncHasher) sum(b []byte, length int, meta []byte) (s []byte) { if sw.all { return sw.Hasher.Sum(nil) } diff --git a/param/io.go b/param/io.go index ff80c8854e..e093c12e9d 100644 --- a/param/io.go +++ b/param/io.go @@ -2,18 +2,31 @@ package param import ( "context" + "hash" + "io" ) type SectionWriterFunc func(ctx context.Context) SectionWriter // SectionWriter is an asynchronous segment/section writer interface +//type SectionWriter interface { +// Connect(hashFunc SectionWriterFunc) SectionWriter +// Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination +// Reset(ctx context.Context) // standard init to be called before reuse +// Write(index int, data []byte) // write into section of index +// Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer +// SectionSize() int // size of the async section unit to use +// DigestSize() int +// Branches() int +//} + type SectionWriter interface { - Connect(hashFunc SectionWriterFunc) SectionWriter + hash.Hash + io.Seeker + SetWriter(hashFunc SectionWriterFunc) SectionWriter Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination - Reset(ctx context.Context) // standard init to be called before reuse - Write(index int, data []byte) // write into section of index - Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer - SectionSize() int // size of the async section unit to use + SetLength(length int) + SectionSize() int // size of the async section unit to use DigestSize() int Branches() int } From d18b51cba31675b6356eb6457371a775a2426886 Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 18:04:10 +0100 Subject: [PATCH 54/67] file: Move reference hasher to own package, bmt compiles w changes --- bmt/bmt.go | 59 +++++++++++-------- bmt/bmt_test.go | 39 ++++++------ file/hasher/{ => reference}/common_test.go | 0 file/hasher/{ => reference}/pyramid_test.go | 0 file/hasher/{ => reference}/reference.go | 0 file/hasher/{ => reference}/reference_test.go | 0 6 files changed, 57 insertions(+), 41 deletions(-) rename file/hasher/{ => reference}/common_test.go (100%) rename file/hasher/{ => reference}/pyramid_test.go (100%) rename file/hasher/{ => 
reference}/reference.go (100%) rename file/hasher/{ => reference}/reference_test.go (100%) diff --git a/bmt/bmt.go b/bmt/bmt.go index 76b735416c..2b0528b22a 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -82,6 +82,7 @@ type BaseHasherFunc func() hash.Hash type Hasher struct { pool *TreePool // BMT resource pool bmt *tree // prebuilt BMT resource for flowcontrol and proofs + size int // bytes written to Hasher since last Reset() } // New creates a reusable BMT Hasher that @@ -310,8 +311,14 @@ func (h *Hasher) Size() int { return h.pool.SegmentSize } +// TODO: Rework seek to work for AsyncHasher transparently +// TODO: whence ignored +// Seek sets the section that will be written to on the next Write() +// Implements io.Seeker in param.SectionWriter func (h *Hasher) Seek(offset int64, whence int) (int64, error) { - return 0, nil + //return 0, errors.New("Seek not supported currently, use AsyncHasher for Seek") + h.getTree().cursor = int(offset) + return offset, nil } // BlockSize returns the block size @@ -326,6 +333,7 @@ func (h *Hasher) BlockSize() int { // data before it calculates and returns the hash of the chunk // caller must make sure Sum is not called concurrently with Write, writeSection // Implements hash.Hash in param.SectionWriter +// TODO: if span is nil return the zero-hash func (h *Hasher) Sum(b []byte) (s []byte) { t := h.getTree() // write the last section with final flag set to true @@ -336,7 +344,8 @@ func (h *Hasher) Sum(b []byte) (s []byte) { // release the tree resource back to the pool h.releaseTree() // b + sha3(span + BMT(pure_chunk)) - if len(span) == 0 { + //if len(span) == 0 { + if span == nil { return append(b, s...) } return doSum(h.pool.hasher(), b, span, s) @@ -393,6 +402,7 @@ func (h *Hasher) Write(b []byte) (int, error) { // Reset needs to be called before writing to the hasher // Implements hash.Hash in param.SectionWriter func (h *Hasher) Reset() { + h.size = 0 h.releaseTree() } @@ -472,6 +482,7 @@ type AsyncHasher struct { seccount int // base section count write func(i int, section []byte, final bool) all bool // if all written in one go + index int // index to write to on next Write() call } // Implements param.SectionWriter @@ -512,49 +523,48 @@ func (sw *AsyncHasher) Branches() int { // this function can and is meant to be called concurrently // it sets max segment threadsafely func (sw *AsyncHasher) Write(section []byte) (int, error) { - sw.writeSection(0, section) - return 0, nil + return sw.writeSection(sw.getTree().cursor, section) } -func (sw *AsyncHasher) writeSection(i int, section []byte) { - if i < 0 { - //span := LengthToSpan(len(section)) - //sw.Hasher.ResetWithLength(span) +func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) { + // TODO: Temporary workaround for chunkwise write + if sw.index < 0 { sw.Hasher.Reset() sw.Hasher.SetLength(len(section)) sw.Hasher.Write(section) sw.all = true - return + return len(section), nil } sw.mtx.Lock() defer sw.mtx.Unlock() t := sw.getTree() // cursor keeps track of the rightmost section written so far // if index is lower than cursor then just write non-final section as is - if i < t.cursor { + if sw.index < t.cursor { // if index is not the rightmost, safe to write section - go sw.write(i, section, false) - return + go sw.write(sw.index, section, false) + return len(section), nil } // if there is a previous rightmost section safe to write section if t.offset > 0 { - if i == t.cursor { + if sw.index == t.cursor { // i==cursor implies cursor was set by Hash call so we can write section 
as final one // since it can be shorter, first we copy it to the padded buffer t.section = make([]byte, sw.secsize) copy(t.section, section) - go sw.write(i, t.section, true) - return + go sw.write(sw.index, t.section, true) + return len(section), nil } // the rightmost section just changed, so we write the previous one as non-final go sw.write(t.cursor, t.section, false) } // set i as the index of the righmost section written so far // set t.offset to cursor*secsize+1 - t.cursor = i + t.cursor = sw.index t.offset = i*sw.secsize + 1 t.section = make([]byte, sw.secsize) copy(t.section, section) + return len(section), nil } // Sum can be called any time once the length and the span is known @@ -567,16 +577,14 @@ func (sw *AsyncHasher) writeSection(i int, section []byte) { // meta: metadata to hash together with BMT root for the final digest // e.g., span for protection against existential forgery -func (sw *AsyncHasher) Sum(b []byte) []byte { - return sw.sum(b, 0, nil) -} - -func (sw *AsyncHasher) sum(b []byte, length int, meta []byte) (s []byte) { +//func (sw *AsyncHasher) sum(b []byte, length int, meta []byte) (s []byte) { +func (sw *AsyncHasher) Sum(b []byte) (s []byte) { if sw.all { return sw.Hasher.Sum(nil) } sw.mtx.Lock() t := sw.getTree() + length := sw.Hasher.size if length == 0 { sw.mtx.Unlock() s = sw.pool.zerohashes[sw.pool.Depth] @@ -597,15 +605,17 @@ func (sw *AsyncHasher) sum(b []byte, length int, meta []byte) (s []byte) { } // relesase the tree back to the pool sw.releaseTree() + meta := t.span // if no meta is given just append digest to b - if len(meta) == 0 { - return append(b, s...) - } + //if len(meta) == 0 { + // return append(b, s...) + //} // hash together meta and BMT root hash using the pools return doSum(sw.pool.hasher(), b, meta, s) } // writeSection writes the hash of i-th section into level 1 node of the BMT tree +// TODO: h.size increases even on multiple writes to the same section of a section func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) { // select the leaf node for the section var n *node @@ -613,6 +623,7 @@ func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) { var hasher hash.Hash var level int t := h.getTree() + h.size += len(section) if double { level++ n = t.leaves[i] diff --git a/bmt/bmt_test.go b/bmt/bmt_test.go index 071b273ca5..a60268985b 100644 --- a/bmt/bmt_test.go +++ b/bmt/bmt_test.go @@ -143,7 +143,7 @@ func TestHasherEmptyData(t *testing.T) { bmt := New(pool) rbmt := NewRefHasher(hasher, count) refHash := rbmt.Hash(data) - expHash := syncHash(bmt, nil, data) + expHash := syncHash(bmt, 0, data) if !bytes.Equal(expHash, refHash) { t.Fatalf("hash mismatch with reference. 
expected %x, got %x", refHash, expHash) } @@ -199,12 +199,12 @@ func TestAsyncCorrectness(t *testing.T) { d := data[:n] rbmt := NewRefHasher(hasher, count) exp := rbmt.Hash(d) - got := syncHash(bmt, nil, d) + got := syncHash(bmt, 0, d) if !bytes.Equal(got, exp) { t.Fatalf("wrong sync hash for datalength %v: expected %x (ref), got %x", n, exp, got) } sw := bmt.NewAsyncWriter(double) - got = asyncHashRandom(sw, nil, d, wh) + got = asyncHashRandom(sw, 0, d, wh) if !bytes.Equal(got, exp) { t.Fatalf("wrong async hash for datalength %v: expected %x, got %x", n, exp, got) } @@ -290,7 +290,7 @@ func TestBMTWriterBuffers(t *testing.T) { data := testutil.RandomBytes(1, n) rbmt := NewRefHasher(hasher, count) refHash := rbmt.Hash(data) - expHash := syncHash(bmt, nil, data) + expHash := syncHash(bmt, 0, data) if !bytes.Equal(expHash, refHash) { t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash) } @@ -351,7 +351,7 @@ func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, coun data := d[:n] rbmt := NewRefHasher(hasher, count) exp := sha3hash(span, rbmt.Hash(data)) - got := syncHash(bmt, span, data) + got := syncHash(bmt, n, data) if !bytes.Equal(got, exp) { return fmt.Errorf("wrong hash: expected %x, got %x", exp, got) } @@ -461,7 +461,7 @@ func benchmarkBMT(t *testing.B, n int) { t.ReportAllocs() t.ResetTimer() for i := 0; i < t.N; i++ { - syncHash(bmt, nil, data) + syncHash(bmt, 0, data) } } @@ -479,7 +479,7 @@ func benchmarkBMTAsync(t *testing.B, n int, wh whenHash, double bool) { t.ReportAllocs() t.ResetTimer() for i := 0; i < t.N; i++ { - asyncHash(bmt, nil, n, wh, idxs, segments) + asyncHash(bmt, 0, n, wh, idxs, segments) } } @@ -499,7 +499,7 @@ func benchmarkPool(t *testing.B, poolsize, n int) { go func() { defer wg.Done() bmt := New(pool) - syncHash(bmt, nil, data) + syncHash(bmt, 0, data) }() } wg.Wait() @@ -520,8 +520,9 @@ func benchmarkRefHasher(t *testing.B, n int) { } // Hash hashes the data and the span using the bmt hasher -func syncHash(h *Hasher, span, data []byte) []byte { - h.ResetWithLength(span) +func syncHash(h *Hasher, spanLength int, data []byte) []byte { + h.Reset() + h.SetLength(spanLength) h.Write(data) return h.Sum(nil) } @@ -548,23 +549,25 @@ func splitAndShuffle(secsize int, data []byte) (idxs []int, segments [][]byte) { } // splits the input data performs a random shuffle to mock async section writes -func asyncHashRandom(bmt param.SectionWriter, span []byte, data []byte, wh whenHash) (s []byte) { +func asyncHashRandom(bmt param.SectionWriter, spanLength int, data []byte, wh whenHash) (s []byte) { idxs, segments := splitAndShuffle(bmt.SectionSize(), data) - return asyncHash(bmt, span, len(data), wh, idxs, segments) + return asyncHash(bmt, spanLength, len(data), wh, idxs, segments) } // mock for async section writes for param.SectionWriter // requires a permutation (a random shuffle) of list of all indexes of segments // and writes them in order to the appropriate section // the Sum function is called according to the wh parameter (first, last, random [relative to segment writes]) -func asyncHash(bmt param.SectionWriter, span []byte, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) { +func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) { bmt.Reset() if l == 0 { - return bmt.Sum(nil, l, span) + bmt.SetLength(spanLength) + return bmt.Sum(nil) } c := make(chan []byte, 1) hashf := func() { - c <- bmt.Sum(nil, l, span) + bmt.SetLength(spanLength) + c <- 
bmt.Sum(nil) } maxsize := len(idxs) var r int @@ -572,13 +575,15 @@ func asyncHash(bmt param.SectionWriter, span []byte, l int, wh whenHash, idxs [] r = rand.Intn(maxsize) } for i, idx := range idxs { - bmt.Write(idx, segments[idx]) + bmt.Seek(int64(idx*bmt.SectionSize()), 0) + bmt.Write(segments[idx]) if (wh == first || wh == random) && i == r { go hashf() } } if wh == last { - return bmt.Sum(nil, l, span) + bmt.SetLength(spanLength) + return bmt.Sum(nil) } return <-c } diff --git a/file/hasher/common_test.go b/file/hasher/reference/common_test.go similarity index 100% rename from file/hasher/common_test.go rename to file/hasher/reference/common_test.go diff --git a/file/hasher/pyramid_test.go b/file/hasher/reference/pyramid_test.go similarity index 100% rename from file/hasher/pyramid_test.go rename to file/hasher/reference/pyramid_test.go diff --git a/file/hasher/reference.go b/file/hasher/reference/reference.go similarity index 100% rename from file/hasher/reference.go rename to file/hasher/reference/reference.go diff --git a/file/hasher/reference_test.go b/file/hasher/reference/reference_test.go similarity index 100% rename from file/hasher/reference_test.go rename to file/hasher/reference/reference_test.go From 63d2149845df17e5ecbf38b8f8cc33fe05428b3b Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 19:35:04 +0100 Subject: [PATCH 55/67] bmt: Tests pass after changes to interface --- bmt/bmt.go | 43 +++++++++++++++++++++++++++++-------------- bmt/bmt_test.go | 32 ++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index 2b0528b22a..a8bbcd2b80 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -28,8 +28,13 @@ import ( "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" + "github.com/ethersphere/swarm/testutil" ) +func init() { + testutil.Init() +} + /* Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size. It is defined as the root hash of the binary merkle tree built over fixed size segments @@ -65,6 +70,10 @@ const ( PoolSize = 8 ) +var ( + zeroSpan = make([]byte, 8) +) + // BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT. 
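The reworked asyncHash driver above captures the amended calling convention in one place: position with Seek, write the bare section, set the span length, then Sum. A minimal sketch against the new param.SectionWriter interface (our helper name; error returns ignored for brevity):

package example

import "github.com/ethersphere/swarm/param"

// hashSections drives a SectionWriter with the hash.Hash-shaped calling
// convention introduced by this series: Seek positions the section cursor,
// Write takes the bare section data, SetLength sets the span, Sum finalizes
func hashSections(sw param.SectionWriter, sections [][]byte, dataLength int) []byte {
	sw.Reset()
	for i, section := range sections {
		sw.Seek(int64(i*sw.SectionSize()), 0) // whence is currently ignored
		sw.Write(section)                     // Write no longer takes an index argument
	}
	sw.SetLength(dataLength) // length of the data subsumed under the hash
	return sw.Sum(nil)       // Sum no longer takes length and meta
}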
// implemented by Keccak256 SHA3 sha3.NewLegacyKeccak256 type BaseHasherFunc func() hash.Hash @@ -80,9 +89,11 @@ type BaseHasherFunc func() hash.Hash // the tree and itself in a state reusable for hashing a new chunk // - generates and verifies segment inclusion proofs (TODO:) type Hasher struct { - pool *TreePool // BMT resource pool - bmt *tree // prebuilt BMT resource for flowcontrol and proofs - size int // bytes written to Hasher since last Reset() + pool *TreePool // BMT resource pool + bmt *tree // prebuilt BMT resource for flowcontrol and proofs + size int32 // bytes written to Hasher since last Reset() + jobSize int // size of data written in current session + cursor int64 // cursor to write to on next Write() call } // New creates a reusable BMT Hasher that @@ -288,6 +299,7 @@ func (h *Hasher) SectionSize() int { } func (h *Hasher) SetLength(length int) { + h.jobSize = (length-1)%h.pool.Size + 1 span := LengthToSpan(length) h.getTree().span = span } @@ -317,7 +329,7 @@ func (h *Hasher) Size() int { // Implements io.Seeker in param.SectionWriter func (h *Hasher) Seek(offset int64, whence int) (int64, error) { //return 0, errors.New("Seek not supported currently, use AsyncHasher for Seek") - h.getTree().cursor = int(offset) + atomic.StoreInt64(&h.cursor, offset) return offset, nil } @@ -402,6 +414,7 @@ func (h *Hasher) Write(b []byte) (int, error) { // Reset needs to be called before writing to the hasher // Implements hash.Hash in param.SectionWriter func (h *Hasher) Reset() { + h.cursor = 0 h.size = 0 h.releaseTree() } @@ -482,7 +495,6 @@ type AsyncHasher struct { seccount int // base section count write func(i int, section []byte, final bool) all bool // if all written in one go - index int // index to write to on next Write() call } // Implements param.SectionWriter @@ -523,12 +535,13 @@ func (sw *AsyncHasher) Branches() int { // this function can and is meant to be called concurrently // it sets max segment threadsafely func (sw *AsyncHasher) Write(section []byte) (int, error) { - return sw.writeSection(sw.getTree().cursor, section) + c := atomic.LoadInt64(&sw.Hasher.cursor) + return sw.writeSection(int(c)/sw.secsize, section) } func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) { // TODO: Temporary workaround for chunkwise write - if sw.index < 0 { + if i < 0 { sw.Hasher.Reset() sw.Hasher.SetLength(len(section)) sw.Hasher.Write(section) @@ -540,19 +553,19 @@ func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) { t := sw.getTree() // cursor keeps track of the rightmost section written so far // if index is lower than cursor then just write non-final section as is - if sw.index < t.cursor { + if i < t.cursor { // if index is not the rightmost, safe to write section - go sw.write(sw.index, section, false) + go sw.write(i, section, false) return len(section), nil } // if there is a previous rightmost section safe to write section if t.offset > 0 { - if sw.index == t.cursor { + if i == t.cursor { // i==cursor implies cursor was set by Hash call so we can write section as final one // since it can be shorter, first we copy it to the padded buffer t.section = make([]byte, sw.secsize) copy(t.section, section) - go sw.write(sw.index, t.section, true) + go sw.write(i, t.section, true) return len(section), nil } // the rightmost section just changed, so we write the previous one as non-final @@ -560,7 +573,7 @@ func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) { } // set i as the index of the righmost section written so far 
// set t.offset to cursor*secsize+1 - t.cursor = sw.index + t.cursor = i t.offset = i*sw.secsize + 1 t.section = make([]byte, sw.secsize) copy(t.section, section) @@ -584,7 +597,8 @@ func (sw *AsyncHasher) Sum(b []byte) (s []byte) { } sw.mtx.Lock() t := sw.getTree() - length := sw.Hasher.size + length := int(sw.Hasher.jobSize) + log.Trace("async sum", "l", length) if length == 0 { sw.mtx.Unlock() s = sw.pool.zerohashes[sw.pool.Depth] @@ -623,7 +637,8 @@ func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) { var hasher hash.Hash var level int t := h.getTree() - h.size += len(section) + log.Trace("hasher writesection adding", "len", len(section)) + atomic.AddInt32(&h.size, int32(len(section))) if double { level++ n = t.leaves[i] diff --git a/bmt/bmt_test.go b/bmt/bmt_test.go index a60268985b..46b0a1bb85 100644 --- a/bmt/bmt_test.go +++ b/bmt/bmt_test.go @@ -26,6 +26,7 @@ import ( "testing" "time" + "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" "github.com/ethersphere/swarm/testutil" "golang.org/x/crypto/sha3" @@ -142,7 +143,11 @@ func TestHasherEmptyData(t *testing.T) { defer pool.Drain(0) bmt := New(pool) rbmt := NewRefHasher(hasher, count) - refHash := rbmt.Hash(data) + refNoMetaHash := rbmt.Hash(data) + h := hasher() + h.Write(zeroSpan) + h.Write(refNoMetaHash) + refHash := h.Sum(nil) expHash := syncHash(bmt, 0, data) if !bytes.Equal(expHash, refHash) { t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash) @@ -198,15 +203,19 @@ func TestAsyncCorrectness(t *testing.T) { bmt := New(pool) d := data[:n] rbmt := NewRefHasher(hasher, count) - exp := rbmt.Hash(d) + expNoMeta := rbmt.Hash(d) + h := hasher() + h.Write(zeroSpan) + h.Write(expNoMeta) + exp := h.Sum(nil) got := syncHash(bmt, 0, d) if !bytes.Equal(got, exp) { - t.Fatalf("wrong sync hash for datalength %v: expected %x (ref), got %x", n, exp, got) + t.Fatalf("wrong sync hash (syncpart) for datalength %v: expected %x (ref), got %x", n, exp, got) } sw := bmt.NewAsyncWriter(double) got = asyncHashRandom(sw, 0, d, wh) if !bytes.Equal(got, exp) { - t.Fatalf("wrong async hash for datalength %v: expected %x, got %x", n, exp, got) + t.Fatalf("wrong async hash (asyncpart) for datalength %v: expected %x, got %x", n, exp, got) } } }) @@ -289,7 +298,11 @@ func TestBMTWriterBuffers(t *testing.T) { bmt := New(pool) data := testutil.RandomBytes(1, n) rbmt := NewRefHasher(hasher, count) - refHash := rbmt.Hash(data) + refNoMetaHash := rbmt.Hash(data) + h := hasher() + h.Write(zeroSpan) + h.Write(refNoMetaHash) + refHash := h.Sum(nil) expHash := syncHash(bmt, 0, data) if !bytes.Equal(expHash, refHash) { t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash) @@ -309,6 +322,7 @@ func TestBMTWriterBuffers(t *testing.T) { return fmt.Errorf("incorrect read. expected %v bytes, got %v", buflen, read) } } + bmt.SetLength(0) hash := bmt.Sum(nil) if !bytes.Equal(hash, expHash) { return fmt.Errorf("hash mismatch. 
expected %x, got %x", hash, expHash) @@ -347,7 +361,7 @@ func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, coun if len(d) < n { n = len(d) } - binary.BigEndian.PutUint64(span, uint64(n)) + binary.LittleEndian.PutUint64(span, uint64(n)) data := d[:n] rbmt := NewRefHasher(hasher, count) exp := sha3hash(span, rbmt.Hash(data)) @@ -522,7 +536,9 @@ func benchmarkRefHasher(t *testing.B, n int) { // Hash hashes the data and the span using the bmt hasher func syncHash(h *Hasher, spanLength int, data []byte) []byte { h.Reset() + //if spanLength > 0 { h.SetLength(spanLength) + //} h.Write(data) return h.Sum(nil) } @@ -562,11 +578,13 @@ func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs bmt.Reset() if l == 0 { bmt.SetLength(spanLength) + bmt.(*AsyncHasher).Hasher.jobSize = l return bmt.Sum(nil) } c := make(chan []byte, 1) hashf := func() { bmt.SetLength(spanLength) + bmt.(*AsyncHasher).Hasher.jobSize = l c <- bmt.Sum(nil) } maxsize := len(idxs) @@ -582,7 +600,9 @@ func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs } } if wh == last { + log.Trace("asyncHash", "length", l) bmt.SetLength(spanLength) + bmt.(*AsyncHasher).Hasher.jobSize = l return bmt.Sum(nil) } return <-c From 685fa40791486dc9c21c0aa9e56baca45dcec33d Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 21:14:17 +0100 Subject: [PATCH 56/67] bmt: Fix zerohash bug in sync BMT --- bmt/bmt.go | 59 ++++++++++-------------- bmt/bmt_test.go | 117 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 129 insertions(+), 47 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index a8bbcd2b80..33c112a856 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -28,13 +28,8 @@ import ( "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" - "github.com/ethersphere/swarm/testutil" ) -func init() { - testutil.Init() -} - /* Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size. 
It is defined as the root hash of the binary merkle tree built over fixed size segments @@ -91,7 +86,7 @@ type BaseHasherFunc func() hash.Hash type Hasher struct { pool *TreePool // BMT resource pool bmt *tree // prebuilt BMT resource for flowcontrol and proofs - size int32 // bytes written to Hasher since last Reset() + size uint64 // bytes written to Hasher since last Reset() jobSize int // size of data written in current session cursor int64 // cursor to write to on next Write() call } @@ -304,10 +299,6 @@ func (h *Hasher) SetLength(length int) { h.getTree().span = span } -//func (h *Hasher) Count() int { -// return h.pool.SegmentCount -//} - // Implements param.SectionWriter func (h *Hasher) Branches() int { return h.pool.SegmentCount @@ -328,7 +319,6 @@ func (h *Hasher) Size() int { // Seek sets the section that will be written to on the next Write() // Implements io.Seeker in param.SectionWriter func (h *Hasher) Seek(offset int64, whence int) (int64, error) { - //return 0, errors.New("Seek not supported currently, use AsyncHasher for Seek") atomic.StoreInt64(&h.cursor, offset) return offset, nil } @@ -348,10 +338,18 @@ func (h *Hasher) BlockSize() int { // TODO: if span is nil return the zero-hash func (h *Hasher) Sum(b []byte) (s []byte) { t := h.getTree() + if h.size == 0 && t.offset == 0 { + h.releaseTree() + return h.pool.zerohashes[h.pool.Depth] + } // write the last section with final flag set to true go h.writeSection(t.cursor, t.section, true, true) // wait for the result s = <-t.result + if t.span == nil { + t.span = make([]byte, 8) + binary.LittleEndian.PutUint64(t.span, h.size) + } span := t.span // release the tree resource back to the pool h.releaseTree() @@ -371,6 +369,7 @@ func (h *Hasher) Write(b []byte) (int, error) { if l == 0 || l > h.pool.Size { return 0, nil } + atomic.AddUint64(&h.size, uint64(len(b))) t := h.getTree() secsize := 2 * h.pool.SegmentSize // calculate length of missing bit to complete current open section @@ -416,19 +415,10 @@ func (h *Hasher) Write(b []byte) (int, error) { func (h *Hasher) Reset() { h.cursor = 0 h.size = 0 + h.jobSize = 0 h.releaseTree() } -// methods needed to implement the SwarmHash interface - -// ResetWithLength needs to be called before writing to the hasher -// the argument is supposed to be the byte slice binary representation of -// the length of the data subsumed under the hash, i.e., span -//func (h *Hasher) ResetWithLength(span []byte) { -// h.Reset() -// h.getTree().span = span -//} - // releaseTree gives back the Tree to the pool whereby it unlocks // it resets tree, segment and index func (h *Hasher) releaseTree() { @@ -531,14 +521,20 @@ func (sw *AsyncHasher) Branches() int { return sw.seccount } -// Write writes the i-th section of the BMT base -// this function can and is meant to be called concurrently -// it sets max segment threadsafely +// Write writes to the current position cursor of the Hasher +// The cursor must be manually set with Seek(). +// The method will NOT advance the cursor. 
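+// A minimal usage sketch (illustrative, assuming 32-byte sections):
+//   sw.Seek(2*32, 0)  // place the cursor at section 2
+//   sw.Write(section) // writes section 2; the cursor stays at 2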
+//
+// Implements hash.Hash in param.SectionWriter
func (sw *AsyncHasher) Write(section []byte) (int, error) {
- c := atomic.LoadInt64(&sw.Hasher.cursor)
- return sw.writeSection(int(c)/sw.secsize, section)
+ atomic.AddUint64(&sw.Hasher.size, uint64(len(section)))
+ cursor := atomic.LoadInt64(&sw.Hasher.cursor)
+ return sw.writeSection(int(cursor)/sw.secsize, section)
}
+// Write writes the i-th section of the BMT base
+// this function can and is meant to be called concurrently
+// it sets max segment threadsafely
func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) {
// TODO: Temporary workaround for chunkwise write
if i < 0 {
@@ -589,8 +585,8 @@ func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) {
// length: known length of the input (unsafe; undefined if out of range)
// meta: metadata to hash together with BMT root for the final digest
// e.g., span for protection against existential forgery
-
-//func (sw *AsyncHasher) sum(b []byte, length int, meta []byte) (s []byte) {
+//
+// Implements hash.Hash in param.SectionWriter
func (sw *AsyncHasher) Sum(b []byte) (s []byte) {
if sw.all {
return sw.Hasher.Sum(nil)
@@ -598,7 +594,6 @@ func (sw *AsyncHasher) Sum(b []byte) (s []byte) {
sw.mtx.Lock()
t := sw.getTree()
length := int(sw.Hasher.jobSize)
- log.Trace("async sum", "l", length)
if length == 0 {
sw.mtx.Unlock()
s = sw.pool.zerohashes[sw.pool.Depth]
@@ -620,10 +615,6 @@ func (sw *AsyncHasher) Sum(b []byte) (s []byte) {
// release the tree back to the pool
sw.releaseTree()
meta := t.span
- // if no meta is given just append digest to b
- //if len(meta) == 0 {
- // return append(b, s...)
- //}
// hash together meta and BMT root hash using the pools
return doSum(sw.pool.hasher(), b, meta, s)
}
@@ -637,8 +628,6 @@ func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) {
var hasher hash.Hash
var level int
t := h.getTree()
- log.Trace("hasher writesection adding", "len", len(section))
- atomic.AddInt32(&h.size, int32(len(section)))
if double {
level++
n = t.leaves[i]
diff --git a/bmt/bmt_test.go b/bmt/bmt_test.go
index 46b0a1bb85..1e63e2a50f 100644
--- a/bmt/bmt_test.go
+++ b/bmt/bmt_test.go
@@ -32,6 +32,10 @@ import (
"golang.org/x/crypto/sha3"
)
+func init() {
+ testutil.Init()
+}
+
// the actual data length generated (could be longer than max datalength of the BMT)
const BufferSize = 4128
@@ -143,14 +147,10 @@ func TestHasherEmptyData(t *testing.T) {
defer pool.Drain(0)
bmt := New(pool)
rbmt := NewRefHasher(hasher, count)
- refNoMetaHash := rbmt.Hash(data)
- h := hasher()
- h.Write(zeroSpan)
- h.Write(refNoMetaHash)
- refHash := h.Sum(nil)
- expHash := syncHash(bmt, 0, data)
- if !bytes.Equal(expHash, refHash) {
- t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
+ expHash := rbmt.Hash(data)
+ resHash := syncHash(bmt, 0, data)
+ if !bytes.Equal(expHash, resHash) {
+ t.Fatalf("hash mismatch with reference.
expected %x, got %x", resHash, expHash) } }) } @@ -364,7 +364,13 @@ func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, coun binary.LittleEndian.PutUint64(span, uint64(n)) data := d[:n] rbmt := NewRefHasher(hasher, count) - exp := sha3hash(span, rbmt.Hash(data)) + var exp []byte + log.Trace("correct", "n", n, "count", count, "depth", bmt.pool.Depth) + if n == 0 { + exp = bmt.pool.zerohashes[bmt.pool.Depth] + } else { + exp = sha3hash(span, rbmt.Hash(data)) + } got := syncHash(bmt, n, data) if !bytes.Equal(got, exp) { return fmt.Errorf("wrong hash: expected %x, got %x", exp, got) @@ -536,9 +542,7 @@ func benchmarkRefHasher(t *testing.B, n int) { // Hash hashes the data and the span using the bmt hasher func syncHash(h *Hasher, spanLength int, data []byte) []byte { h.Reset() - //if spanLength > 0 { h.SetLength(spanLength) - //} h.Write(data) return h.Sum(nil) } @@ -600,10 +604,99 @@ func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs } } if wh == last { - log.Trace("asyncHash", "length", l) bmt.SetLength(spanLength) bmt.(*AsyncHasher).Hasher.jobSize = l return bmt.Sum(nil) } return <-c } + +func TestHashSpanCases(t *testing.T) { + hasher := sha3.NewLegacyKeccak256 + pool := NewTreePool(hasher, 128, PoolSize) + zeroHash := pool.zerohashes[7] + refRes := zeroHash + + // check that SetLength(0) is equivalent to no Write() in all cases + h := New(pool) + res := h.Sum(nil) + if !bytes.Equal(refRes, res) { + t.Fatalf("nilspan vs zerohash; expected %x, got %x", refRes, res) + } + h.Reset() + h.SetLength(0) + res = h.Sum(nil) + if !bytes.Equal(refRes, res) { + t.Fatalf("length 0 vs zerohash; expected %x, got %x", refRes, res) + } + h.Reset() + h.Write([]byte("foo")) + h.SetLength(0) + res = h.Sum(nil) + refh := NewRefHasher(hasher, 128) + resh := refh.Hash([]byte("foo")) + hsub := hasher() + hsub.Write(zeroSpan) + hsub.Write(resh) + refRes = hsub.Sum(nil) + if !bytes.Equal(refRes, res) { + t.Fatalf("length 0 overwrite vs zerohash; expected %x, got %x", refRes, res) + } + + // span and length is automatically set if SetLength() is not called + h.Reset() + h.Write([]byte("foo")) + resNoLength := h.Sum(nil) + + h.Reset() + h.Write([]byte("foo")) + h.SetLength(3) + resLength := h.Sum(nil) + + if !bytes.Equal(resLength, resNoLength) { + t.Fatalf("foo length %d, expected %x, got %x", 3, resLength, resNoLength) + } + + h.Reset() + h.Write([]byte("foo")) + h.SetLength(4) + resLength = h.Sum(nil) + if bytes.Equal(resLength, resNoLength) { + t.Fatalf("foo length %d; unexpected %x == %x", 4, resLength, resNoLength) + } + + // correct length is calculated when span exceeds size of bottom tree level + h.Reset() + h.Write([]byte("foo")) + h.SetLength(4096 + 3) + res = h.Sum(nil) + refh = NewRefHasher(hasher, 128) + resh = refh.Hash([]byte("foo")) + hsub = hasher() + span := make([]byte, 8) + binary.LittleEndian.PutUint64(span, 4096+3) + hsub.Write(span) + hsub.Write(resh) + refRes = hsub.Sum(nil) + + if !bytes.Equal(refRes, res) { + t.Fatalf("foo length %d, expected %x, got %x", 4096+3, refRes, res) + } + + h.Reset() + h.Write([]byte("foo")) + h.SetLength(4096 + 4) + res = h.Sum(nil) + refh = NewRefHasher(hasher, 128) + resh = refh.Hash([]byte("foo")) + hsub = hasher() + span = make([]byte, 8) + binary.LittleEndian.PutUint64(span, 4096+4) + hsub.Write(span) + hsub.Write(resh) + refRes = hsub.Sum(nil) + + if !bytes.Equal(refRes, res) { + t.Fatalf("foo length %d; expected %x, got %x", 4096+4, refRes, res) + } +} From 677d0237e6df61893d685fc32d61d45f7e121cdd Mon 
Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 21:42:07 +0100 Subject: [PATCH 57/67] bmt, param: Cleanup, implement proper seek for async --- bmt/bmt.go | 44 ++++++++++++++++++++++++++++++++------------ bmt/bmt_test.go | 1 + param/io.go | 12 ------------ 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/bmt/bmt.go b/bmt/bmt.go index 33c112a856..046798affc 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -20,6 +20,7 @@ package bmt import ( "context" "encoding/binary" + "errors" "fmt" "hash" "strings" @@ -86,9 +87,9 @@ type BaseHasherFunc func() hash.Hash type Hasher struct { pool *TreePool // BMT resource pool bmt *tree // prebuilt BMT resource for flowcontrol and proofs - size uint64 // bytes written to Hasher since last Reset() + size int // bytes written to Hasher since last Reset() jobSize int // size of data written in current session - cursor int64 // cursor to write to on next Write() call + cursor int // cursor to write to on next Write() call } // New creates a reusable BMT Hasher that @@ -314,13 +315,22 @@ func (h *Hasher) Size() int { return h.pool.SegmentSize } -// TODO: Rework seek to work for AsyncHasher transparently -// TODO: whence ignored +// TODO: Rework seek to work for AsyncHasher transparently when asynchasher doesn't have "double" anymore +// TODO: performant offset to cursor calculation - or consider sectionwise Seek +// TODO: whence // Seek sets the section that will be written to on the next Write() // Implements io.Seeker in param.SectionWriter func (h *Hasher) Seek(offset int64, whence int) (int64, error) { - atomic.StoreInt64(&h.cursor, offset) - return offset, nil + if whence > 0 { + return 0, errors.New("whence is not currently implemented") + } + cursor := int(offset) / h.pool.SegmentSize + h.seek(cursor) + return int64(cursor), nil +} + +func (h *Hasher) seek(cursor int) { + h.cursor = cursor } // BlockSize returns the block size @@ -348,7 +358,7 @@ func (h *Hasher) Sum(b []byte) (s []byte) { s = <-t.result if t.span == nil { t.span = make([]byte, 8) - binary.LittleEndian.PutUint64(t.span, h.size) + binary.LittleEndian.PutUint64(t.span, uint64(h.size)) } span := t.span // release the tree resource back to the pool @@ -369,7 +379,7 @@ func (h *Hasher) Write(b []byte) (int, error) { if l == 0 || l > h.pool.Size { return 0, nil } - atomic.AddUint64(&h.size, uint64(len(b))) + h.size += len(b) t := h.getTree() secsize := 2 * h.pool.SegmentSize // calculate length of missing bit to complete current open section @@ -484,7 +494,7 @@ type AsyncHasher struct { secsize int // size of base section (size of hash or double) seccount int // base section count write func(i int, section []byte, final bool) - all bool // if all written in one go + all bool // if all written in one go, temporary workaround } // Implements param.SectionWriter @@ -521,15 +531,25 @@ func (sw *AsyncHasher) Branches() int { return sw.seccount } +// Seek is a temporary override for Hasher.Seek() to handle double sections from AsyncHasher +// Implements io.Seeker in param.SectionWriter +func (sw *AsyncHasher) Seek(offset int64, whence int) (int64, error) { + if whence > 0 { + return 0, errors.New("whence is not currently implemented") + } + cursor := int(offset) / sw.secsize + sw.Hasher.seek(cursor) + return int64(cursor), nil +} + // Write writes to the current position cursor of the Hasher // The cursor must be manually set with Seek(). // The method will NOT advance the cursor. 
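// (illustrative: with secsize = 64, Seek(128, 0) selects section 2; byte
// offsets are truncated to section boundaries, so Seek(130, 0) also selects
// section 2)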
//
// Implements hash.Hash in param.SectionWriter
func (sw *AsyncHasher) Write(section []byte) (int, error) {
- atomic.AddUint64(&sw.Hasher.size, uint64(len(section)))
- cursor := atomic.LoadInt64(&sw.Hasher.cursor)
- return sw.writeSection(int(cursor)/sw.secsize, section)
+ sw.Hasher.size += len(section)
+ return sw.writeSection(sw.Hasher.cursor, section)
}
// Write writes the i-th section of the BMT base
diff --git a/bmt/bmt_test.go b/bmt/bmt_test.go
index 1e63e2a50f..9cbafbd146 100644
--- a/bmt/bmt_test.go
+++ b/bmt/bmt_test.go
@@ -611,6 +611,7 @@ func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs
return <-c
}
+// TestHashSpanCases verifies that span and size are set automatically even if SetLength() is not explicitly called
func TestHashSpanCases(t *testing.T) {
hasher := sha3.NewLegacyKeccak256
pool := NewTreePool(hasher, 128, PoolSize)
diff --git a/param/io.go b/param/io.go
index e093c12e9d..f95fc282b1 100644
--- a/param/io.go
+++ b/param/io.go
@@ -8,18 +8,6 @@ import (
type SectionWriterFunc func(ctx context.Context) SectionWriter
-// SectionWriter is an asynchronous segment/section writer interface
-//type SectionWriter interface {
-// Connect(hashFunc SectionWriterFunc) SectionWriter
-// Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination
-// Reset(ctx context.Context) // standard init to be called before reuse
-// Write(index int, data []byte) // write into section of index
-// Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer
-// SectionSize() int // size of the async section unit to use
-// DigestSize() int
-// Branches() int
-//}
-
type SectionWriter interface {
hash.Hash
io.Seeker
From 84749e0e1aeed3d5729949e9ed16282a2455424d Mon Sep 17 00:00:00 2001
From: nolash
Date: Mon, 9 Dec 2019 22:08:36 +0100
Subject: [PATCH 58/67] file: Fix file/testutillocal/Cache
---
bmt/bmt.go | 8 ++++-
file/hasher/hasher.go | 19 +++++++++---
file/testutillocal/cache.go | 52 +++++++++++++++++++++++---------
file/testutillocal/cache_test.go | 11 +++----
param/io.go | 1 -
5 files changed, 65 insertions(+), 26 deletions(-)
diff --git a/bmt/bmt.go b/bmt/bmt.go
index 046798affc..7e431ae1b1 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -537,7 +537,12 @@ func (sw *AsyncHasher) Seek(offset int64, whence int) (int64, error) {
if whence > 0 {
return 0, errors.New("whence is not currently implemented")
}
- cursor := int(offset) / sw.secsize
+ var cursor int
+ if offset < 0 {
+ cursor = int(offset)
+ } else {
+ cursor = int(offset) / sw.secsize
+ }
sw.Hasher.seek(cursor)
return int64(cursor), nil
}
@@ -558,6 +563,7 @@ func (sw *AsyncHasher) Write(section []byte) (int, error) {
func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) {
// TODO: Temporary workaround for chunkwise write
if i < 0 {
+ sw.Hasher.cursor = 0
sw.Hasher.Reset()
sw.Hasher.SetLength(len(section))
sw.Hasher.Write(section)
diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go
index db02c0d455..ea5b80d607 100644
--- a/file/hasher/hasher.go
+++ b/file/hasher/hasher.go
@@ -2,6 +2,7 @@ package hasher
import (
"context"
+ "errors"
"github.com/ethersphere/swarm/bmt"
"github.com/ethersphere/swarm/log"
@@ -35,7 +36,7 @@ func New(hashFunc param.SectionWriterFunc) *Hasher {
return h
}
-func (h *Hasher) Connect(hashFunc param.SectionWriterFunc) param.SectionWriter {
+func (h *Hasher) SetWriter(hashFunc param.SectionWriterFunc) param.SectionWriter {
h.params = newTreeParams(hashFunc)
return h
}
@@ -53,7
+54,7 @@ func (h *Hasher) Init(ctx context.Context, errFunc func(error)) { // TODO: enforce buffered writes and limits // TODO: attempt omit modulo calc on every pass // TODO: preallocate full size span slice -func (h *Hasher) Write(index int, b []byte) { +func (h *Hasher) Write(b []byte) (int, error) { if h.count%h.params.Branches == 0 && h.count > 0 { h.job = h.job.Next() } @@ -68,17 +69,27 @@ func (h *Hasher) Write(index int, b []byte) { }(h.count, h.job) h.size += len(b) h.count++ + return len(b), nil } // Sum implements param.SectionWriter // It is a blocking call that calculates the target level and section index of the received data // and alerts hasher jobs the end of write is reached // It returns the root hash -func (h *Hasher) Sum(_ []byte, length int, _ []byte) []byte { +func (h *Hasher) Sum(b []byte) []byte { sectionCount := dataSizeToSectionIndex(h.size, h.params.SectionSize) targetLevel := getLevelsFromLength(h.size, h.params.SectionSize, h.params.Branches) h.target.Set(h.size, sectionCount, targetLevel) - return <-h.target.Done() + ref := <-h.target.Done() + if b == nil { + return ref + } + return append(b, ref...) +} + +// Seek implements io.Seeker in param.SectionWriter +func (h *Hasher) Seek(offset uint64, whence int) (int64, error) { + return int64(h.size), errors.New("Hasher cannot seek") } // Reset implements param.SectionWriter diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go index f302fc9f1c..09ee85e655 100644 --- a/file/testutillocal/cache.go +++ b/file/testutillocal/cache.go @@ -2,6 +2,7 @@ package testutillocal import ( "context" + "errors" "github.com/ethersphere/swarm/param" ) @@ -12,8 +13,9 @@ var ( ) type Cache struct { - data map[int][]byte - w param.SectionWriter + data map[int][]byte + index int + w param.SectionWriter } func NewCache() *Cache { @@ -25,31 +27,49 @@ func NewCache() *Cache { func (c *Cache) Init(_ context.Context, _ func(error)) { } -func (c *Cache) Connect(writeFunc param.SectionWriterFunc) param.SectionWriter { +func (c *Cache) SetWriter(writeFunc param.SectionWriterFunc) param.SectionWriter { c.w = writeFunc(nil) return c } -func (c *Cache) Write(index int, b []byte) { - c.data[index] = b - if c.w == nil { - return +func (c *Cache) SetLength(length int) { + if c.w != nil { + c.w.SetLength(length) } - c.w.Write(index, b) + } -func (c *Cache) Sum(b []byte, length int, span []byte) []byte { +func (c *Cache) Seek(offset int64, whence int) (int64, error) { + if whence > 0 { + return 0, errors.New("whence for Cache.Seek not implemented") + } + c.index = int(offset) / c.SectionSize() + if c.w != nil { + return c.w.Seek(offset, whence) + } + return int64(c.index), nil +} + +func (c *Cache) Write(b []byte) (int, error) { + c.data[c.index] = b + if c.w != nil { + return c.w.Write(b) + } + return len(b), nil +} + +func (c *Cache) Sum(b []byte) []byte { if c.w == nil { return nil } - return c.w.Sum(b, length, span) + return c.w.Sum(b) } -func (c *Cache) Reset(ctx context.Context) { +func (c *Cache) Reset() { if c.w == nil { return } - c.w.Reset(ctx) + c.w.Reset() } func (c *Cache) SectionSize() int { @@ -59,9 +79,13 @@ func (c *Cache) SectionSize() int { return defaultSectionSize } -func (c *Cache) DigestSize() int { +func (c *Cache) BlockSize() int { + return c.SectionSize() +} + +func (c *Cache) Size() int { if c.w != nil { - return c.w.DigestSize() + return c.w.Size() } return defaultSectionSize } diff --git a/file/testutillocal/cache_test.go b/file/testutillocal/cache_test.go index c17fd2c878..6300a8f3ea 100644 --- 
a/file/testutillocal/cache_test.go +++ b/file/testutillocal/cache_test.go @@ -6,7 +6,6 @@ import ( "testing" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/testutil" ) @@ -23,7 +22,7 @@ func TestCache(t *testing.T) { c := NewCache() c.Init(context.Background(), func(error) {}) _, data := testutil.SerialData(chunkSize, 255, 0) - c.Write(0, data) + c.Write(data) cachedData := c.Get(0) if !bytes.Equal(cachedData, data) { t.Fatalf("cache data; expected %x, got %x", data, cachedData) @@ -36,11 +35,11 @@ func TestCacheLink(t *testing.T) { c := NewCache() c.Init(context.Background(), func(error) {}) - c.Connect(hashFunc) + c.SetWriter(hashFunc) _, data := testutil.SerialData(chunkSize, 255, 0) - c.Write(-1, data) - span := bmt.LengthToSpan(chunkSize) - ref := c.Sum(nil, chunkSize, span) + c.Seek(-1, 0) + c.Write(data) + ref := c.Sum(nil) refHex := hexutil.Encode(ref) correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" if refHex != correctRefHex { diff --git a/param/io.go b/param/io.go index f95fc282b1..a877fa30c3 100644 --- a/param/io.go +++ b/param/io.go @@ -15,6 +15,5 @@ type SectionWriter interface { Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination SetLength(length int) SectionSize() int // size of the async section unit to use - DigestSize() int Branches() int } From b28375adca0112340605dc56095d8d8be52b0fa6 Mon Sep 17 00:00:00 2001 From: nolash Date: Mon, 9 Dec 2019 23:09:49 +0100 Subject: [PATCH 59/67] file: Move reference back, make hasher tests compile Concurrency issues in asynchasher, Replace Seek with SeekSection --- bmt/bmt.go | 13 ++-- file/hasher/{reference => }/common_test.go | 60 ++++++++++++------- file/hasher/hasher.go | 26 +++++--- file/hasher/hasher_test.go | 28 +++++---- file/hasher/job.go | 11 ++-- file/hasher/job_test.go | 9 ++- file/hasher/param.go | 4 +- file/hasher/{reference => }/pyramid_test.go | 0 file/hasher/{reference => }/reference.go | 39 +++--------- file/hasher/{reference => }/reference_test.go | 22 ++++--- param/io.go | 3 +- storage/hasherstore.go | 7 ++- storage/swarmhasher.go | 14 ++++- storage/types.go | 8 ++- 14 files changed, 134 insertions(+), 110 deletions(-) rename file/hasher/{reference => }/common_test.go (85%) rename file/hasher/{reference => }/pyramid_test.go (100%) rename file/hasher/{reference => }/reference.go (77%) rename file/hasher/{reference => }/reference_test.go (92%) diff --git a/bmt/bmt.go b/bmt/bmt.go index 7e431ae1b1..fd54629c02 100644 --- a/bmt/bmt.go +++ b/bmt/bmt.go @@ -289,6 +289,12 @@ func newTree(segmentSize, depth int, hashfunc func() hash.Hash) *tree { } } +// Implements param.SectionWriter +func (h *Hasher) SetWriter(_ param.SectionWriterFunc) param.SectionWriter { + log.Warn("Synchasher does not currently support SectionWriter chaining") + return h +} + // Implements param.SectionWriter func (h *Hasher) SectionSize() int { return h.pool.SegmentSize @@ -519,12 +525,6 @@ func (sw *AsyncHasher) SectionSize() int { return sw.secsize } -// DigestSize returns the size of the result -// Implements param.SectionWriter -func (sw *AsyncHasher) DigestSize() int { - return sw.secsize -} - // DigestSize returns the branching factor, which is equivalent to the size of the BMT input // Implements param.SectionWriter func (sw *AsyncHasher) Branches() int { @@ -543,6 +543,7 @@ func (sw *AsyncHasher) Seek(offset int64, whence int) (int64, error) { } else { cursor = 
int(offset) / sw.secsize } + log.Trace("async seek", "offset", offset, "cursor", cursor) sw.Hasher.seek(cursor) return int64(cursor), nil } diff --git a/file/hasher/reference/common_test.go b/file/hasher/common_test.go similarity index 85% rename from file/hasher/reference/common_test.go rename to file/hasher/common_test.go index 74fdba0ace..fd9ac725f7 100644 --- a/file/hasher/reference/common_test.go +++ b/file/hasher/common_test.go @@ -71,6 +71,10 @@ var ( end = len(dataLengths) ) +func init() { + testutil.Init() +} + var ( dummyHashFunc = func(_ context.Context) param.SectionWriter { return newDummySectionWriter(chunkSize*branches, sectionSize, sectionSize, branches) @@ -86,10 +90,6 @@ var ( } ) -func init() { - testutil.Init() -} - // simple param.SectionWriter hasher that keeps the data written to it // for later inspection // TODO: see if this can be replaced with the fake hasher from storage module @@ -101,6 +101,7 @@ type dummySectionWriter struct { digest []byte size int summed bool + index int writer hash.Hash mu sync.Mutex wg sync.WaitGroup @@ -120,17 +121,28 @@ func newDummySectionWriter(cp int, sectionSize int, digestSize int, branches int func (d *dummySectionWriter) Init(_ context.Context, _ func(error)) { } -func (d *dummySectionWriter) Connect(_ param.SectionWriterFunc) param.SectionWriter { +func (d *dummySectionWriter) SetWriter(_ param.SectionWriterFunc) param.SectionWriter { log.Error("dummySectionWriter does not support SectionWriter chaining") return d } // implements param.SectionWriter -func (d *dummySectionWriter) Write(index int, data []byte) { +func (d *dummySectionWriter) Seek(offset int64, whence int) (int64, error) { + d.index = int(offset) + return offset, nil +} + +// implements param.SectionWriter +func (d *dummySectionWriter) SetLength(length int) { + d.size = length +} + +// implements param.SectionWriter +func (d *dummySectionWriter) Write(data []byte) (int, error) { d.mu.Lock() - copy(d.data[index*d.sectionSize:], data) + copy(d.data[d.index:], data) d.size += len(data) - log.Trace("dummywriter write", "index", index, "size", d.size, "threshold", d.sectionSize*d.branches) + log.Trace("dummywriter write", "index", d.index, "size", d.size, "threshold", d.sectionSize*d.branches) if d.isFull() { d.summed = true d.mu.Unlock() @@ -138,14 +150,14 @@ func (d *dummySectionWriter) Write(index int, data []byte) { } else { d.mu.Unlock() } + return len(data), nil } // implements param.SectionWriter -func (d *dummySectionWriter) Sum(_ []byte, size int, _ []byte) []byte { - log.Trace("dummy Sumcall", "size", size) +func (d *dummySectionWriter) Sum(_ []byte) []byte { + log.Trace("dummy Sumcall", "size", d.size) d.mu.Lock() if !d.summed { - d.size = size d.summed = true d.mu.Unlock() d.sum() @@ -168,7 +180,7 @@ func (d *dummySectionWriter) sum() { } // implements param.SectionWriter -func (d *dummySectionWriter) Reset(_ context.Context) { +func (d *dummySectionWriter) Reset() { d.mu.Lock() defer d.mu.Unlock() d.data = make([]byte, len(d.data)) @@ -178,13 +190,18 @@ func (d *dummySectionWriter) Reset(_ context.Context) { d.writer.Reset() } +// implements param.SectionWriter +func (d *dummySectionWriter) BlockSize() int { + return d.sectionSize +} + // implements param.SectionWriter func (d *dummySectionWriter) SectionSize() int { return d.sectionSize } // implements param.SectionWriter -func (d *dummySectionWriter) DigestSize() int { +func (d *dummySectionWriter) Size() int { return d.sectionSize } @@ -201,32 +218,35 @@ func (d *dummySectionWriter) isFull() bool { func 
TestDummySectionWriter(t *testing.T) { w := newDummySectionWriter(chunkSize*2, sectionSize, sectionSize, branches) - w.Reset(context.Background()) + w.Reset() _, data := testutil.SerialData(sectionSize*2, 255, 0) - w.Write(branches, data[:sectionSize]) - w.Write(branches+1, data[sectionSize:]) + w.Seek(int64(branches), 0) + w.Write(data[:sectionSize]) + w.Seek(int64(branches+1), 0) + w.Write(data[sectionSize:]) if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) } correctDigestHex := "0xfbc16f6db3534b456cb257d00148127f69909000c89f8ce5bc6183493ef01da1" - digest := w.Sum(nil, chunkSize*2, nil) + digest := w.Sum(nil) digestHex := hexutil.Encode(digest) if digestHex != correctDigestHex { t.Fatalf("Digest: 2xsectionSize*1; expected %s, got %s", correctDigestHex, digestHex) } w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) - w.Reset(context.Background()) - w.Write(branches/2, data) + w.Reset() + w.Seek(int64(branches/2), 0) + w.Write(data) if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) } correctDigestHex += zeroHex - digest = w.Sum(nil, chunkSize*2, nil) + digest = w.Sum(nil) digestHex = hexutil.Encode(digest) if digestHex != correctDigestHex { t.Fatalf("Digest 1xsectionSize*2; expected %s, got %s", correctDigestHex, digestHex) diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index ea5b80d607..46e7e11334 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -4,7 +4,6 @@ import ( "context" "errors" - "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -60,11 +59,11 @@ func (h *Hasher) Write(b []byte) (int, error) { } go func(i int, jb *job) { hasher := h.params.GetWriter() - hasher.Write(-1, b) + hasher.Seek(-1, 0) + hasher.Write(b) l := len(b) log.Trace("data write", "count", i, "size", l) - span := bmt.LengthToSpan(l) - jb.write(i%h.params.Branches, hasher.Sum(nil, l, span)) + jb.write(i%h.params.Branches, hasher.Sum(nil)) h.params.PutWriter(hasher) }(h.count, h.job) h.size += len(b) @@ -87,14 +86,25 @@ func (h *Hasher) Sum(b []byte) []byte { return append(b, ref...) 
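+ // (note: mirroring hash.Hash semantics, the digest is appended to b when a
+ // non-nil slice is passed, rather than overwriting it)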
} +func (h *Hasher) SetLength(length int) { + h.size = length +} + // Seek implements io.Seeker in param.SectionWriter -func (h *Hasher) Seek(offset uint64, whence int) (int64, error) { +func (h *Hasher) Seek(offset int64, whence int) (int64, error) { return int64(h.size), errors.New("Hasher cannot seek") } // Reset implements param.SectionWriter -func (h *Hasher) Reset(ctx context.Context) { - h.params.ctx = ctx +func (h *Hasher) Reset() { + h.size = 0 + h.count = 0 + h.target = newTarget() + h.job = newJob(h.params, h.target, h.index, 1, 0) +} + +func (h *Hasher) BlockSize() int { + return h.params.ChunkSize } // SectionSize implements param.SectionWriter @@ -103,7 +113,7 @@ func (h *Hasher) SectionSize() int { } // DigestSize implements param.SectionWriter -func (h *Hasher) DigestSize() int { +func (h *Hasher) Size() int { return h.params.SectionSize } diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index dbdca5a552..1bad9b6d0d 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -24,9 +24,10 @@ func TestHasherJobTopHash(t *testing.T) { h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches; i += chunkSize { - h.Write(i, data[i:i+chunkSize]) + h.Seek(int64(i*h.SectionSize()), 0) + h.Write(data[i : i+chunkSize]) } - h.Sum(nil, i, nil) + h.Sum(nil) levelOneTopHash := hexutil.Encode(h.index.GetTopHash(1)) correctLevelOneTopHash := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef" if levelOneTopHash != correctLevelOneTopHash { @@ -46,9 +47,10 @@ func TestHasherOneFullChunk(t *testing.T) { h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches; i += chunkSize { - h.Write(i, data[i:i+chunkSize]) + h.Seek(int64(i*h.SectionSize()), 0) + h.Write(data[i : i+chunkSize]) } - ref := h.Sum(nil, i, nil) + ref := h.Sum(nil) correctRootHash := "0x3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09" rootHash := hexutil.Encode(ref) if rootHash != correctRootHash { @@ -67,7 +69,8 @@ func TestHasherJobChange(t *testing.T) { h.Init(ctx, logErrFunc) jobs := make(map[string]int) for i := 0; i < chunkSize*branches*branches; i += chunkSize { - h.Write(i, data[i:i+chunkSize]) + h.Seek(int64(i*h.SectionSize()), 0) + h.Write(data[i : i+chunkSize]) jobs[h.job.String()]++ } i := 0 @@ -93,9 +96,10 @@ func TestHasherOneFullLevelOneChunk(t *testing.T) { h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches*branches; i += chunkSize { - h.Write(i, data[i:i+chunkSize]) + h.Seek(int64(i*h.SectionSize()), 0) + h.Write(data[i : i+chunkSize]) } - ref := h.Sum(nil, i, nil) + ref := h.Sum(nil) correctRootHash := "0x522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b" rootHash := hexutil.Encode(ref) if rootHash != correctRootHash { @@ -120,9 +124,10 @@ func TestHasherVector(t *testing.T) { if dataLength-j < chunkSize { size = dataLength - j } - h.Write(j, data[j:j+size]) + h.Seek(int64(j*h.SectionSize()), 0) + h.Write(data[j : j+size]) } - ref := h.Sum(nil, dataLength, nil) + ref := h.Sum(nil) correctRefHex := "0x" + expected[i] refHex := hexutil.Encode(ref) if refHex != correctRefHex { @@ -164,8 +169,9 @@ func benchmarkHasher(b *testing.B) { if dataLength-i < chunkSize { size = dataLength - i } - h.Write(i, data[i:i+size]) + h.Seek(int64(i*h.SectionSize()), 0) + h.Write(data[i : i+size]) } - h.Sum(nil, dataLength, nil) + h.Sum(nil) } } diff --git a/file/hasher/job.go b/file/hasher/job.go index 9d91ab572c..7342c164f0 100644 --- a/file/hasher/job.go +++ b/file/hasher/job.go @@ -6,7 +6,6 @@ import ( 
"sync/atomic" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" "github.com/ethersphere/swarm/param" ) @@ -166,7 +165,8 @@ OUTER: idx := entry.index + i data := entry.data[offset : offset+jb.writer.SectionSize()] log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", oldProcessCount+i, "endcount", endCount, "index", entry.index+i, "data", hexutil.Encode(data)) - jb.writer.Write(idx, data) + jb.writer.Seek(int64(idx*jb.writer.SectionSize()), 0) + jb.writer.Write(data) offset += jb.writer.SectionSize() } @@ -222,10 +222,11 @@ func (jb *job) sum() { // get the size of the span and execute the hash digest of the content size := jb.size() - span := bmt.LengthToSpan(size) + //span := bmt.LengthToSpan(size) refSize := jb.count() * jb.params.SectionSize - log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "span", span, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) - ref := jb.writer.Sum(nil, refSize, span) + jb.writer.SetLength(size) + log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount) + ref := jb.writer.Sum(nil) // endCount > 0 means this is the last chunk on the level // the hash from the level below the target level will be the result diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go index 1d09fd5cab..4f53ac2c39 100644 --- a/file/hasher/job_test.go +++ b/file/hasher/job_test.go @@ -545,9 +545,8 @@ func TestJobVector(t *testing.T) { ie = dataLength } writeSize := ie - i - span := bmt.LengthToSpan(writeSize) - log.Debug("data write", "i", i, "length", writeSize, "span", span) - dataHash.ResetWithLength(span) + dataHash.Reset() + dataHash.SetLength(writeSize) c, err := dataHash.Write(data[i:ie]) if err != nil { jb.destroy() @@ -623,8 +622,8 @@ func benchmarkJob(b *testing.B) { ie = dataLength } writeSize := ie - i - span := bmt.LengthToSpan(writeSize) - dataHash.ResetWithLength(span) + dataHash.Reset() + dataHash.SetLength(writeSize) c, err := dataHash.Write(data[i:ie]) if err != nil { jb.destroy() diff --git a/file/hasher/param.go b/file/hasher/param.go index 591c921ef6..7adaa17eb9 100644 --- a/file/hasher/param.go +++ b/file/hasher/param.go @@ -30,7 +30,7 @@ func newTreeParams(hashFunc param.SectionWriterFunc) *treeParams { ChunkSize: h.SectionSize() * h.Branches(), hashFunc: hashFunc, } - h.Reset(context.Background()) + h.Reset() log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize) p.writerPool.New = func() interface{} { hf := p.hashFunc(p.ctx) @@ -50,7 +50,7 @@ func (p *treeParams) GetContext() context.Context { } func (p *treeParams) PutWriter(w param.SectionWriter) { - w.Reset(p.ctx) + w.Reset() p.writerPool.Put(w) } diff --git a/file/hasher/reference/pyramid_test.go b/file/hasher/pyramid_test.go similarity index 100% rename from file/hasher/reference/pyramid_test.go rename to file/hasher/pyramid_test.go diff --git a/file/hasher/reference/reference.go b/file/hasher/reference.go similarity index 77% rename from file/hasher/reference/reference.go rename to file/hasher/reference.go index 09088ef72b..9a6ce31836 100644 --- a/file/hasher/reference/reference.go +++ b/file/hasher/reference.go @@ -1,47 +1,24 @@ package hasher import ( - "context" - - "github.com/ethersphere/swarm/bmt" "github.com/ethersphere/swarm/log" 
"github.com/ethersphere/swarm/param" ) -type BMTHasherSectionWriter struct { - *bmt.Hasher -} - -func (b *BMTHasherSectionWriter) Write(_ int, data []byte) { - b.Hasher.Write(data) -} - -func (b *BMTHasherSectionWriter) Sum(data []byte, _ int, _ []byte) []byte { - return b.Hasher.Sum(data) -} - -func (b *BMTHasherSectionWriter) Connect(_ param.SectionWriterFunc) param.SectionWriter { - return b -} - -func (b *BMTHasherSectionWriter) Reset(_ context.Context) { - b.Hasher.Reset() -} - // ReferenceHasher is the source-of-truth implementation of the swarm file hashing algorithm type ReferenceHasher struct { params *treeParams - cursors []int // section write position, indexed per level - length int // number of bytes written to the data level of the hasher - buffer []byte // keeps data and hashes, indexed by cursors - counts []int // number of sums performed, indexed per level - hasher *bmt.Hasher // underlying hasher + cursors []int // section write position, indexed per level + length int // number of bytes written to the data level of the hasher + buffer []byte // keeps data and hashes, indexed by cursors + counts []int // number of sums performed, indexed per level + hasher param.SectionWriter // underlying hasher } // NewReferenceHasher constructs and returns a new ReferenceHasher func NewReferenceHasher(params *treeParams) *ReferenceHasher { // TODO: remove when bmt interface is amended - h := params.GetWriter().(*BMTHasherSectionWriter).Hasher + h := params.GetWriter() return &ReferenceHasher{ params: params, cursors: make([]int, 9), @@ -87,11 +64,11 @@ func (r *ReferenceHasher) sum(lvl int) []byte { r.counts[lvl]++ spanSize := r.params.Spans[lvl] * r.params.ChunkSize span := (r.length-1)%spanSize + 1 - spanBytes := bmt.LengthToSpan(span) toSumSize := r.cursors[lvl] - r.cursors[lvl+1] - r.hasher.ResetWithLength(spanBytes) + r.hasher.Reset() + r.hasher.SetLength(span) r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize]) ref := r.hasher.Sum(nil) return ref diff --git a/file/hasher/reference/reference_test.go b/file/hasher/reference_test.go similarity index 92% rename from file/hasher/reference/reference_test.go rename to file/hasher/reference_test.go index 37b83e7a6a..91fbfeb1a5 100644 --- a/file/hasher/reference/reference_test.go +++ b/file/hasher/reference_test.go @@ -33,9 +33,9 @@ func TestManualDanglingChunk(t *testing.T) { // hash the balanced tree portion of the data level and write to level 1 _, levels[0] = testutil.SerialData(chunkSize*branches+chunkSize, 255, 0) - span := bmt.LengthToSpan(chunkSize) for i := 0; i < chunkSize*branches; i += chunkSize { - h.ResetWithLength(span) + h.Reset() + h.SetLength(chunkSize) h.Write(levels[0][i : i+chunkSize]) copy(levels[1][i/branches:], h.Sum(nil)) } @@ -47,8 +47,8 @@ func TestManualDanglingChunk(t *testing.T) { // write the dangling chunk // hash it and write the reference on the second section of level 2 - span = bmt.LengthToSpan(chunkSize) - h.ResetWithLength(span) + h.Reset() + h.SetLength(chunkSize) h.Write(levels[0][chunkSize*branches:]) copy(levels[2][sectionSize:], h.Sum(nil)) refHex = hexutil.Encode(levels[2][sectionSize:]) @@ -58,8 +58,8 @@ func TestManualDanglingChunk(t *testing.T) { } // hash the chunk on level 1 and write into the first section of level 2 - span = bmt.LengthToSpan(chunkSize * branches) - h.ResetWithLength(span) + h.Reset() + h.SetLength(chunkSize * branches) h.Write(levels[1]) copy(levels[2], h.Sum(nil)) refHex = hexutil.Encode(levels[2][:sectionSize]) @@ -69,8 +69,8 @@ func 
TestManualDanglingChunk(t *testing.T) { } // hash the two sections on level 2 to obtain the root hash - span = bmt.LengthToSpan(chunkSize*branches + chunkSize) - h.ResetWithLength(span) + h.Reset() + h.SetLength(chunkSize*branches + chunkSize) h.Write(levels[2]) ref := h.Sum(nil) refHex = hexutil.Encode(ref) @@ -90,8 +90,7 @@ func TestReferenceHasherVector(t *testing.T) { hashFunc := func(_ context.Context) param.SectionWriter { pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - h := bmt.New(pool) - return &BMTHasherSectionWriter{Hasher: h} + return bmt.New(pool) } params := newTreeParams(hashFunc) var mismatch int @@ -129,8 +128,7 @@ func benchmarkReferenceHasher(b *testing.B) { } hashFunc := func(_ context.Context) param.SectionWriter { pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) - h := bmt.New(pool) - return &BMTHasherSectionWriter{Hasher: h} + return bmt.New(pool) } params := newTreeParams(hashFunc) b.ResetTimer() diff --git a/param/io.go b/param/io.go index a877fa30c3..68e00b5d53 100644 --- a/param/io.go +++ b/param/io.go @@ -3,15 +3,14 @@ package param import ( "context" "hash" - "io" ) type SectionWriterFunc func(ctx context.Context) SectionWriter type SectionWriter interface { hash.Hash - io.Seeker SetWriter(hashFunc SectionWriterFunc) SectionWriter + SeekSection(section int) Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination SetLength(length int) SectionSize() int // size of the async section unit to use diff --git a/storage/hasherstore.go b/storage/hasherstore.go index 4890219a15..b71d31a536 100644 --- a/storage/hasherstore.go +++ b/storage/hasherstore.go @@ -18,6 +18,7 @@ package storage import ( "context" + "encoding/binary" "fmt" "sync" "sync/atomic" @@ -184,8 +185,10 @@ func (h *hasherStore) startWait(ctx context.Context) { func (h *hasherStore) createHash(chunkData ChunkData) Address { hasher := h.hashFunc() - hasher.ResetWithLength(chunkData[:8]) // 8 bytes of length - hasher.Write(chunkData[8:]) // minus 8 []byte length + hasher.Reset() + lengthNumber := int(binary.LittleEndian.Uint64(chunkData[:8])) + hasher.SetLength(lengthNumber) // 8 bytes of length + hasher.Write(chunkData[8:]) // minus 8 []byte length return hasher.Sum(nil) } diff --git a/storage/swarmhasher.go b/storage/swarmhasher.go index fae03f0c72..f67c9a09e8 100644 --- a/storage/swarmhasher.go +++ b/storage/swarmhasher.go @@ -18,6 +18,8 @@ package storage import ( "hash" + + "github.com/ethersphere/swarm/bmt" ) const ( @@ -28,14 +30,20 @@ const ( type SwarmHash interface { hash.Hash - ResetWithLength([]byte) + //ResetWithLength([]byte) + SetLength(int) } type HashWithLength struct { hash.Hash } -func (h *HashWithLength) ResetWithLength(length []byte) { +//func (h *HashWithLength) ResetWithLength(length []byte) { +// h.Reset() +// h.Write(length) +//} +func (h *HashWithLength) SetLength(length int) { h.Reset() - h.Write(length) + span := bmt.LengthToSpan(length) + h.Write(span) } diff --git a/storage/types.go b/storage/types.go index a4b102a62c..3ff4d69bb0 100644 --- a/storage/types.go +++ b/storage/types.go @@ -92,8 +92,8 @@ func GenerateRandomChunk(dataSize int64) Chunk { hasher := MakeHashFunc(DefaultHash)() sdata := make([]byte, dataSize+8) rand.Read(sdata[8:]) - binary.LittleEndian.PutUint64(sdata[:8], uint64(dataSize)) - hasher.ResetWithLength(sdata[:8]) + hasher.Reset() + hasher.SetLength(int(dataSize)) hasher.Write(sdata[8:]) return NewChunk(hasher.Sum(nil), sdata) } @@ -202,7 
+202,9 @@ func (v *ContentAddressValidator) Validate(ch Chunk) bool {
}
hasher := v.Hasher()
- hasher.ResetWithLength(data[:8])
+ hasher.Reset()
+ lengthNumber := int(binary.LittleEndian.Uint64(data[:8]))
+ hasher.SetLength(lengthNumber)
hasher.Write(data[8:])
hash := hasher.Sum(nil)
From 28e424c91fef809a99b9d7b72663b02c8775224b Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 09:26:09 +0100
Subject: [PATCH 60/67] file, bmt: Implement param.SectionWriter for all in file
---
bmt/bmt.go | 37 +++------------------------
bmt/bmt_test.go | 2 +-
file/encrypt/encrypt.go | 38 ++++++++++++++++++++--------
file/encrypt/encrypt_test.go | 43 +++++++++++++++++---------------
file/hasher/common_test.go | 11 ++++----
file/hasher/hasher.go | 6 ++---
file/hasher/hasher_test.go | 6 -----
file/hasher/job.go | 2 +-
file/split.go | 4 +--
file/store/store.go | 38 ++++++++++++++++++++--------
file/store/store_test.go | 6 +++--
file/testutillocal/cache.go | 11 +++-----
file/testutillocal/cache_test.go | 2 +-
13 files changed, 101 insertions(+), 105 deletions(-)
diff --git a/bmt/bmt.go b/bmt/bmt.go
index fd54629c02..6391a769ee 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -20,7 +20,6 @@ package bmt
import (
"context"
"encoding/binary"
- "errors"
"fmt"
"hash"
"strings"
@@ -321,22 +320,9 @@ func (h *Hasher) Size() int {
return h.pool.SegmentSize
}
-// TODO: Rework seek to work for AsyncHasher transparently when asynchasher doesn't have "double" anymore
-// TODO: performant offset to cursor calculation - or consider sectionwise Seek
-// TODO: whence
-// Seek sets the section that will be written to on the next Write()
-// Implements io.Seeker in param.SectionWriter
-func (h *Hasher) Seek(offset int64, whence int) (int64, error) {
- if whence > 0 {
- return 0, errors.New("whence is not currently implemented")
- }
- cursor := int(offset) / h.pool.SegmentSize
- h.seek(cursor)
- return int64(cursor), nil
-}
-
-func (h *Hasher) seek(cursor int) {
- h.cursor = cursor
+// Seek sets the section that will be written to on the next Write()
+func (h *Hasher) SeekSection(offset int) {
+ h.cursor = offset
}
// BlockSize returns the block size
@@ -531,25 +517,8 @@ func (sw *AsyncHasher) Branches() int {
return sw.seccount
}
-// Seek is a temporary override for Hasher.Seek() to handle double sections from AsyncHasher
-// Implements io.Seeker in param.SectionWriter
-func (sw *AsyncHasher) Seek(offset int64, whence int) (int64, error) {
- if whence > 0 {
- return 0, errors.New("whence is not currently implemented")
- }
- var cursor int
- if offset < 0 {
- cursor = int(offset)
- } else {
- cursor = int(offset) / sw.secsize
- }
- log.Trace("async seek", "offset", offset, "cursor", cursor)
- sw.Hasher.seek(cursor)
- return int64(cursor), nil
-}
-
// Write writes to the current position cursor of the Hasher
-// The cursor must be manually set with Seek().
+// The cursor must be manually set with SeekSection().
// The method will NOT advance the cursor.
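// A minimal sectionwise sketch (illustrative):
//   sw.SeekSection(2) // the next Write targets section 2
//   sw.Write(section) // the cursor remains at 2 until the next SeekSection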
//
// Implements hash.Hash in param.SectionWriter
diff --git a/bmt/bmt_test.go b/bmt/bmt_test.go
index 9cbafbd146..0f8489e29a 100644
--- a/bmt/bmt_test.go
+++ b/bmt/bmt_test.go
@@ -597,7 +597,7 @@ func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs
r = rand.Intn(maxsize)
}
for i, idx := range idxs {
- bmt.Seek(int64(idx*bmt.SectionSize()), 0)
+ bmt.SeekSection(idx)
bmt.Write(segments[idx])
if (wh == first || wh == random) && i == r {
go hashf()
diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go
index 94768a054e..f7f34ccb74 100644
--- a/file/encrypt/encrypt.go
+++ b/file/encrypt/encrypt.go
@@ -16,6 +16,7 @@ type Encrypt struct {
key []byte
e encryption.Encryption
w param.SectionWriter
+ length int
keyHash hash.Hash
errFunc func(error)
}
@@ -42,7 +43,7 @@ func New(key []byte, initCtr uint32, hashFunc param.SectionWriterFunc) (*Encrypt
return e, nil
}
-func (e *Encrypt) Connect(hashFunc param.SectionWriterFunc) param.SectionWriter {
+func (e *Encrypt) SetWriter(hashFunc param.SectionWriterFunc) param.SectionWriter {
e.w = hashFunc(nil)
return e
@@ -52,21 +53,30 @@ func (e *Encrypt) Init(_ context.Context, errFunc func(error)) {
e.errFunc = errFunc
}
-func (e *Encrypt) Write(index int, b []byte) {
+func (e *Encrypt) SeekSection(offset int) {
+ e.w.SeekSection(offset)
+}
+
+func (e *Encrypt) Write(b []byte) (int, error) {
cipherText, err := e.e.Encrypt(b)
if err != nil {
e.errFunc(err)
- return
+ return 0, err
}
- e.w.Write(index, cipherText)
+ return e.w.Write(cipherText)
}
-func (e *Encrypt) Reset(ctx context.Context) {
+func (e *Encrypt) Reset() {
e.e.Reset()
- e.w.Reset(ctx)
+ e.w.Reset()
+}
+
+func (e *Encrypt) SetLength(length int) {
+ e.length = length
+ e.w.SetLength(length)
}
-func (e *Encrypt) Sum(b []byte, length int, span []byte) []byte {
+func (e *Encrypt) Sum(b []byte) []byte {
// derive new key
oldKey := make([]byte, encryption.KeyLength)
copy(oldKey, e.key)
@@ -74,15 +84,20 @@ func (e *Encrypt) Sum(b []byte, length int, span []byte) []byte {
e.keyHash.Write(e.key)
newKey := e.keyHash.Sum(nil)
copy(e.key, newKey)
- s := e.w.Sum(b, length, span)
+ s := e.w.Sum(b)
log.Trace("key", "key", oldKey, "ekey", e.key, "newkey", newKey)
return append(oldKey, s...)
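// (illustrative: the returned reference is laid out as key || hash, so a
// caller recovers the encryption key as ref[:encryption.KeyLength])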
} +// DigestSize implements param.SectionWriter +func (e *Encrypt) BlockSize() int { + return e.Size() +} + // DigestSize implements param.SectionWriter // TODO: cache these calculations -func (e *Encrypt) DigestSize() int { - return e.w.DigestSize() + encryption.KeyLength +func (e *Encrypt) Size() int { + return e.w.Size() + encryption.KeyLength } // SectionSize implements param.SectionWriter @@ -91,6 +106,7 @@ func (e *Encrypt) SectionSize() int { } // Branches implements param.SectionWriter +// TODO: cache these calculations func (e *Encrypt) Branches() int { - return e.w.Branches() / (e.DigestSize() / e.w.SectionSize()) + return e.w.Branches() / (e.Size() / e.w.SectionSize()) } diff --git a/file/encrypt/encrypt_test.go b/file/encrypt/encrypt_test.go index 68a1c4cac3..e0ab7673c7 100644 --- a/file/encrypt/encrypt_test.go +++ b/file/encrypt/encrypt_test.go @@ -58,12 +58,12 @@ func TestKey(t *testing.T) { if !bytes.Equal(testKey, e.key) { t.Fatalf("key seed; expected %x, got %x", testKey, e.key) } - e.Connect(cacheFunc) + e.SetWriter(cacheFunc) _, data := testutil.SerialData(chunkSize, 255, 0) - e.Write(0, data) - span := bmt.LengthToSpan(chunkSize) - doubleRef := e.Sum(nil, chunkSize, span) + e.Write(data) // 0 + e.SetLength(chunkSize) + doubleRef := e.Sum(nil) refKey := doubleRef[:encryption.KeyLength] if !bytes.Equal(refKey, testKey) { t.Fatalf("returned ref key, expected %x, got %x", testKey, refKey) @@ -86,7 +86,7 @@ func TestEncryptOneChunk(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Connect(hashFunc) + cache.SetWriter(hashFunc) cacheFunc := func(_ context.Context) param.SectionWriter { return cache } @@ -96,7 +96,7 @@ func TestEncryptOneChunk(t *testing.T) { if err != nil { t.Fatal(err) } - eFunc.Connect(cacheFunc) + eFunc.SetWriter(cacheFunc) eFunc.Init(ctx, errFunc) return eFunc } @@ -104,8 +104,8 @@ func TestEncryptOneChunk(t *testing.T) { _, data := testutil.SerialData(chunkSize, 255, 0) h := hasher.New(encryptFunc) h.Init(ctx, func(error) {}) - h.Write(0, data) - doubleRef := h.Sum(nil, 0, nil) + h.Write(data) //0 + doubleRef := h.Sum(nil) enc := encryption.New(testKey, 0, 42, sha3.NewLegacyKeccak256) cipherText, err := enc.Encrypt(data) @@ -120,8 +120,8 @@ func TestEncryptOneChunk(t *testing.T) { bmtTreePool := bmt.NewTreePool(sha3.NewLegacyKeccak256, branches, bmt.PoolSize) hc := bmt.New(bmtTreePool) - span := bmt.LengthToSpan(len(cipherText)) - hc.ResetWithLength(span) + hc.Reset() + hc.SetLength(len(cipherText)) hc.Write(cipherText) cipherRef := hc.Sum(nil) dataRef := doubleRef[encryption.KeyLength:] @@ -139,7 +139,7 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Connect(hashFunc) + cache.SetWriter(hashFunc) cacheFunc := func(_ context.Context) param.SectionWriter { return cache } @@ -151,9 +151,9 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { e.Init(ctx, errFunc) _, data := testutil.SerialData(chunkSize, 255, 0) - e.Write(0, data) - span := bmt.LengthToSpan(chunkSize) - e.Sum(nil, chunkSize, span) + e.Write(data) // 0 + e.SetLength(chunkSize) + e.Sum(nil) cacheCopy := make([]byte, chunkSize) copy(cacheCopy, cache.Get(0)) @@ -166,9 +166,11 @@ func TestEncryptChunkWholeAndSections(t *testing.T) { e.Init(ctx, errFunc) for i := 0; i < chunkSize; i += sectionSize { - e.Write(i/sectionSize, data[i:i+sectionSize]) + e.SeekSection(i / sectionSize) + e.Write(data[i : i+sectionSize]) } - e.Sum(nil, chunkSize, span) + e.SetLength(chunkSize) + e.Sum(nil) for i 
:= 0; i < chunkSize; i += sectionSize { chunked := cacheCopy[i : i+sectionSize] @@ -191,7 +193,7 @@ func TestEncryptIntermediateChunk(t *testing.T) { cache := testutillocal.NewCache() cache.Init(ctx, errFunc) - cache.Connect(hashFunc) + cache.SetWriter(hashFunc) cacheFunc := func(_ context.Context) param.SectionWriter { return cache } @@ -209,10 +211,11 @@ func TestEncryptIntermediateChunk(t *testing.T) { _, data := testutil.SerialData(chunkSize*branches, 255, 0) for i := 0; i < chunkSize*branches; i += chunkSize { - h.Write(i/chunkSize, data[i:i+chunkSize]) + h.SeekSection(i / chunkSize) + h.Write(data[i : i+chunkSize]) } - span := bmt.LengthToSpan(chunkSize * branches) - ref := h.Sum(nil, chunkSize*branches, span) + h.SetLength(chunkSize * branches) + ref := h.Sum(nil) select { case <-ctx.Done(): t.Fatalf("ctx done: %v", ctx.Err()) diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go index fd9ac725f7..893b67e611 100644 --- a/file/hasher/common_test.go +++ b/file/hasher/common_test.go @@ -127,9 +127,8 @@ func (d *dummySectionWriter) SetWriter(_ param.SectionWriterFunc) param.SectionW } // implements param.SectionWriter -func (d *dummySectionWriter) Seek(offset int64, whence int) (int64, error) { - d.index = int(offset) - return offset, nil +func (d *dummySectionWriter) SeekSection(offset int) { + d.index = offset } // implements param.SectionWriter @@ -222,9 +221,9 @@ func TestDummySectionWriter(t *testing.T) { _, data := testutil.SerialData(sectionSize*2, 255, 0) - w.Seek(int64(branches), 0) + w.SeekSection(branches) w.Write(data[:sectionSize]) - w.Seek(int64(branches+1), 0) + w.SeekSection(branches + 1) w.Write(data[sectionSize:]) if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) @@ -239,7 +238,7 @@ func TestDummySectionWriter(t *testing.T) { w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2) w.Reset() - w.Seek(int64(branches/2), 0) + w.SeekSection(branches) w.Write(data) if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) { t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data) diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go index 46e7e11334..f59ea6dbf9 100644 --- a/file/hasher/hasher.go +++ b/file/hasher/hasher.go @@ -59,7 +59,7 @@ func (h *Hasher) Write(b []byte) (int, error) { } go func(i int, jb *job) { hasher := h.params.GetWriter() - hasher.Seek(-1, 0) + hasher.SeekSection(-1) hasher.Write(b) l := len(b) log.Trace("data write", "count", i, "size", l) @@ -91,8 +91,8 @@ func (h *Hasher) SetLength(length int) { } // Seek implements io.Seeker in param.SectionWriter -func (h *Hasher) Seek(offset int64, whence int) (int64, error) { - return int64(h.size), errors.New("Hasher cannot seek") +func (h *Hasher) SeekSection(offset int) { + h.errFunc(errors.New("Hasher cannot seek")) } // Reset implements param.SectionWriter diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go index 1bad9b6d0d..463482a919 100644 --- a/file/hasher/hasher_test.go +++ b/file/hasher/hasher_test.go @@ -24,7 +24,6 @@ func TestHasherJobTopHash(t *testing.T) { h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches; i += chunkSize { - h.Seek(int64(i*h.SectionSize()), 0) h.Write(data[i : i+chunkSize]) } h.Sum(nil) @@ -47,7 +46,6 @@ func TestHasherOneFullChunk(t *testing.T) { h.Init(ctx, logErrFunc) var i int for i = 0; i < chunkSize*branches; i += 
diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go
index 1bad9b6d0d..463482a919 100644
--- a/file/hasher/hasher_test.go
+++ b/file/hasher/hasher_test.go
@@ -24,7 +24,6 @@ func TestHasherJobTopHash(t *testing.T) {
 	h.Init(ctx, logErrFunc)
 	var i int
 	for i = 0; i < chunkSize*branches; i += chunkSize {
-		h.Seek(int64(i*h.SectionSize()), 0)
 		h.Write(data[i : i+chunkSize])
 	}
 	h.Sum(nil)
@@ -47,7 +46,6 @@ func TestHasherOneFullChunk(t *testing.T) {
 	h.Init(ctx, logErrFunc)
 	var i int
 	for i = 0; i < chunkSize*branches; i += chunkSize {
-		h.Seek(int64(i*h.SectionSize()), 0)
 		h.Write(data[i : i+chunkSize])
 	}
 	ref := h.Sum(nil)
@@ -69,7 +67,6 @@ func TestHasherJobChange(t *testing.T) {
 	h.Init(ctx, logErrFunc)
 	jobs := make(map[string]int)
 	for i := 0; i < chunkSize*branches*branches; i += chunkSize {
-		h.Seek(int64(i*h.SectionSize()), 0)
 		h.Write(data[i : i+chunkSize])
 		jobs[h.job.String()]++
 	}
@@ -96,7 +93,6 @@ func TestHasherOneFullLevelOneChunk(t *testing.T) {
 	h.Init(ctx, logErrFunc)
 	var i int
 	for i = 0; i < chunkSize*branches*branches; i += chunkSize {
-		h.Seek(int64(i*h.SectionSize()), 0)
 		h.Write(data[i : i+chunkSize])
 	}
 	ref := h.Sum(nil)
@@ -124,7 +120,6 @@ func TestHasherVector(t *testing.T) {
 		if dataLength-j < chunkSize {
 			size = dataLength - j
 		}
-		h.Seek(int64(j*h.SectionSize()), 0)
 		h.Write(data[j : j+size])
 	}
 	ref := h.Sum(nil)
@@ -169,7 +164,6 @@ func benchmarkHasher(b *testing.B) {
 		if dataLength-i < chunkSize {
 			size = dataLength - i
 		}
-		h.Seek(int64(i*h.SectionSize()), 0)
 		h.Write(data[i : i+size])
 	}
 	h.Sum(nil)
diff --git a/file/hasher/job.go b/file/hasher/job.go
index 7342c164f0..7d0f6c90db 100644
--- a/file/hasher/job.go
+++ b/file/hasher/job.go
@@ -165,7 +165,7 @@ OUTER:
 			idx := entry.index + i
 			data := entry.data[offset : offset+jb.writer.SectionSize()]
 			log.Trace("job write", "datasection", jb.dataSection, "level", jb.level, "processCount", oldProcessCount+i, "endcount", endCount, "index", entry.index+i, "data", hexutil.Encode(data))
-			jb.writer.Seek(int64(idx*jb.writer.SectionSize()), 0)
+			jb.writer.SeekSection(idx)
 			jb.writer.Write(data)
 			offset += jb.writer.SectionSize()
 		}
diff --git a/file/split.go b/file/split.go
index 21882a4a4f..fdcf213e57 100644
--- a/file/split.go
+++ b/file/split.go
@@ -39,9 +39,9 @@ func (s *Splitter) Split() ([]byte, error) {
 			return nil, err
 		}
 		log.Trace("split read", "c", c, "wc", c, "l", l)
-		s.w.Write(wc, d)
+		s.w.Write(d)
 		wc++
 		l += c
 	}
-	return s.w.Sum(nil, 0, nil), nil
+	return s.w.Sum(nil), nil
 }
diff --git a/file/store/store.go b/file/store/store.go
index f149ef1547..de14053e35 100644
--- a/file/store/store.go
+++ b/file/store/store.go
@@ -3,6 +3,7 @@ package store
 import (
 	"context"
 
+	"github.com/ethersphere/swarm/bmt"
 	"github.com/ethersphere/swarm/chunk"
 	"github.com/ethersphere/swarm/log"
 	"github.com/ethersphere/swarm/param"
@@ -17,6 +18,7 @@ type FileStore struct {
 	w       param.SectionWriter
 	ctx     context.Context
 	data    [][]byte
+	length  int
 	errFunc func(error)
 }
@@ -29,7 +31,7 @@ func New(chunkStore chunk.Store, writerFunc param.SectionWriterFunc) *FileStore
 	return f
 }
 
-func (f *FileStore) Connect(hashFunc param.SectionWriterFunc) param.SectionWriter {
+func (f *FileStore) SetWriter(hashFunc param.SectionWriterFunc) param.SectionWriter {
 	f.w = hashFunc(f.ctx)
 	return f
 }
@@ -41,23 +43,29 @@ func (f *FileStore) Init(ctx context.Context, errFunc func(error)) {
 }
 
 // Reset implements param.SectionWriter
-func (f *FileStore) Reset(ctx context.Context) {
-	f.ctx = ctx
+func (f *FileStore) Reset() {
+	f.length = 0
+	f.data = [][]byte{}
+	f.w.Reset()
+}
+
+func (f *FileStore) SeekSection(index int) {
+	f.w.SeekSection(index)
 }
 
 // Write implements param.SectionWriter
 // it asynchronously writes to the underlying writer while caching the data slice
-func (f *FileStore) Write(index int, b []byte) {
-	f.w.Write(index, b)
+func (f *FileStore) Write(b []byte) (int, error) {
 	f.data = append(f.data, b)
+	return f.w.Write(b)
 }
 
 // Sum implements param.SectionWriter
 // calls underlying writer's Sum and sends the result with data as a chunk to chunk.Store
-func (f *FileStore) Sum(b []byte, length int, span []byte) []byte {
-	ref := f.w.Sum(b, length, span)
+func (f *FileStore) Sum(b []byte) []byte {
+	ref := f.w.Sum(b)
 	go func(ref []byte) {
-		b = span
+		b = bmt.LengthToSpan(f.length)
 		for _, data := range f.data {
 			b = append(b, data...)
 		}
@@ -71,14 +79,24 @@ func (f *FileStore) Sum(b []byte, length int, span []byte) []byte {
 	return ref
 }
 
+func (f *FileStore) SetLength(length int) {
+	f.length = length
+	f.w.SetLength(length)
+}
+
+// BlockSize implements param.SectionWriter
+func (f *FileStore) BlockSize() int {
+	return f.w.BlockSize()
+}
+
 // SectionSize implements param.SectionWriter
 func (f *FileStore) SectionSize() int {
 	return f.w.SectionSize()
 }
 
 // DigestSize implements param.SectionWriter
-func (f *FileStore) DigestSize() int {
-	return f.w.DigestSize()
+func (f *FileStore) Size() int {
+	return f.w.Size()
 }
 
 // Branches implements param.SectionWriter
diff --git a/file/store/store_test.go b/file/store/store_test.go
index 359cd29d37..431992c6f0 100644
--- a/file/store/store_test.go
+++ b/file/store/store_test.go
@@ -65,9 +65,11 @@ func TestStoreWithHasher(t *testing.T) {
 	span := bmt.LengthToSpan(chunkSize)
 	go func() {
 		for i := 0; i < chunkSize; i += sectionSize {
-			h.Write(i/sectionSize, data[i:i+sectionSize])
+			h.SeekSection(i / sectionSize)
+			h.Write(data[i : i+sectionSize])
 		}
-		h.Sum(nil, chunkSize, span)
+		h.SetLength(chunkSize)
+		h.Sum(nil)
 	}()
 
 	// capture chunk and verify contents
diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go
index 09ee85e655..4c3524ce32 100644
--- a/file/testutillocal/cache.go
+++ b/file/testutillocal/cache.go
@@ -2,7 +2,6 @@ package testutillocal
 
 import (
 	"context"
-	"errors"
 
 	"github.com/ethersphere/swarm/param"
 )
@@ -39,15 +38,11 @@ func (c *Cache) SetLength(length int) {
 
 }
 
-func (c *Cache) Seek(offset int64, whence int) (int64, error) {
-	if whence > 0 {
-		return 0, errors.New("whence for Cache.Seek not implemented")
-	}
-	c.index = int(offset) / c.SectionSize()
+func (c *Cache) SeekSection(offset int) {
+	c.index = offset
 	if c.w != nil {
-		return c.w.Seek(offset, whence)
+		c.w.SeekSection(offset)
 	}
-	return int64(c.index), nil
 }
 
 func (c *Cache) Write(b []byte) (int, error) {
diff --git a/file/testutillocal/cache_test.go b/file/testutillocal/cache_test.go
index 6300a8f3ea..cf43f0d3cb 100644
--- a/file/testutillocal/cache_test.go
+++ b/file/testutillocal/cache_test.go
@@ -37,7 +37,7 @@ func TestCacheLink(t *testing.T) {
 	c.Init(context.Background(), func(error) {})
 	c.SetWriter(hashFunc)
 	_, data := testutil.SerialData(chunkSize, 255, 0)
-	c.Seek(-1, 0)
+	c.SeekSection(-1)
 	c.Write(data)
 	ref := c.Sum(nil)
 	refHex := hexutil.Encode(ref)
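FileStore.Sum above rebuilds the chunk as an 8-byte span followed by the cached data before handing it to the chunk store. The span encoding is the one bmt.LengthToSpan produces; a self-contained restatement (sketch, hypothetical helper name):

    // lengthToSpan restates bmt.LengthToSpan: the chunk's data length
    // as a 64-bit little-endian prefix. Requires "encoding/binary".
    func lengthToSpan(length int) []byte {
    	span := make([]byte, 8)
    	binary.LittleEndian.PutUint64(span, uint64(length))
    	return span
    }

A full 4096-byte data chunk thus carries the prefix 0010000000000000 (hex).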
From d64359a96f5c033007c90845df0db20d32bf3aff Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 09:32:36 +0100
Subject: [PATCH 61/67] bmt: Set lock across SeekSection() and Write() for async

---
 bmt/bmt.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bmt/bmt.go b/bmt/bmt.go
index 6391a769ee..b850becb1c 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -517,12 +517,18 @@ func (sw *AsyncHasher) Branches() int {
 	return sw.seccount
 }
 
+func (sw *AsyncHasher) SeekSection(offset int) {
+	sw.mtx.Lock()
+	sw.Hasher.SeekSection(offset)
+}
+
 // Write writes to the current position cursor of the Hasher
 // The cursor must be manually set with SeekSection().
 // The method will NOT advance the cursor.
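 //
 // Implements hash.hash in param.SectionWriter
 func (sw *AsyncHasher) Write(section []byte) (int, error) {
+	defer sw.mtx.Unlock()
 	sw.Hasher.size += len(section)
 	return sw.writeSection(sw.Hasher.cursor, section)
 }
@@ -540,8 +546,7 @@ func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) {
 		sw.all = true
 		return len(section), nil
 	}
-	sw.mtx.Lock()
-	defer sw.mtx.Unlock()
+	//sw.mtx.Lock()
 	t := sw.getTree()
 	// cursor keeps track of the rightmost section written so far
 	// if index is lower than cursor then just write non-final section as is

The locking scheme patch 61 introduces spans two calls: SeekSection() takes the mutex and leaves it held, and Write() releases it in a defer, so setting the cursor and writing to it form one critical section. Isolated sketch of the pattern (stand-in type, not the repository's; requires "sync"):

    type cursorWriter struct {
    	mtx    sync.Mutex
    	cursor int
    }

    // SeekSection acquires the lock and deliberately does not release it.
    func (w *cursorWriter) SeekSection(offset int) {
    	w.mtx.Lock()
    	w.cursor = offset
    }

    // Write releases the lock taken by SeekSection. The implied contract:
    // every SeekSection must be followed by exactly one Write, or the
    // lock is held forever.
    func (w *cursorWriter) Write(b []byte) (int, error) {
    	defer w.mtx.Unlock()
    	// ... write b at section w.cursor ...
    	return len(b), nil
    }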
From 9dc23510f830fc86a860eae357a87270276b650c Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 11:08:21 +0100
Subject: [PATCH 62/67] file: Fix dummySectionWriter test fails

---
 file/hasher/common_test.go | 18 +++++++++++++-----
 file/hasher/hasher_test.go |  1 +
 file/hasher/job_test.go    |  6 +++---
 file/hasher/param.go       |  2 +-
 4 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go
index 893b67e611..5295605821 100644
--- a/file/hasher/common_test.go
+++ b/file/hasher/common_test.go
@@ -3,6 +3,7 @@ package hasher
 import (
 	"bytes"
 	"context"
+	"encoding/binary"
 	"hash"
 	"sync"
 	"testing"
@@ -100,6 +101,7 @@ type dummySectionWriter struct {
 	data       []byte
 	digest     []byte
 	size       int
+	span       []byte
 	summed     bool
 	index      int
 	writer     hash.Hash
@@ -128,12 +130,13 @@ func (d *dummySectionWriter) SetWriter(_ param.SectionWriterFunc) param.SectionW
 
 // implements param.SectionWriter
 func (d *dummySectionWriter) SeekSection(offset int) {
-	d.index = offset
+	d.index = offset * d.SectionSize()
 }
 
 // implements param.SectionWriter
 func (d *dummySectionWriter) SetLength(length int) {
-	d.size = length
+	d.span = make([]byte, 8)
+	binary.LittleEndian.PutUint64(d.span, uint64(length))
 }
 
 // implements param.SectionWriter
@@ -169,6 +172,8 @@ func (d *dummySectionWriter) Sum(_ []byte) []byte {
 func (d *dummySectionWriter) sum() {
 	d.mu.Lock()
 	defer d.mu.Unlock()
+	d.writer.Write(d.span)
+	log.Trace("dummy sum writing span", "span", d.span)
 	for i := 0; i < d.size; i += d.writer.Size() {
 		sectionData := d.data[i : i+d.writer.Size()]
 		log.Trace("dummy sum write", "i", i/d.writer.Size(), "data", hexutil.Encode(sectionData), "size", d.size)
@@ -186,6 +191,7 @@ func (d *dummySectionWriter) Reset() {
 	d.digest = make([]byte, d.digestSize)
 	d.size = 0
 	d.summed = false
+	d.span = nil
 	d.writer.Reset()
 }
 
@@ -226,10 +232,11 @@ func TestDummySectionWriter(t *testing.T) {
 	w.SeekSection(branches + 1)
 	w.Write(data[sectionSize:])
 	if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) {
-		t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data)
+		t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data)
 	}
 
 	correctDigestHex := "0xfbc16f6db3534b456cb257d00148127f69909000c89f8ce5bc6183493ef01da1"
+	w.SetLength(chunkSize * 2)
 	digest := w.Sum(nil)
 	digestHex := hexutil.Encode(digest)
 	if digestHex != correctDigestHex {
@@ -238,13 +245,14 @@ func TestDummySectionWriter(t *testing.T) {
 
 	w = newDummySectionWriter(chunkSize*2, sectionSize*2, sectionSize*2, branches/2)
 	w.Reset()
-	w.SeekSection(branches)
+	w.SeekSection(branches / 2)
 	w.Write(data)
 	if !bytes.Equal(w.data[chunkSize:chunkSize+sectionSize*2], data) {
-		t.Fatalf("Write pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data)
+		t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data)
 	}
 
 	correctDigestHex += zeroHex
+	w.SetLength(chunkSize * 2)
 	digest = w.Sum(nil)
 	digestHex = hexutil.Encode(digest)
 	if digestHex != correctDigestHex {
diff --git a/file/hasher/hasher_test.go b/file/hasher/hasher_test.go
index 463482a919..11a7a080c8 100644
--- a/file/hasher/hasher_test.go
+++ b/file/hasher/hasher_test.go
@@ -122,6 +122,7 @@ func TestHasherVector(t *testing.T) {
 		}
 		h.Write(data[j : j+size])
 	}
+	//h.SetLength(dataLength)
 	ref := h.Sum(nil)
 	correctRefHex := "0x" + expected[i]
 	refHex := hexutil.Encode(ref)
diff --git a/file/hasher/job_test.go b/file/hasher/job_test.go
index 4f53ac2c39..6d4cbaa157 100644
--- a/file/hasher/job_test.go
+++ b/file/hasher/job_test.go
@@ -260,10 +260,10 @@ func TestJobWriteTwoAndFinish(t *testing.T) {
 	defer cancel()
 	select {
 	case ref := <-tgt.Done():
-		correctRefHex := "0x002030bde3d4cf89919649775cd71875c4d0ab1708a380e03fefc3a28aa24831"
+		correctRefHex := "0xe1553e1a3a6b73f96e6fc48318895e401e7db2972962ee934633fa8b3eaaf78b"
 		refHex := hexutil.Encode(ref)
 		if refHex != correctRefHex {
-			t.Fatalf("job write full: expected %s, got %s", correctRefHex, refHex)
+			t.Fatalf("job write two and finish: expected %s, got %s", correctRefHex, refHex)
 		}
 	case <-ctx.Done():
 		t.Fatalf("timeout: %v", ctx.Err())
@@ -333,7 +333,7 @@ func TestJobWriteParentSection(t *testing.T) {
 	if jbnp.count() != 1 {
 		t.Fatalf("parent count: expected %d, got %d", 1, jbnp.count())
 	}
-	correctRefHex := "0x002030bde3d4cf89919649775cd71875c4d0ab1708a380e03fefc3a28aa24831"
+	correctRefHex := "0xe1553e1a3a6b73f96e6fc48318895e401e7db2972962ee934633fa8b3eaaf78b"
 
 	// extract data in section 2 from the writer
 	// TODO: overload writer to provide a get method to extract data to improve clarity
diff --git a/file/hasher/param.go b/file/hasher/param.go
index 7adaa17eb9..33210ae835 100644
--- a/file/hasher/param.go
+++ b/file/hasher/param.go
@@ -34,7 +34,7 @@ func newTreeParams(hashFunc param.SectionWriterFunc) *treeParams {
 	log.Trace("new tree params", "sectionsize", p.SectionSize, "branches", p.Branches, "chunksize", p.ChunkSize)
 	p.writerPool.New = func() interface{} {
 		hf := p.hashFunc(p.ctx)
-		log.Trace("param new hasher", "h", hf)
+		//log.Trace("param new hasher", "h", hf)
 		return hf
 	}
 	p.Spans = generateSpanSizes(p.Branches, 9)
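The new expected hashes in job_test.go follow from the dummy writer now feeding the 8-byte span to its internal hasher before the section data, so its digest becomes H(span || data). Sketch of the summing order (illustrative only, not the repository's code; requires "hash"):

    // sumWithSpan reproduces the digest layout of the fixed dummy writer:
    // the little-endian span prefix first, then the buffered data.
    func sumWithSpan(h hash.Hash, span, data []byte) []byte {
    	h.Reset()
    	h.Write(span) // 8-byte little-endian length prefix
    	h.Write(data) // section data
    	return h.Sum(nil)
    }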
From fad18c144aa3f413dbb0b447ef89b96392fc008e Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 13:11:44 +0100
Subject: [PATCH 63/67] bmt, file, param: Make all tests in file/hasher pass

---
 bmt/bmt.go                    |  6 +++++-
 file/hasher/common_test.go    |  9 ++++++++-
 file/hasher/hasher.go         |  3 +++
 file/hasher/job.go            |  3 ++-
 file/hasher/reference.go      |  2 +-
 file/hasher/reference_test.go |  8 ++++----
 file/store/store.go           | 12 ++++++++----
 file/store/store_test.go      |  1 +
 file/testutillocal/cache.go   |  7 ++++++-
 param/io.go                   |  1 +
 10 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/bmt/bmt.go b/bmt/bmt.go
index b850becb1c..44c0e7355b 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -300,8 +300,12 @@ func (h *Hasher) SectionSize() int {
 }
 
 func (h *Hasher) SetLength(length int) {
-	h.jobSize = (length-1)%h.pool.Size + 1
+	h.jobSize = length //(length-1)%h.pool.Size + 1
+}
+
+func (h *Hasher) SetSpan(length int) {
 	span := LengthToSpan(length)
+	log.Trace("setlength", "span", span, "length", length)
 	h.getTree().span = span
 }
 
diff --git a/file/hasher/common_test.go b/file/hasher/common_test.go
index 5295605821..fdb7a817d1 100644
--- a/file/hasher/common_test.go
+++ b/file/hasher/common_test.go
@@ -135,6 +135,11 @@ func (d *dummySectionWriter) SeekSection(offset int) {
 
 // implements param.SectionWriter
 func (d *dummySectionWriter) SetLength(length int) {
+	d.size = length
+}
+
+// implements param.SectionWriter
+func (d *dummySectionWriter) SetSpan(length int) {
 	d.span = make([]byte, 8)
 	binary.LittleEndian.PutUint64(d.span, uint64(length))
 }
@@ -235,8 +240,9 @@ func TestDummySectionWriter(t *testing.T) {
 		t.Fatalf("Write double pos %d: expected %x, got %x", chunkSize, w.data[chunkSize:chunkSize+sectionSize*2], data)
 	}
 
-	correctDigestHex := "0xfbc16f6db3534b456cb257d00148127f69909000c89f8ce5bc6183493ef01da1"
+	correctDigestHex := "0x52eefd0c37895a8845d4a6cf6c6b56980e448376e55eb45717663ab7b3fc8d53"
 	w.SetLength(chunkSize * 2)
+	w.SetSpan(chunkSize * 2)
 	digest := w.Sum(nil)
 	digestHex := hexutil.Encode(digest)
 	if digestHex != correctDigestHex {
@@ -253,6 +259,7 @@ func TestDummySectionWriter(t *testing.T) {
 
 	correctDigestHex += zeroHex
 	w.SetLength(chunkSize * 2)
+	w.SetSpan(chunkSize * 2)
 	digest = w.Sum(nil)
 	digestHex = hexutil.Encode(digest)
 	if digestHex != correctDigestHex {
diff --git a/file/hasher/hasher.go b/file/hasher/hasher.go
index f59ea6dbf9..8ed47403fe 100644
--- a/file/hasher/hasher.go
+++ b/file/hasher/hasher.go
@@ -86,6 +86,9 @@ func (h *Hasher) Sum(b []byte) []byte {
 	return append(b, ref...)
 }
 
+func (h *Hasher) SetSpan(length int) {
+}
+
 func (h *Hasher) SetLength(length int) {
 	h.size = length
 }
diff --git a/file/hasher/job.go b/file/hasher/job.go
index 7d0f6c90db..1ada233210 100644
--- a/file/hasher/job.go
+++ b/file/hasher/job.go
@@ -224,7 +224,8 @@ func (jb *job) sum() {
 	size := jb.size()
 	//span := bmt.LengthToSpan(size)
 	refSize := jb.count() * jb.params.SectionSize
-	jb.writer.SetLength(size)
+	jb.writer.SetLength(refSize)
+	jb.writer.SetSpan(size)
 	log.Trace("job sum", "count", jb.count(), "refsize", refSize, "size", size, "datasection", jb.dataSection, "level", jb.level, "targetlevel", targetLevel, "endcount", jb.endCount)
 	ref := jb.writer.Sum(nil)
 
diff --git a/file/hasher/reference.go b/file/hasher/reference.go
index 9a6ce31836..8c316ec618 100644
--- a/file/hasher/reference.go
+++ b/file/hasher/reference.go
@@ -68,7 +68,7 @@ func (r *ReferenceHasher) sum(lvl int) []byte {
 	toSumSize := r.cursors[lvl] - r.cursors[lvl+1]
 
 	r.hasher.Reset()
-	r.hasher.SetLength(span)
+	r.hasher.SetSpan(span)
 	r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize])
 	ref := r.hasher.Sum(nil)
 	return ref
diff --git a/file/hasher/reference_test.go b/file/hasher/reference_test.go
index 91fbfeb1a5..a72999874e 100644
--- a/file/hasher/reference_test.go
+++ b/file/hasher/reference_test.go
@@ -35,7 +35,7 @@ func TestManualDanglingChunk(t *testing.T) {
 	_, levels[0] = testutil.SerialData(chunkSize*branches+chunkSize, 255, 0)
 	for i := 0; i < chunkSize*branches; i += chunkSize {
 		h.Reset()
-		h.SetLength(chunkSize)
+		h.SetSpan(chunkSize)
 		h.Write(levels[0][i : i+chunkSize])
 		copy(levels[1][i/branches:], h.Sum(nil))
 	}
@@ -48,7 +48,7 @@ func TestManualDanglingChunk(t *testing.T) {
 	// write the dangling chunk
 	// hash it and write the reference on the second section of level 2
 	h.Reset()
-	h.SetLength(chunkSize)
+	h.SetSpan(chunkSize)
 	h.Write(levels[0][chunkSize*branches:])
 	copy(levels[2][sectionSize:], h.Sum(nil))
 	refHex = hexutil.Encode(levels[2][sectionSize:])
@@ -59,7 +59,7 @@ func TestManualDanglingChunk(t *testing.T) {
 
 	// hash the chunk on level 1 and write into the first section of level 2
 	h.Reset()
-	h.SetLength(chunkSize * branches)
+	h.SetSpan(chunkSize * branches)
 	h.Write(levels[1])
 	copy(levels[2], h.Sum(nil))
 	refHex = hexutil.Encode(levels[2][:sectionSize])
@@ -70,7 +70,7 @@ func TestManualDanglingChunk(t *testing.T) {
 
 	// hash the two sections on level 2 to obtain the root hash
 	h.Reset()
-	h.SetLength(chunkSize*branches + chunkSize)
+	h.SetSpan(chunkSize*branches + chunkSize)
 	h.Write(levels[2])
 	ref := h.Sum(nil)
 	refHex = hexutil.Encode(ref)
diff --git a/file/store/store.go b/file/store/store.go
index de14053e35..8d238d71ef 100644
--- a/file/store/store.go
+++ b/file/store/store.go
@@ -18,7 +18,7 @@ type FileStore struct {
 	w       param.SectionWriter
 	ctx     context.Context
 	data    [][]byte
-	length  int
+	span    int
 	errFunc func(error)
 }
@@ -44,7 +44,7 @@ func (f *FileStore) Init(ctx context.Context, errFunc func(error)) {
 
 // Reset implements param.SectionWriter
 func (f *FileStore) Reset() {
-	f.length = 0
+	f.span = 0
 	f.data = [][]byte{}
 	f.w.Reset()
 }
@@ -65,7 +65,7 @@ func (f *FileStore) Sum(b []byte) []byte {
 	ref := f.w.Sum(b)
 	go func(ref []byte) {
-		b = bmt.LengthToSpan(f.length)
+		b = bmt.LengthToSpan(f.span)
 		for _, data := range f.data {
 			b = append(b, data...)
 		}
@@ -79,8 +79,12 @@ func (f *FileStore) Sum(b []byte) []byte {
 	return ref
 }
 
+func (f *FileStore) SetSpan(length int) {
+	f.span = length
+	f.w.SetSpan(length)
+}
+
 func (f *FileStore) SetLength(length int) {
-	f.length = length
 	f.w.SetLength(length)
 }
 
diff --git a/file/store/store_test.go b/file/store/store_test.go
index 431992c6f0..4c3620df23 100644
--- a/file/store/store_test.go
+++ b/file/store/store_test.go
@@ -68,6 +68,7 @@ func TestStoreWithHasher(t *testing.T) {
 			h.SeekSection(i / sectionSize)
 			h.Write(data[i : i+sectionSize])
 		}
+		h.SetSpan(chunkSize)
 		h.SetLength(chunkSize)
 		h.Sum(nil)
 	}()
diff --git a/file/testutillocal/cache.go b/file/testutillocal/cache.go
index 4c3524ce32..7b2968139b 100644
--- a/file/testutillocal/cache.go
+++ b/file/testutillocal/cache.go
@@ -31,11 +31,16 @@ func (c *Cache) SetWriter(writeFunc param.SectionWriterFunc) param.SectionWriter
 	return c
 }
 
+func (c *Cache) SetSpan(length int) {
+	if c.w != nil {
+		c.w.SetSpan(length)
+	}
+}
+
 func (c *Cache) SetLength(length int) {
 	if c.w != nil {
 		c.w.SetLength(length)
 	}
-
 }
 
 func (c *Cache) SeekSection(offset int) {
diff --git a/param/io.go b/param/io.go
index 68e00b5d53..6140d6319e 100644
--- a/param/io.go
+++ b/param/io.go
@@ -13,6 +13,7 @@ type SectionWriter interface {
 	SeekSection(section int)
 	Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination
 	SetLength(length int)
+	SetSpan(length int)
 	SectionSize() int // size of the async section unit to use
 	Branches() int
 }
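Patch 63 is where length and span become independent: SetLength() announces how many bytes will actually be written to the writer, while SetSpan() announces how many data bytes that input represents. The two only coincide on the data level; for an intermediate node they differ, as job.sum() above shows. A sketch under those definitions:

    // sumIntermediate finalizes an intermediate node the way job.sum()
    // does: refSize counts reference bytes written, dataSize the data
    // bytes those references span.
    func sumIntermediate(w param.SectionWriter, refSize, dataSize int) []byte {
    	w.SetLength(refSize) // e.g. 128 refs * 32 bytes = 4096
    	w.SetSpan(dataSize)  // e.g. 128 chunks * 4096 bytes = 524288
    	return w.Sum(nil)
    }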
From 94c6cb6b434a4c8a1cdd95b379b64e03e6f54512 Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 13:57:46 +0100
Subject: [PATCH 64/67] bmt: rehabilitate file/encrypt, make bmt tests pass

BMT is now slower :/
---
 bmt/bmt.go              |  31 ++++++-----
 bmt/bmt_test.go         | 110 +++++++--------------------------------
 file/encrypt/encrypt.go |   6 +++
 3 files changed, 41 insertions(+), 106 deletions(-)

diff --git a/bmt/bmt.go b/bmt/bmt.go
index 44c0e7355b..53e22cc07b 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -84,11 +84,10 @@ type BaseHasherFunc func() hash.Hash
 // the tree and itself in a state reusable for hashing a new chunk
 // - generates and verifies segment inclusion proofs (TODO:)
 type Hasher struct {
-	pool    *TreePool // BMT resource pool
-	bmt     *tree     // prebuilt BMT resource for flowcontrol and proofs
-	size    int       // bytes written to Hasher since last Reset()
-	jobSize int       // size of data written in current session
-	cursor  int       // cursor to write to on next Write() call
+	pool   *TreePool // BMT resource pool
+	bmt    *tree     // prebuilt BMT resource for flowcontrol and proofs
+	size   int       // bytes written to Hasher since last Reset()
+	cursor int       // cursor to write to on next Write() call
 }
 
 // New creates a reusable BMT Hasher that
@@ -300,12 +299,10 @@ func (h *Hasher) SectionSize() int {
 }
 
 func (h *Hasher) SetLength(length int) {
-	h.jobSize = length //(length-1)%h.pool.Size + 1
 }
 
 func (h *Hasher) SetSpan(length int) {
 	span := LengthToSpan(length)
-	log.Trace("setlength", "span", span, "length", length)
 	h.getTree().span = span
 }
 
@@ -353,17 +350,16 @@ func (h *Hasher) Sum(b []byte) (s []byte) {
 	// wait for the result
 	s = <-t.result
 	if t.span == nil {
-		t.span = make([]byte, 8)
-		binary.LittleEndian.PutUint64(t.span, uint64(h.size))
+		t.span = LengthToSpan(h.size)
 	}
 	span := t.span
 	// release the tree resource back to the pool
 	h.releaseTree()
 	// b + sha3(span + BMT(pure_chunk))
 	//if len(span) == 0 {
-	if span == nil {
-		return append(b, s...)
-	}
+	//if span == nil {
+	//	return append(b, s...)
+	//}
 	return doSum(h.pool.hasher(), b, span, s)
 }
 
@@ -421,7 +417,6 @@ func (h *Hasher) Write(b []byte) (int, error) {
 func (h *Hasher) Reset() {
 	h.cursor = 0
 	h.size = 0
-	h.jobSize = 0
 	h.releaseTree()
 }
 
@@ -466,6 +461,7 @@ func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher {
 		secsize:  secsize,
 		seccount: seccount,
 		write:    write,
+		jobSize:  0,
 	}
 }
 
@@ -491,6 +487,7 @@ type AsyncHasher struct {
 	seccount int // base section count
 	write    func(i int, section []byte, final bool)
 	all      bool // if all written in one go, temporary workaround
+	jobSize  int
 }
 
 // Implements param.SectionWriter
@@ -499,10 +496,15 @@ func (sw *AsyncHasher) Init(_ context.Context, errFunc func(error)) {
 }
 
 // Implements param.SectionWriter
 func (sw *AsyncHasher) Reset() {
+	sw.jobSize = 0
 	sw.all = false
 	sw.Hasher.Reset()
 }
 
+func (sw *AsyncHasher) SetLength(length int) {
+	sw.jobSize = length
+}
+
 // Implements param.SectionWriter
 func (sw *AsyncHasher) SetWriter(_ param.SectionWriterFunc) param.SectionWriter {
 	log.Warn("Asynchasher does not currently support SectionWriter chaining")
@@ -598,10 +600,11 @@ func (sw *AsyncHasher) Sum(b []byte) (s []byte) {
 	}
 	sw.mtx.Lock()
 	t := sw.getTree()
-	length := int(sw.Hasher.jobSize)
+	length := sw.jobSize
 	if length == 0 {
 		sw.mtx.Unlock()
 		s = sw.pool.zerohashes[sw.pool.Depth]
+		return
 	} else {
 		// for non-zero input the rightmost section is written to the tree asynchronously
 		// if the actual last section has been written (t.cursor == length/t.secsize)
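With the nil-span fallback above (t.span = LengthToSpan(h.size) in Sum), the synchronous Hasher degrades gracefully to a plain hash.Hash, which the rewritten TestUseSyncAsOrdinaryHasher below relies on. Sketch of that usage (assuming the bmt package as patched here):

    // syncSumDefaultSpan hashes without calling SetSpan(); Sum derives
    // the span from the number of bytes written, here span(3).
    func syncSumDefaultSpan() []byte {
    	pool := bmt.NewTreePool(sha3.NewLegacyKeccak256, 128, bmt.PoolSize)
    	h := bmt.New(pool)
    	h.Write([]byte("foo"))
    	return h.Sum(nil)
    }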
diff --git a/bmt/bmt_test.go b/bmt/bmt_test.go
index 0f8489e29a..1cfd611a22 100644
--- a/bmt/bmt_test.go
+++ b/bmt/bmt_test.go
@@ -26,7 +26,6 @@ import (
 	"testing"
 	"time"
 
-	"github.com/ethersphere/swarm/log"
 	"github.com/ethersphere/swarm/param"
 	"github.com/ethersphere/swarm/testutil"
 	"golang.org/x/crypto/sha3"
@@ -322,7 +321,7 @@ func TestBMTWriterBuffers(t *testing.T) {
 				return fmt.Errorf("incorrect read. expected %v bytes, got %v", buflen, read)
 			}
 		}
-		bmt.SetLength(0)
+		bmt.SetSpan(0)
 		hash := bmt.Sum(nil)
 		if !bytes.Equal(hash, expHash) {
 			return fmt.Errorf("hash mismatch. expected %x, got %x", hash, expHash)
@@ -365,7 +364,6 @@ func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, coun
 	data := d[:n]
 	rbmt := NewRefHasher(hasher, count)
 	var exp []byte
-	log.Trace("correct", "n", n, "count", count, "depth", bmt.pool.Depth)
 	if n == 0 {
 		exp = bmt.pool.zerohashes[bmt.pool.Depth]
 	} else {
@@ -542,7 +540,7 @@ func benchmarkRefHasher(t *testing.B, n int) {
 // Hash hashes the data and the span using the bmt hasher
 func syncHash(h *Hasher, spanLength int, data []byte) []byte {
 	h.Reset()
-	h.SetLength(spanLength)
+	h.SetSpan(spanLength)
 	h.Write(data)
 	return h.Sum(nil)
 }
@@ -581,14 +579,14 @@ func asyncHashRandom(bmt param.SectionWriter, spanLength int, data []byte, wh wh
 func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) {
 	bmt.Reset()
 	if l == 0 {
-		bmt.SetLength(spanLength)
-		bmt.(*AsyncHasher).Hasher.jobSize = l
+		bmt.SetLength(l)
+		bmt.SetSpan(spanLength)
 		return bmt.Sum(nil)
 	}
 	c := make(chan []byte, 1)
 	hashf := func() {
-		bmt.SetLength(spanLength)
-		bmt.(*AsyncHasher).Hasher.jobSize = l
+		bmt.SetLength(l)
+		bmt.SetSpan(spanLength)
 		c <- bmt.Sum(nil)
 	}
 	maxsize := len(idxs)
@@ -604,100 +602,28 @@ func asyncHash(bmt param.SectionWriter, spanLength int, l int, wh whenHash, idxs
 		}
 	}
 	if wh == last {
-		bmt.SetLength(spanLength)
-		bmt.(*AsyncHasher).Hasher.jobSize = l
+		bmt.SetLength(l)
+		bmt.SetSpan(spanLength)
 		return bmt.Sum(nil)
 	}
 	return <-c
 }
 
-// TestHashSpanCases verifies that span and size is set automatically even if SetLength() is not explicitly called
-func TestHashSpanCases(t *testing.T) {
+// TestUseSyncAsOrdinaryHasher verifies that the bmt.Hasher can be used with the hash.Hash interface
+func TestUseSyncAsOrdinaryHasher(t *testing.T) {
 	hasher := sha3.NewLegacyKeccak256
-	pool := NewTreePool(hasher, 128, PoolSize)
-	zeroHash := pool.zerohashes[7]
-	refRes := zeroHash
-
-	// check that SetLength(0) is equivalent to no Write() in all cases
-	h := New(pool)
-	res := h.Sum(nil)
-	if !bytes.Equal(refRes, res) {
-		t.Fatalf("nilspan vs zerohash; expected %x, got %x", refRes, res)
-	}
-	h.Reset()
-	h.SetLength(0)
-	res = h.Sum(nil)
-	if !bytes.Equal(refRes, res) {
-		t.Fatalf("length 0 vs zerohash; expected %x, got %x", refRes, res)
-	}
-	h.Reset()
-	h.Write([]byte("foo"))
-	h.SetLength(0)
-	res = h.Sum(nil)
+	pool := NewTreePool(hasher, segmentCount, PoolSize)
+	bmt := New(pool)
+	bmt.Write([]byte("foo"))
+	res := bmt.Sum(nil)
 	refh := NewRefHasher(hasher, 128)
 	resh := refh.Hash([]byte("foo"))
 	hsub := hasher()
-	hsub.Write(zeroSpan)
-	hsub.Write(resh)
-	refRes = hsub.Sum(nil)
-	if !bytes.Equal(refRes, res) {
-		t.Fatalf("length 0 overwrite vs zerohash; expected %x, got %x", refRes, res)
-	}
-
-	// span and length is automatically set if SetLength() is not called
-	h.Reset()
-	h.Write([]byte("foo"))
-	resNoLength := h.Sum(nil)
-
-	h.Reset()
-	h.Write([]byte("foo"))
-	h.SetLength(3)
-	resLength := h.Sum(nil)
-
-	if !bytes.Equal(resLength, resNoLength) {
-		t.Fatalf("foo length %d, expected %x, got %x", 3, resLength, resNoLength)
-	}
-
-	h.Reset()
-	h.Write([]byte("foo"))
-	h.SetLength(4)
-	resLength = h.Sum(nil)
-	if bytes.Equal(resLength, resNoLength) {
-		t.Fatalf("foo length %d; unexpected %x == %x", 4, resLength, resNoLength)
-	}
-
-	// correct length is calculated when span exceeds size of bottom tree level
-	h.Reset()
-	h.Write([]byte("foo"))
-	h.SetLength(4096 + 3)
-	res = h.Sum(nil)
-	refh = NewRefHasher(hasher, 128)
-	resh = refh.Hash([]byte("foo"))
-	hsub = hasher()
-	span := make([]byte, 8)
-	binary.LittleEndian.PutUint64(span, 4096+3)
+	span := LengthToSpan(3)
 	hsub.Write(span)
 	hsub.Write(resh)
-	refRes = hsub.Sum(nil)
-
-	if !bytes.Equal(refRes, res) {
-		t.Fatalf("foo length %d, expected %x, got %x", 4096+3, refRes, res)
-	}
-
-	h.Reset()
-	h.Write([]byte("foo"))
-	h.SetLength(4096 + 4)
-	res = h.Sum(nil)
-	refh = NewRefHasher(hasher, 128)
-	resh = refh.Hash([]byte("foo"))
-	hsub = hasher()
-	span = make([]byte, 8)
-	binary.LittleEndian.PutUint64(span, 4096+4)
-	hsub.Write(span)
-	hsub.Write(resh)
-	refRes = hsub.Sum(nil)
-
-	if !bytes.Equal(refRes, res) {
-		t.Fatalf("foo length %d; expected %x, got %x", 4096+4, refRes, res)
+	refRes := hsub.Sum(nil)
+	if !bytes.Equal(res, refRes) {
+		t.Fatalf("normalhash; expected %x, got %x", refRes, res)
 	}
 }
diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go
index f7f34ccb74..22783359d6 100644
--- a/file/encrypt/encrypt.go
+++ b/file/encrypt/encrypt.go
@@ -17,6 +17,7 @@ type Encrypt struct {
 	e       encryption.Encryption
 	w       param.SectionWriter
 	length  int
+	span    int
 	keyHash hash.Hash
 	errFunc func(error)
 }
@@ -76,6 +77,11 @@ func (e *Encrypt) SetLength(length int) {
 	e.w.SetLength(length)
 }
 
+func (e *Encrypt) SetSpan(length int) {
+	e.span = length
+	e.w.SetSpan(length)
+}
+
 func (e *Encrypt) Sum(b []byte) []byte {
 	// derive new key
 	oldKey := make([]byte, encryption.KeyLength)
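After patch 64 the asynchronous test helper settles on one finalization order: write sections (in any order), then SetLength() with the byte count, SetSpan() with the data span, and Sum(). A condensed restatement of asyncHash's happy path (illustrative, arguments renamed):

    func asyncSum(w param.SectionWriter, sections [][]byte, l, spanLength int) []byte {
    	w.Reset()
    	for i, s := range sections { // the real test randomizes this order
    		w.SeekSection(i)
    		w.Write(s)
    	}
    	w.SetLength(l)          // bytes written
    	w.SetSpan(spanLength)   // data bytes spanned
    	return w.Sum(nil)
    }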
From 12355cbe77665a00d112f3ccb6cec1d4f4a48a92 Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 14:36:52 +0100
Subject: [PATCH 65/67] storage, bmt: Replace ResetWithLength with param.SectionWriter.SpanBytes

---
 bmt/bmt.go                            |  7 +++++
 storage/chunker_test.go               |  3 +-
 storage/common_test.go                |  3 +-
 storage/encryption/encryption.go      | 14 ++-------
 storage/encryption/encryption_test.go | 42 ---------------------------
 storage/hasherstore.go                |  6 ++--
 storage/swarmhasher.go                | 14 ++-------
 storage/types.go                      |  6 ++--
 8 files changed, 22 insertions(+), 73 deletions(-)

diff --git a/bmt/bmt.go b/bmt/bmt.go
index 53e22cc07b..eaf7dae697 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -298,14 +298,21 @@ func (h *Hasher) SectionSize() int {
 	return h.pool.SegmentSize
 }
 
+// Implements param.SectionWriter
 func (h *Hasher) SetLength(length int) {
 }
 
+// Implements param.SectionWriter
 func (h *Hasher) SetSpan(length int) {
 	span := LengthToSpan(length)
 	h.getTree().span = span
 }
 
+// Implements storage.SwarmHash
+func (h *Hasher) SetSpanBytes(b []byte) {
+
+}
+
 // Implements param.SectionWriter
 func (h *Hasher) Branches() int {
 	return h.pool.SegmentCount
 }
diff --git a/storage/chunker_test.go b/storage/chunker_test.go
index fd1af937f2..3e1158d13f 100644
--- a/storage/chunker_test.go
+++ b/storage/chunker_test.go
@@ -151,7 +151,8 @@ func TestSha3ForCorrectness(t *testing.T) {
 	rawSha3Output := rawSha3.Sum(nil)
 
 	sha3FromMakeFunc := MakeHashFunc(SHA3Hash)()
-	sha3FromMakeFunc.ResetWithLength(input[:8])
+	sha3FromMakeFunc.Reset()
+	sha3FromMakeFunc.SetSpanBytes(input[:8])
 	sha3FromMakeFunc.Write(input[8:])
 	sha3FromMakeFuncOutput := sha3FromMakeFunc.Sum(nil)
 
diff --git a/storage/common_test.go b/storage/common_test.go
index a65a686943..e625cd8091 100644
--- a/storage/common_test.go
+++ b/storage/common_test.go
@@ -151,7 +151,8 @@ func testStoreCorrect(m ChunkStore, n int, t *testing.T) {
 		}
 		hasher := MakeHashFunc(DefaultHash)()
 		data := chunk.Data()
-		hasher.ResetWithLength(data[:8])
+		hasher.Reset()
+		hasher.SetSpanBytes(data[:8])
 		hasher.Write(data[8:])
 		exp := hasher.Sum(nil)
 		if !bytes.Equal(h, exp) {
diff --git a/storage/encryption/encryption.go b/storage/encryption/encryption.go
index a5ec2d5efa..6fbdab062b 100644
--- a/storage/encryption/encryption.go
+++ b/storage/encryption/encryption.go
@@ -31,14 +31,12 @@ type Key []byte
 type Encryption interface {
 	Encrypt(data []byte) ([]byte, error)
 	Decrypt(data []byte) ([]byte, error)
-	Reset()
 }
 
 type encryption struct {
 	key      Key    // the encryption key (hashSize bytes long)
 	keyLen   int    // length of the key = length of blockcipher block
 	padding  int    // encryption will pad the data upto this if > 0
-	index    int    // counter index
 	initCtr  uint32 // initial counter used for counter mode blockcipher
 	hashFunc func() hash.Hash // hasher constructor function
 }
@@ -81,24 +79,18 @@ func (e *encryption) Decrypt(data []byte) ([]byte, error) {
 	return out, nil
 }
 
-// Reset resets the counter. It is only safe to call after an encryption operation is completed
-// After Reset is called, the Encryption object can be re-used for other data
-func (e *encryption) Reset() {
-	e.index = 0
-}
-
-// split up input into keylength segments and encrypt sequentially
+//
 func (e *encryption) transform(in, out []byte) {
 	inLength := len(in)
 	wg := sync.WaitGroup{}
 	wg.Add((inLength-1)/e.keyLen + 1)
 	for i := 0; i < inLength; i += e.keyLen {
 		l := min(e.keyLen, inLength-i)
+		// call transformations per segment (asynchronously)
 		go func(i int, x, y []byte) {
 			defer wg.Done()
 			e.Transcrypt(i, x, y)
-		}(e.index, in[i:i+l], out[i:i+l])
-		e.index++
+		}(i/e.keyLen, in[i:i+l], out[i:i+l])
 	}
 	// pad the rest if out is longer
 	pad(out[inLength:])
diff --git a/storage/encryption/encryption_test.go b/storage/encryption/encryption_test.go
index 80ae3da4ef..c89ab184df 100644
--- a/storage/encryption/encryption_test.go
+++ b/storage/encryption/encryption_test.go
@@ -18,7 +18,6 @@ package encryption
 
 import (
 	"bytes"
-	crand "crypto/rand"
 	"testing"
 
 	"github.com/ethereum/go-ethereum/common"
@@ -38,7 +37,6 @@ func init() {
 	if err != nil {
 		panic(err.Error())
 	}
-	testutil.Init()
 }
 
 func TestEncryptDataLongerThanPadding(t *testing.T) {
@@ -134,7 +132,6 @@ func testEncryptDecryptIsIdentity(t *testing.T, padding int, initCtr uint32, dat
 		t.Fatalf("Expected no error got %v", err)
 	}
 
-	enc.Reset()
 	decrypted, err := enc.Decrypt(encrypted)
 	if err != nil {
 		t.Fatalf("Expected no error got %v", err)
@@ -152,42 +149,3 @@ func testEncryptDecryptIsIdentity(t *testing.T, padding int, initCtr uint32, dat
 		t.Fatalf("Expected decrypted %v got %v", common.Bytes2Hex(data), common.Bytes2Hex(decrypted))
 	}
 }
-
-// TestEncryptSectioned tests that the cipherText is the same regardless of size of data input buffer
-func TestEncryptSectioned(t *testing.T) {
-	data := make([]byte, 4096)
-	c, err := crand.Read(data)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if c < 4096 {
-		t.Fatalf("short read %d", c)
-	}
-
-	key := make([]byte, KeyLength)
-	c, err = crand.Read(key)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if c < KeyLength {
-		t.Fatalf("short read %d", c)
-	}
-
-	enc := New(key, 0, uint32(42), sha3.NewLegacyKeccak256)
-	whole, err := enc.Encrypt(data)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	enc.Reset()
-	for i := 0; i < 4096; i += KeyLength {
-		cipher, err := enc.Encrypt(data[i : i+KeyLength])
-		if err != nil {
-			t.Fatal(err)
-		}
-		wholeSection := whole[i : i+KeyLength]
-		if !bytes.Equal(cipher, wholeSection) {
-			t.Fatalf("index %d, expected %x, got %x", i/KeyLength, wholeSection, cipher)
-		}
-	}
-}
diff --git a/storage/hasherstore.go b/storage/hasherstore.go
index b71d31a536..d81ffba5aa 100644
--- a/storage/hasherstore.go
+++ b/storage/hasherstore.go
@@ -18,7 +18,6 @@ package storage
 
 import (
 	"context"
-	"encoding/binary"
 	"fmt"
 	"sync"
 	"sync/atomic"
@@ -186,9 +185,8 @@ func (h *hasherStore) startWait(ctx context.Context) {
 func (h *hasherStore) createHash(chunkData ChunkData) Address {
 	hasher := h.hashFunc()
 	hasher.Reset()
-	lengthNumber := int(binary.LittleEndian.Uint64(chunkData[:8]))
-	hasher.SetLength(lengthNumber) // 8 bytes of length
-	hasher.Write(chunkData[8:])    // minus 8 []byte length
+	hasher.SetSpanBytes(chunkData[:8]) // 8 bytes of length
+	hasher.Write(chunkData[8:])        // minus 8 []byte length
 	return hasher.Sum(nil)
 }
 
diff --git a/storage/swarmhasher.go b/storage/swarmhasher.go
index f67c9a09e8..0cbc12556c 100644
--- a/storage/swarmhasher.go
+++ b/storage/swarmhasher.go
@@ -18,8 +18,6 @@ package storage
 
 import (
 	"hash"
-
-	"github.com/ethersphere/swarm/bmt"
 )
 
 const (
@@ -30,20 +28,14 @@ const (
 
 type SwarmHash interface {
 	hash.Hash
-	//ResetWithLength([]byte)
-	SetLength(int)
+	SetSpanBytes([]byte)
 }
 
 type HashWithLength struct {
 	hash.Hash
 }
 
-//func (h *HashWithLength) ResetWithLength(length []byte) {
-//	h.Reset()
-//	h.Write(length)
-//}
-func (h *HashWithLength) SetLength(length int) {
+func (h *HashWithLength) SetSpanBytes(length []byte) {
 	h.Reset()
-	span := bmt.LengthToSpan(length)
-	h.Write(span)
+	h.Write(length)
 }
diff --git a/storage/types.go b/storage/types.go
index 3ff4d69bb0..9fa258495d 100644
--- a/storage/types.go
+++ b/storage/types.go
@@ -92,8 +92,9 @@ func GenerateRandomChunk(dataSize int64) Chunk {
 	hasher := MakeHashFunc(DefaultHash)()
 	sdata := make([]byte, dataSize+8)
 	rand.Read(sdata[8:])
+	binary.LittleEndian.PutUint64(sdata[:8], uint64(dataSize))
 	hasher.Reset()
-	hasher.SetLength(int(dataSize))
+	hasher.SetSpanBytes(sdata[:8])
 	hasher.Write(sdata[8:])
 	return NewChunk(hasher.Sum(nil), sdata)
}
@@ -203,8 +204,7 @@ func (v *ContentAddressValidator) Validate(ch Chunk) bool {
 
 	hasher := v.Hasher()
 	hasher.Reset()
-	lengthNumber := int(binary.LittleEndian.Uint64(data[8:]))
-	hasher.SetLength(lengthNumber)
+	hasher.SetSpanBytes(data[:8])
 	hasher.Write(data[8:])
 	hash := hasher.Sum(nil)
 
From 0fbe5f8d42abbbf4c7e59fcca68c6daca0ef76a8 Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 15:00:57 +0100
Subject: [PATCH 66/67] bmt, param: Cleanup + implement SetSpanBytes()

---
 bmt/bmt.go  | 32 +++++++++++++++++++-------------
 param/io.go | 14 +++++++-------
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/bmt/bmt.go b/bmt/bmt.go
index eaf7dae697..cb0204c0c0 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -20,6 +20,7 @@ package bmt
 import (
 	"context"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"hash"
 	"strings"
@@ -310,7 +311,9 @@ func (h *Hasher) SetSpan(length int) {
 
 // Implements storage.SwarmHash
 func (h *Hasher) SetSpanBytes(b []byte) {
-
+	t := h.getTree()
+	t.span = make([]byte, 8)
+	copy(t.span, b)
 }
 
 // Implements param.SectionWriter
@@ -345,7 +348,6 @@ func (h *Hasher) BlockSize() int {
 // data before it calculates and returns the hash of the chunk
 // caller must make sure Sum is not called concurrently with Write, writeSection
 // Implements hash.Hash in param.SectionWriter
-// TODO: if span is nil return the zero-hash
 func (h *Hasher) Sum(b []byte) (s []byte) {
 	t := h.getTree()
 	if h.size == 0 && t.offset == 0 {
@@ -362,11 +364,6 @@ func (h *Hasher) Sum(b []byte) (s []byte) {
 	span := t.span
 	// release the tree resource back to the pool
 	h.releaseTree()
-	// b + sha3(span + BMT(pure_chunk))
-	//if len(span) == 0 {
-	//if span == nil {
-	//	return append(b, s...)
-	//}
 	return doSum(h.pool.hasher(), b, span, s)
 }
 
@@ -469,6 +466,7 @@ func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher {
 		seccount: seccount,
 		write:    write,
 		jobSize:  0,
+		sought:   true,
 	}
 }
 
@@ -493,16 +491,21 @@ type AsyncHasher struct {
 	secsize  int // size of base section (size of hash or double)
 	seccount int // base section count
 	write    func(i int, section []byte, final bool)
+	errFunc  func(error)
 	all      bool // if all written in one go, temporary workaround
+	sought   bool
 	jobSize  int
 }
 
 // Implements param.SectionWriter
+// TODO context should be implemented all across (ie original TODO in TreePool.reserve())
 func (sw *AsyncHasher) Init(_ context.Context, errFunc func(error)) {
+	sw.errFunc = errFunc
 }
 
 // Implements param.SectionWriter
 func (sw *AsyncHasher) Reset() {
+	sw.sought = true
 	sw.jobSize = 0
 	sw.all = false
 	sw.Hasher.Reset()
@@ -514,7 +517,7 @@ func (sw *AsyncHasher) SetLength(length int) {
 
 // Implements param.SectionWriter
 func (sw *AsyncHasher) SetWriter(_ param.SectionWriterFunc) param.SectionWriter {
-	log.Warn("Asynchasher does not currently support SectionWriter chaining")
+	sw.errFunc(errors.New("Asynchasher does not currently support SectionWriter chaining"))
 	return sw
 }
 
@@ -530,20 +533,22 @@ func (sw *AsyncHasher) Branches() int {
 	return sw.seccount
 }
 
+// SeekSection sets the cursor where the next Write() will write
+// It locks the cursor until Write() is called; if no Write() is called, it will hang.
+// Implements param.SectionWriter
 func (sw *AsyncHasher) SeekSection(offset int) {
 	sw.mtx.Lock()
 	sw.Hasher.SeekSection(offset)
 }
 
 // Write writes to the current position cursor of the Hasher
-// The cursor must be manually set with SeekSection().
+// The cursor must first be manually set with SeekSection()
 // The method will NOT advance the cursor.
-//
 // Implements hash.hash in param.SectionWriter
 func (sw *AsyncHasher) Write(section []byte) (int, error) {
 	defer sw.mtx.Unlock()
 	sw.Hasher.size += len(section)
-	return sw.writeSection(sw.Hasher.cursor, section)
+	c, err := sw.writeSection(sw.Hasher.cursor, section)
 }
 
 // Write writes the i-th section of the BMT base
@@ -559,7 +564,8 @@ func (sw *AsyncHasher) writeSection(i int, section []byte) (int, error) {
 		sw.all = true
 		return len(section), nil
 	}
-	//sw.mtx.Lock()
+	//sw.mtx.Lock() // this lock is now set in SeekSection
+	// defer sw.mtx.Unlock() // this unlock is still left in Write()
 	t := sw.getTree()
 	// cursor keeps track of the rightmost section written so far
 	// if index is lower than cursor then just write non-final section as is
@@ -609,6 +615,7 @@ func (sw *AsyncHasher) Sum(b []byte) (s []byte) {
 	t := sw.getTree()
 	length := sw.jobSize
 	if length == 0 {
+		sw.releaseTree()
 		sw.mtx.Unlock()
 		s = sw.pool.zerohashes[sw.pool.Depth]
 		return
 	} else {
@@ -803,7 +810,6 @@ func calculateDepthFor(n int) (d int) {
 
 // creates a binary span size representation
 // to pass to bmt.SectionWriter
-// TODO: move to bmt.SectionWriter, which is the object for which this is actually relevant
 func LengthToSpan(length int) []byte {
 	spanBytes := make([]byte, 8)
 	binary.LittleEndian.PutUint64(spanBytes, uint64(length))
diff --git a/param/io.go b/param/io.go
index 6140d6319e..3eb14bce67 100644
--- a/param/io.go
+++ b/param/io.go
@@ -9,11 +9,11 @@ type SectionWriterFunc func(ctx context.Context) SectionWriter
 
 type SectionWriter interface {
 	hash.Hash
-	SetWriter(hashFunc SectionWriterFunc) SectionWriter
-	SeekSection(section int)
-	Init(ctx context.Context, errFunc func(error)) // errFunc is used for asynchronous components to signal error and termination
-	SetLength(length int)
-	SetSpan(length int)
-	SectionSize() int // size of the async section unit to use
-	Branches() int
+	Init(ctx context.Context, errFunc func(error))      // errFunc is used for asynchronous components to signal error and termination
+	SetWriter(hashFunc SectionWriterFunc) SectionWriter // chain another SectionWriter to the current instance
+	SeekSection(section int)                            // sets cursor that next Write() will write to
+	SetLength(length int)                               // set total number of bytes that will be written to SectionWriter
+	SetSpan(length int)                                 // set data span of chunk
+	SectionSize() int                                   // section size of this SectionWriter
+	Branches() int                                      // branch factor of this SectionWriter
 }
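The doc comment patch 66 adds to SeekSection() states the hazard plainly: the call leaves the mutex held, so a SeekSection() without a matching Write() hangs the hasher. The reordered param/io.go interface above is the contract every component in file/* now implements; spelled out with the embedded hash.Hash methods it amounts to (sketch for reference only, assuming "io" and "context" imports):

    type sectionWriter interface {
    	io.Writer            // Write, via hash.Hash
    	Sum(b []byte) []byte // via hash.Hash
    	Reset()              // via hash.Hash
    	Size() int           // via hash.Hash
    	BlockSize() int      // via hash.Hash
    	Init(ctx context.Context, errFunc func(error))
    	SetWriter(hashFunc param.SectionWriterFunc) param.SectionWriter
    	SeekSection(section int)
    	SetLength(length int)
    	SetSpan(length int)
    	SectionSize() int
    	Branches() int
    }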
From 4d85352699b6b1ead7adaf0cead56086cd008eb3 Mon Sep 17 00:00:00 2001
From: nolash
Date: Tue, 10 Dec 2019 16:29:21 +0100
Subject: [PATCH 67/67] file, bmt: Cleanup

---
 bmt/bmt.go              | 2 +-
 file/encrypt/encrypt.go | 1 -
 file/split_test.go      | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/bmt/bmt.go b/bmt/bmt.go
index cb0204c0c0..d20e2c1aa3 100644
--- a/bmt/bmt.go
+++ b/bmt/bmt.go
@@ -548,7 +548,7 @@ func (sw *AsyncHasher) Write(section []byte) (int, error) {
 	defer sw.mtx.Unlock()
 	sw.Hasher.size += len(section)
-	c, err := sw.writeSection(sw.Hasher.cursor, section)
+	return sw.writeSection(sw.Hasher.cursor, section)
 }
 
 // Write writes the i-th section of the BMT base
diff --git a/file/encrypt/encrypt.go b/file/encrypt/encrypt.go
index 22783359d6..4ee570afd1 100644
--- a/file/encrypt/encrypt.go
+++ b/file/encrypt/encrypt.go
@@ -68,7 +68,6 @@ func (e *Encrypt) Write(b []byte) (int, error) {
 }
 
 func (e *Encrypt) Reset() {
-	e.e.Reset()
 	e.w.Reset()
 }
 
diff --git a/file/split_test.go b/file/split_test.go
index b7ffd043b3..ab1de3ba08 100644
--- a/file/split_test.go
+++ b/file/split_test.go
@@ -77,7 +77,6 @@ func TestSplitWithDataFileStore(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	time.Sleep(time.Second)
 	refHex := hexutil.Encode(ref)
 	correctRefHex := "0xc10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef"
 	if refHex != correctRefHex {