Skip to content
This repository has been archived by the owner on Aug 2, 2021. It is now read-only.

file, testutil: Add reference file hasher #2099

Merged
merged 7 commits into from
Feb 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions file/hasher/common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package hasher

import (
"github.com/ethersphere/swarm/testutil"
)

// Test fixture dimensions mirroring the default BMT hasher geometry:
// 32-byte sections and a branching factor of 128 yield 4096-byte chunks.
const (
	sectionSize = 32
	branches    = 128
	// chunkSize is derived rather than hard-coded so the three values
	// cannot drift out of sync (32 * 128 = 4096).
	chunkSize = sectionSize * branches
)

var (
	// dataLengths enumerates input sizes chosen to exercise tree boundaries:
	// around a single section (31/32/33), around two sections (63/64/65),
	// around one chunk (4096 +/- a section), around one full level of 128
	// chunks, and around two full levels (128*128 chunks).
	dataLengths = []int{31, // 0
		32,                     // 1
		33,                     // 2
		63,                     // 3
		64,                     // 4
		65,                     // 5
		chunkSize,              // 6
		chunkSize + 31,         // 7
		chunkSize + 32,         // 8
		chunkSize + 63,         // 9
		chunkSize + 64,         // 10
		chunkSize * 2,          // 11
		chunkSize*2 + 32,       // 12
		chunkSize * 128,        // 13
		chunkSize*128 + 31,     // 14
		chunkSize*128 + 32,     // 15
		chunkSize*128 + 64,     // 16
		chunkSize * 129,        // 17
		chunkSize * 130,        // 18
		chunkSize * 128 * 128,  // 19
		chunkSize*128*128 + 32, // 20
	}
	// expected holds the hex-encoded reference root hashes, one per entry
	// in dataLengths (same index).
	expected = []string{
		"ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0
		"0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1
		"3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2
		"95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3
		"490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4
		"541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5
		"c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6
		"91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7
		"73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8
		"db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9
		"ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10
		"29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11
		"61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12
		"3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13
		"e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14
		"485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15
		"624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16
		"b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17
		"59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18
		"522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19
		"ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20
	}

	// start and end bound the subrange of dataLengths that the tests
	// iterate over (default: all entries).
	start = 0
	end   = len(dataLengths)
)

// init runs the shared test-utility initialization (flag/logging setup in
// the testutil package) before any test in this package executes.
func init() {
	testutil.Init()
}
2 changes: 1 addition & 1 deletion file/hasher/hasher.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// You should have received a copy of the GNU Lesser General Public License
// along with the Swarm library. If not, see <http://www.gnu.org/licenses/>.

package file
package hasher

import (
"context"
Expand Down
2 changes: 1 addition & 1 deletion file/hasher/hasher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// You should have received a copy of the GNU Lesser General Public License
// along with the Swarm library. If not, see <http://www.gnu.org/licenses/>.

package file
package hasher

import (
"bytes"
Expand Down
56 changes: 56 additions & 0 deletions file/hasher/param.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package hasher

import (
"context"
"sync"

"github.com/ethersphere/swarm/file"
)

// defines the boundaries of the hashing job and also contains the hash factory function of the job
// setting Debug means omitting any automatic behavior (for now it means job processing won't auto-start)
type treeParams struct {
SectionSize int
acud marked this conversation as resolved.
Show resolved Hide resolved
Branches int
ChunkSize int
Spans []int
Debug bool
hashFunc file.SectionWriterFunc
writerPool sync.Pool
ctx context.Context
}

// newTreeParams probes the supplied writer factory for its dimensions and
// returns a treeParams populated with them. The writer pool is wired to
// create new writers from the same factory on demand.
func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams {
	probe := hashFunc(context.Background())
	params := &treeParams{
		SectionSize: probe.SectionSize(),
		Branches:    probe.Branches(),
		ChunkSize:   probe.SectionSize() * probe.Branches(),
		hashFunc:    hashFunc,
	}
	probe.Reset()

	params.writerPool.New = func() interface{} {
		return params.hashFunc(params.ctx)
	}
	// 9 is the maximum tree depth supported by the hasher (see ReferenceHasher)
	params.Spans = generateSpanSizes(params.Branches, 9)
	return params
}

func (p *treeParams) SetContext(ctx context.Context) {
p.ctx = ctx
acud marked this conversation as resolved.
Show resolved Hide resolved
}

// GetContext returns the context previously stored with SetContext
// (nil if SetContext has not been called).
func (p *treeParams) GetContext() context.Context {
	return p.ctx
}

// PutWriter resets the given section writer and returns it to the pool
// for later reuse by GetWriter.
func (p *treeParams) PutWriter(w file.SectionWriter) {
	w.Reset()
	p.writerPool.Put(w)
}

// GetWriter fetches a section writer from the pool; if the pool is empty a
// new writer is created via the hash factory (see writerPool.New).
func (p *treeParams) GetWriter() file.SectionWriter {
	return p.writerPool.Get().(file.SectionWriter)
}
145 changes: 145 additions & 0 deletions file/hasher/reference.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package hasher

import (
"github.com/ethersphere/swarm/file"
)

// ReferenceHasher is the source-of-truth implementation of the swarm file
// hashing algorithm. It keeps all data and intermediate hashes in a single
// flat buffer, with one cursor per tree level marking the write position.
type ReferenceHasher struct {
	params  *treeParams
	cursors []int              // section write position, indexed per level
	length  int                // number of bytes written to the data level of the hasher
	buffer  []byte             // keeps data and hashes, indexed by cursors
	counts  []int              // number of sums performed, indexed per level
	hasher  file.SectionWriter // underlying hasher
}

// NewReferenceHasher constructs and returns a new ReferenceHasher
// This implementation is limited to a tree of 9 levels, where level 0 is the data level
// With 32 section size and 128 branches (i.e. unencrypted, non erasure-coded content) this means
// a capacity of 4096 bytes * (128^(9-1)) ~ 295.148 * (10^18) bytes
func NewReferenceHasher(params *treeParams) *ReferenceHasher {
// TODO: remove when bmt interface is amended
h := params.GetWriter()
return &ReferenceHasher{
params: params,
cursors: make([]int, 9),
counts: make([]int, 9),
buffer: make([]byte, params.ChunkSize*9),
nolash marked this conversation as resolved.
Show resolved Hide resolved
hasher: h,
}
}

// Hash computes and returns the root hash of arbitrary data.
// The input is fed to the data level one chunk at a time; any trailing
// partial chunk and dangling single references are resolved afterwards.
func (r *ReferenceHasher) Hash(data []byte) []byte {
	chunkSize := r.params.ChunkSize
	for offset := 0; offset < len(data); offset += chunkSize {
		boundary := offset + chunkSize
		if boundary > len(data) {
			boundary = len(data)
		}
		r.update(0, data[offset:boundary])
	}

	// if we didn't end on a chunk boundary we need to hash remaining chunks first
	r.hashUnfinished()

	// if the already hashed parts tree is balanced
	r.moveDanglingChunk()

	return r.digest()
}

// write to the data buffer on the specified level
// calls sum if chunk boundary is reached and recursively calls this function for the next level with the acquired bmt hash
// adjusts cursors accordingly
func (r *ReferenceHasher) update(lvl int, data []byte) {
if lvl == 0 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be nice to write that level 0 is the data layer. especially when this reference hasher is another representation of a tree or trie, in which tree height is measured as the inverse (0 is the root)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed.

r.length += len(data)
}
copy(r.buffer[r.cursors[lvl]:r.cursors[lvl]+len(data)], data)
r.cursors[lvl] += len(data)
if r.cursors[lvl]-r.cursors[lvl+1] == r.params.ChunkSize {
ref := r.sum(lvl)
r.update(lvl+1, ref)
r.cursors[lvl] = r.cursors[lvl+1]
acud marked this conversation as resolved.
Show resolved Hide resolved
}
}

// calculates and returns the bmt sum of the last written data on the level
func (r *ReferenceHasher) sum(lvl int) []byte {
r.counts[lvl]++
spanSize := r.params.Spans[lvl] * r.params.ChunkSize
span := (r.length-1)%spanSize + 1
acud marked this conversation as resolved.
Show resolved Hide resolved

sizeToSum := r.cursors[lvl] - r.cursors[lvl+1]

r.hasher.Reset()
r.hasher.SetSpan(span)
r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+sizeToSum])
ref := r.hasher.Sum(nil)
return ref
}

// digest returns the root hash. It must only be called after all data has
// been written and all levels have been summed (see hashUnfinished and
// moveDanglingChunk), at which point the root reference sits at the start
// of the buffer.
func (r *ReferenceHasher) digest() []byte {

	// the first section of the buffer will hold the root hash
	return r.buffer[:r.params.SectionSize]
}

// hashes the remaining unhashed chunks at the end of each level
func (r *ReferenceHasher) hashUnfinished() {
if r.length%r.params.ChunkSize != 0 {
ref := r.sum(0)
acud marked this conversation as resolved.
Show resolved Hide resolved
copy(r.buffer[r.cursors[1]:], ref)
r.cursors[1] += len(ref)
r.cursors[0] = r.cursors[1]
}
}

// in case of a balanced tree this method concatenates the reference to the single reference
// at the highest level of the tree.
//
// Let F be full chunks (disregarding branching factor) and S be single references
// in the following scenario:
//
// S
// F F
// F F F
// F F F F S
//
// The result will be:
//
// SS
// F F
// F F F
// F F F F
//
// After which the SS will be hashed to obtain the final root hash
func (r *ReferenceHasher) moveDanglingChunk() {

// calculate the total number of levels needed to represent the data (including the data level)
targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches)

// sum every intermediate level and write to the level above it
for i := 1; i < targetLevel; i++ {
acud marked this conversation as resolved.
Show resolved Hide resolved

// and if there is a single reference outside a balanced tree on this level
// don't hash it again but pass it on to the next level
if r.counts[i] > 0 {
// TODO: simplify if possible
if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 {
acud marked this conversation as resolved.
Show resolved Hide resolved
r.cursors[i+1] = r.cursors[i]
r.cursors[i] = r.cursors[i-1]
continue
}
}

ref := r.sum(i)
copy(r.buffer[r.cursors[i+1]:], ref)
r.cursors[i+1] += len(ref)
r.cursors[i] = r.cursors[i+1]
}
}
Loading