From 9d7d511b1a32b1f108cc7fb3d292cb54c3e29d4e Mon Sep 17 00:00:00 2001 From: Evan Forbes <42654277+evan-forbes@users.noreply.github.com> Date: Wed, 22 Sep 2021 12:15:59 -0500 Subject: [PATCH] Refactor DAH creation to better accommodate celestia-node use case (#539) * Basic DA functionality (#83) * move Messages field to the end of Block.Data * Add some constants for share computation and the NMT: - also a bunch of todos regarding shares computation * First (compiling) stab on creating shares * Test with Evidence and fix bug discovered by test * remove resolved todos * introduce split method * Introduce LenDelimitedMarshaler interface and some reformatting * Introduce TxLenDelimitedMarshaler * add some test cases * fix some comments * fix some comments & linter * Add reserved namespaces to params * Move ll-specific consts into a separate file (consts.go) * Add MarshalDelimited to HexBytes * Add tail-padding shares * Add ComputeShares method on Data to compute all shares * Fix compute the next square num and not the next power of two * lints * Unexport MakeShares function: - it's likely to change and it doesn't have to be part of the public API * lints 2 * First stab on computing row/column roots * fix rebase glitches: - move DA related constants out of params.go * refactor MakeBlock to take in interm. 
state roots and messages * refactor state.MakeBlock too * Add todos LenDelimitedMarshaler and extract appendShares logic * Simplify shares computation: remove LenDelimitedMarshaler abstraction * actually use DA header to compute the DataRoot everywhere (will lead to failing tests for sure) * WIP: Update block related core data structures in protobuf too * WIP: fix zero shares edge-case and get rid of Block.Data.hash (use dataAvailabilityHeader.Hash() instead) * Fixed tests, only 3 failing tests to go: TestReapMaxBytesMaxGas, TestTxFilter, TestMempoolFilters * Fix TestTxFilter: - the size of the wrapping Data{} proto message increased a few bytes * Fix Message proto and `DataFromProto` * Fix last 2 remaining tests related to the increased block/block.Data size * Use infectious lib instead of leopard * proto-lint: snake_case * some lints and minor changes * linter * panic if pushing to tree fails, extend Data.ToProto() * revert renaming in comment * add todo about refactoring as soon as the rsmt2d allows the user to choose the merkle tree * clean up some unused test helper functions * linter * still debugging the exact right number of bytes for max data... * Implement spec-compliant share splitting (#246) * Export block data compute shares. * Refactor to use ShareSize constant directly. * Change message splitting to prefix namespace ID. * Implement chunking for contiguous. * Add termination condition. * Rename append contiguous to split contiguous. * Update test for small tx. * Add test for two contiguous. * Make tx and msg adjusted share sizes exported constants. * Panic on hopefully-unreachable condition instead of silently skipping. * Update hardcoded response for block format. 
Co-authored-by: Ismail Khoffi * fix overwrite bug (#251) * fix overwrite bug and stop splitting shares of size MsgShareSize * remove ineffectual code * review feedback: better docs Co-authored-by: Ismail Khoffi * remove unneeded copy and only fix the source of the bug Co-authored-by: Ismail Khoffi * fix overwrite bug while also being consistent with using NamespacedShares * update to the latest rsmt2d for the nmt wrapper Co-authored-by: Ismail Khoffi * Spec compliant merge shares (#261) * start spec compliant share merging * refactor and finish unit testing * whoops * linter gods * fix initial changes and use constants * use constant * more polish * docs fix * review feedback: docs and out of range panic protection * review feedback: add panic protection from empty input * use constant instead of recalculating `ShareSize` * don't redeclare existing var * be more explicit with returned nil * use constant instead of recalculating `ShareSize` * review feedback: use consistent capitalization * stop accepting reserved namespaces as normal messages * use a descriptive var name for message length * linter and comparison fix * reorg tests, add test for parse delimiter, DataFromBlock and fix evidence marshal bug * catch error for linter * update test MakeShares to include length delimiters for the SHARE_RESERVED_BYTE * minor iteration change * refactor share splitting to fix bug * fix all bugs with third and final refactor * fix conflict * revert unnecessary changes * review feedback: better docs * review feedback: add comment for safeLen * review feedback: remove unnecessary comments * review feedback: split up share merging and splitting into their own files * review feedback: more descriptive var names * fix accidental change * add some constant docs * spelling error Co-authored-by: Hlib Kanunnikov Co-authored-by: John Adler Co-authored-by: Ismail Khoffi * refactor to better accommodate real world use cases (celestia node) Co-authored-by: rene 
<41963722+renaynay@users.noreply.github.com> * thank you linter Co-authored-by: Ismail Khoffi Co-authored-by: John Adler Co-authored-by: Hlib Kanunnikov Co-authored-by: rene <41963722+renaynay@users.noreply.github.com> --- pkg/consts/consts.go | 3 ++ pkg/da/data_availability_header.go | 57 +++++++++++-------------- pkg/da/data_availability_header_test.go | 42 +++++++++++++----- pkg/wrapper/nmt_wrapper_test.go | 8 ++-- types/shares_test.go | 2 +- 5 files changed, 64 insertions(+), 48 deletions(-) diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index c7d9025fb2..36d83231a1 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -63,5 +63,8 @@ var ( // NewBaseHashFunc change accordingly if another hash.Hash should be used as a base hasher in the NMT: NewBaseHashFunc = sha256.New + // DefaultCodec is the default codec creator used for data erasure + // TODO(ismail): for better efficiency and a larger number shares + // we should switch to the rsmt2d.LeopardFF16 codec: DefaultCodec = rsmt2d.NewRSGF8Codec ) diff --git a/pkg/da/data_availability_header.go b/pkg/da/data_availability_header.go index b9293f7204..61e1f3c0c3 100644 --- a/pkg/da/data_availability_header.go +++ b/pkg/da/data_availability_header.go @@ -14,8 +14,8 @@ import ( ) const ( - maxDAHSize = consts.MaxSquareSize * 2 - minDAHSize = consts.MinSquareSize * 2 + maxExtendedSquareWidth = consts.MaxSquareSize * 2 + minExtendedSquareWidth = consts.MinSquareSize * 2 ) // DataAvailabilityHeader (DAHeader) contains the row and column roots of the erasure @@ -38,10 +38,23 @@ type DataAvailabilityHeader struct { } // NewDataAvailabilityHeader generates a DataAvailability header using the provided square size and shares -func NewDataAvailabilityHeader(squareSize uint64, shares [][]byte) (DataAvailabilityHeader, error) { +func NewDataAvailabilityHeader(eds *rsmt2d.ExtendedDataSquare) DataAvailabilityHeader { + // generate the row and col roots using the EDS + dah := DataAvailabilityHeader{ + RowsRoots: 
eds.RowRoots(), + ColumnRoots: eds.ColRoots(), + } + + // generate the hash of the data using the new roots + dah.Hash() + + return dah +} + +func ExtendShares(squareSize uint64, shares [][]byte) (*rsmt2d.ExtendedDataSquare, error) { // Check that square size is with range if squareSize < consts.MinSquareSize || squareSize > consts.MaxSquareSize { - return DataAvailabilityHeader{}, fmt.Errorf( + return nil, fmt.Errorf( "invalid square size: min %d max %d provided %d", consts.MinSquareSize, consts.MaxSquareSize, @@ -50,32 +63,14 @@ func NewDataAvailabilityHeader(squareSize uint64, shares [][]byte) (DataAvailabi } // check that valid number of shares have been provided if squareSize*squareSize != uint64(len(shares)) { - return DataAvailabilityHeader{}, fmt.Errorf( + return nil, fmt.Errorf( "must provide valid number of shares for square size: got %d wanted %d", len(shares), squareSize*squareSize, ) } - tree := wrapper.NewErasuredNamespacedMerkleTree(squareSize) - - // TODO(ismail): for better efficiency and a larger number shares - // we should switch to the rsmt2d.LeopardFF16 codec: - extendedDataSquare, err := rsmt2d.ComputeExtendedDataSquare(shares, rsmt2d.NewRSGF8Codec(), tree.Constructor) - if err != nil { - return DataAvailabilityHeader{}, err - } - - // generate the row and col roots using the EDS - dah := DataAvailabilityHeader{ - RowsRoots: extendedDataSquare.RowRoots(), - ColumnRoots: extendedDataSquare.ColRoots(), - } - - // generate the hash of the data using the new roots - dah.Hash() - - return dah, nil + return rsmt2d.ComputeExtendedDataSquare(shares, consts.DefaultCodec(), tree.Constructor) } // String returns hex representation of merkle hash of the DAHeader. 
@@ -143,16 +138,16 @@ func (dah *DataAvailabilityHeader) ValidateBasic() error { if dah == nil { return errors.New("nil data availability header is not valid") } - if len(dah.ColumnRoots) < minDAHSize || len(dah.RowsRoots) < minDAHSize { + if len(dah.ColumnRoots) < minExtendedSquareWidth || len(dah.RowsRoots) < minExtendedSquareWidth { return fmt.Errorf( "minimum valid DataAvailabilityHeader has at least %d row and column roots", - minDAHSize, + minExtendedSquareWidth, ) } - if len(dah.ColumnRoots) > maxDAHSize || len(dah.RowsRoots) > maxDAHSize { + if len(dah.ColumnRoots) > maxExtendedSquareWidth || len(dah.RowsRoots) > maxExtendedSquareWidth { return fmt.Errorf( "maximum valid DataAvailabilityHeader has at most %d row and column roots", - maxDAHSize, + maxExtendedSquareWidth, ) } if len(dah.ColumnRoots) != len(dah.RowsRoots) { @@ -190,13 +185,11 @@ func MinDataAvailabilityHeader() DataAvailabilityHeader { for i := 0; i < consts.MinSharecount; i++ { shares[i] = tailPaddingShare } - dah, err := NewDataAvailabilityHeader( - consts.MinSquareSize, - shares, - ) + eds, err := ExtendShares(consts.MinSquareSize, shares) if err != nil { panic(err) } + dah := NewDataAvailabilityHeader(eds) return dah } diff --git a/pkg/da/data_availability_header_test.go b/pkg/da/data_availability_header_test.go index 3b16e5ac39..3e16f1019c 100644 --- a/pkg/da/data_availability_header_test.go +++ b/pkg/da/data_availability_header_test.go @@ -37,15 +37,13 @@ func TestNewDataAvailabilityHeader(t *testing.T) { type test struct { name string expectedHash []byte - expectedErr bool squareSize uint64 shares [][]byte } tests := []test{ { - name: "typical", - expectedErr: false, + name: "typical", expectedHash: []byte{ 0xfe, 0x9c, 0x6b, 0xd8, 0xe5, 0x7c, 0xd1, 0x5d, 0x1f, 0xd6, 0x55, 0x7e, 0x87, 0x7d, 0xd9, 0x7d, 0xdb, 0xf2, 0x66, 0xfa, 0x60, 0x24, 0x2d, 0xb3, 0xa0, 0x9c, 0x4f, 0x4e, 0x5b, 0x2a, 0x2c, 0x2a, @@ -54,8 +52,7 @@ func TestNewDataAvailabilityHeader(t *testing.T) { shares: 
generateShares(4, 1), }, { - name: "max square size", - expectedErr: false, + name: "max square size", expectedHash: []byte{ 0xe2, 0x87, 0x23, 0xd0, 0x2d, 0x54, 0x25, 0x5f, 0x79, 0x43, 0x8e, 0xfb, 0xb7, 0xe8, 0xfa, 0xf5, 0xbf, 0x93, 0x50, 0xb3, 0x64, 0xd0, 0x4f, 0xa7, 0x7b, 0xb1, 0x83, 0x3b, 0x8, 0xba, 0xd3, 0xa4, @@ -63,6 +60,28 @@ func TestNewDataAvailabilityHeader(t *testing.T) { squareSize: consts.MaxSquareSize, shares: generateShares(consts.MaxSquareSize*consts.MaxSquareSize, 99), }, + } + + for _, tt := range tests { + tt := tt + eds, err := ExtendShares(tt.squareSize, tt.shares) + require.NoError(t, err) + resdah := NewDataAvailabilityHeader(eds) + require.Equal(t, tt.squareSize*2, uint64(len(resdah.ColumnRoots)), tt.name) + require.Equal(t, tt.squareSize*2, uint64(len(resdah.RowsRoots)), tt.name) + require.Equal(t, tt.expectedHash, resdah.hash, tt.name) + } +} + +func TestExtendShares(t *testing.T) { + type test struct { + name string + expectedErr bool + squareSize uint64 + shares [][]byte + } + + tests := []test{ { name: "too large square size", expectedErr: true, @@ -79,15 +98,13 @@ func TestNewDataAvailabilityHeader(t *testing.T) { for _, tt := range tests { tt := tt - resdah, err := NewDataAvailabilityHeader(tt.squareSize, tt.shares) + eds, err := ExtendShares(tt.squareSize, tt.shares) if tt.expectedErr { require.NotNil(t, err) continue } require.NoError(t, err) - require.Equal(t, tt.squareSize*2, uint64(len(resdah.ColumnRoots)), tt.name) - require.Equal(t, tt.squareSize*2, uint64(len(resdah.RowsRoots)), tt.name) - require.Equal(t, tt.expectedHash, resdah.hash, tt.name) + require.Equal(t, tt.squareSize*2, eds.Width(), tt.name) } } @@ -98,8 +115,9 @@ func TestDataAvailabilityHeaderProtoConversion(t *testing.T) { } shares := generateShares(consts.MaxSquareSize*consts.MaxSquareSize, 1) - bigdah, err := NewDataAvailabilityHeader(consts.MaxSquareSize, shares) + eds, err := ExtendShares(consts.MaxSquareSize, shares) require.NoError(t, err) + bigdah := 
NewDataAvailabilityHeader(eds) tests := []test{ { @@ -133,8 +151,10 @@ func Test_DAHValidateBasic(t *testing.T) { } shares := generateShares(consts.MaxSquareSize*consts.MaxSquareSize, 1) - bigdah, err := NewDataAvailabilityHeader(consts.MaxSquareSize, shares) + eds, err := ExtendShares(consts.MaxSquareSize, shares) require.NoError(t, err) + bigdah := NewDataAvailabilityHeader(eds) + // make a mutant dah that has too many roots var tooBigDah DataAvailabilityHeader tooBigDah.ColumnRoots = make([][]byte, consts.MaxSquareSize*consts.MaxSquareSize) diff --git a/pkg/wrapper/nmt_wrapper_test.go b/pkg/wrapper/nmt_wrapper_test.go index 8bd4e83eb8..a1cd7580b1 100644 --- a/pkg/wrapper/nmt_wrapper_test.go +++ b/pkg/wrapper/nmt_wrapper_test.go @@ -27,7 +27,7 @@ func TestPushErasuredNamespacedMerkleTree(t *testing.T) { tree := n.Constructor() // push test data to the tree - for i, d := range generateErasuredData(t, tc.squareSize, rsmt2d.NewRSGF8Codec()) { + for i, d := range generateErasuredData(t, tc.squareSize, consts.DefaultCodec()) { // push will panic if there's an error tree.Push(d, rsmt2d.SquareIndex{Axis: uint(0), Cell: uint(i)}) } @@ -64,7 +64,7 @@ func TestErasureNamespacedMerkleTreePanics(t *testing.T) { "push over square size", assert.PanicTestFunc( func() { - data := generateErasuredData(t, 16, rsmt2d.NewRSGF8Codec()) + data := generateErasuredData(t, 16, consts.DefaultCodec()) n := NewErasuredNamespacedMerkleTree(uint64(15)) tree := n.Constructor() for i, d := range data { @@ -76,7 +76,7 @@ func TestErasureNamespacedMerkleTreePanics(t *testing.T) { "push in incorrect lexigraphic order", assert.PanicTestFunc( func() { - data := generateErasuredData(t, 16, rsmt2d.NewRSGF8Codec()) + data := generateErasuredData(t, 16, consts.DefaultCodec()) n := NewErasuredNamespacedMerkleTree(uint64(16)) tree := n.Constructor() for i := len(data) - 1; i > 0; i-- { @@ -104,7 +104,7 @@ func TestExtendedDataSquare(t *testing.T) { tree := 
NewErasuredNamespacedMerkleTree(uint64(squareSize)) - _, err := rsmt2d.ComputeExtendedDataSquare(raw, rsmt2d.NewRSGF8Codec(), tree.Constructor) + _, err := rsmt2d.ComputeExtendedDataSquare(raw, consts.DefaultCodec(), tree.Constructor) assert.NoError(t, err) } diff --git a/types/shares_test.go b/types/shares_test.go index ddf7c29b07..e5cd4abe3e 100644 --- a/types/shares_test.go +++ b/types/shares_test.go @@ -252,7 +252,7 @@ func TestDataFromSquare(t *testing.T) { shares, _ := data.ComputeShares() rawShares := shares.RawShares() - eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, rsmt2d.NewRSGF8Codec(), rsmt2d.NewDefaultTree) + eds, err := rsmt2d.ComputeExtendedDataSquare(rawShares, consts.DefaultCodec(), rsmt2d.NewDefaultTree) if err != nil { t.Error(err) }