From 38ae1c0a5ed19a6831b13a9d3805fd723e61ae5b Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Wed, 29 May 2024 22:12:18 -0400 Subject: [PATCH 01/15] search and replace with SchemaVersionID --- internal/core/crdt/composite.go | 4 ++-- internal/core/crdt/counter.go | 2 +- internal/core/crdt/lwwreg.go | 2 +- internal/core/key.go | 10 +++++----- tests/integration/query/commits/simple_test.go | 2 +- .../query/commits/with_field_test.go | 2 +- .../query/latest_commits/with_doc_id_test.go | 2 +- .../query/simple/with_version_test.go | 2 +- .../updates/add/field/create_update_test.go | 18 +++++++++--------- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/internal/core/crdt/composite.go b/internal/core/crdt/composite.go index a6b7299a60..58372cfb49 100644 --- a/internal/core/crdt/composite.go +++ b/internal/core/crdt/composite.go @@ -90,7 +90,7 @@ func (c CompositeDAG) Set(status client.DocumentStatus) *CompositeDAGDelta { return &CompositeDAGDelta{ DocID: []byte(c.key.DocID), FieldName: c.fieldName, - SchemaVersionID: c.schemaVersionKey.SchemaVersionId, + SchemaVersionID: c.schemaVersionKey.SchemaVersionID, Status: status, } } @@ -130,7 +130,7 @@ func (c CompositeDAG) Merge(ctx context.Context, delta core.Delta) error { // been migrated yet locally. schemaVersionId = dagDelta.SchemaVersionID } else { - schemaVersionId = c.schemaVersionKey.SchemaVersionId + schemaVersionId = c.schemaVersionKey.SchemaVersionID } err = c.store.Put(ctx, versionKey.ToDS(), []byte(schemaVersionId)) diff --git a/internal/core/crdt/counter.go b/internal/core/crdt/counter.go index 6c4e002223..c87c7d6da6 100644 --- a/internal/core/crdt/counter.go +++ b/internal/core/crdt/counter.go @@ -141,7 +141,7 @@ func (c Counter[T]) Increment(ctx context.Context, value T) (*CounterDelta[T], e DocID: []byte(c.key.DocID), FieldName: c.fieldName, Data: value, - SchemaVersionID: c.schemaVersionKey.SchemaVersionId, + SchemaVersionID: c.schemaVersionKey.SchemaVersionID, Nonce: nonce, }, nil } diff --git a/internal/core/crdt/lwwreg.go b/internal/core/crdt/lwwreg.go index 0df8187dae..edfff9ca05 100644 --- a/internal/core/crdt/lwwreg.go +++ b/internal/core/crdt/lwwreg.go @@ -97,7 +97,7 @@ func (reg LWWRegister) Set(value []byte) *LWWRegDelta { Data: value, DocID: []byte(reg.key.DocID), FieldName: reg.fieldName, - SchemaVersionID: reg.schemaVersionKey.SchemaVersionId, + SchemaVersionID: reg.schemaVersionKey.SchemaVersionID, } } diff --git a/internal/core/key.go b/internal/core/key.go index 69b19efb6e..d087c43af8 100644 --- a/internal/core/key.go +++ b/internal/core/key.go @@ -136,7 +136,7 @@ var _ Key = (*CollectionNameKey)(nil) // // This key should be removed in https://github.com/sourcenetwork/defradb/issues/1085 type CollectionSchemaVersionKey struct { - SchemaVersionId string + SchemaVersionID string CollectionID uint32 } @@ -296,7 +296,7 @@ func NewCollectionNameKey(name string) CollectionNameKey { func NewCollectionSchemaVersionKey(schemaVersionId string, collectionID uint32) CollectionSchemaVersionKey { return CollectionSchemaVersionKey{ - SchemaVersionId: schemaVersionId, + SchemaVersionID: schemaVersionId, CollectionID: collectionID, } } @@ -309,7 +309,7 @@ func NewCollectionSchemaVersionKeyFromString(key string) (CollectionSchemaVersio } return CollectionSchemaVersionKey{ - SchemaVersionId: elements[len(elements)-2], + SchemaVersionID: elements[len(elements)-2], CollectionID: uint32(colID), }, nil } @@ -591,8 +591,8 @@ func (k CollectionNameKey) ToDS() ds.Key { func (k CollectionSchemaVersionKey) ToString() string { 
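The rename running through this first patch is mechanical, but the rule it enforces is worth stating: Go style keeps initialisms in a single case, so `SchemaVersionId` becomes `SchemaVersionID`, matching the neighbouring `CollectionID` and `DocID` (golint and staticcheck both flag the mixed-case form). A minimal sketch of the convention as it lands on the key type above, with the comment being mine rather than the patch's:

    type CollectionSchemaVersionKey struct {
        SchemaVersionID string // trailing initialism stays fully upper-cased, not "SchemaVersionId"
        CollectionID    uint32
    }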
result := COLLECTION_SCHEMA_VERSION - if k.SchemaVersionId != "" { - result = result + "/" + k.SchemaVersionId + if k.SchemaVersionID != "" { + result = result + "/" + k.SchemaVersionID } if k.CollectionID != 0 { diff --git a/tests/integration/query/commits/simple_test.go b/tests/integration/query/commits/simple_test.go index 13f8307840..4b2d037d61 100644 --- a/tests/integration/query/commits/simple_test.go +++ b/tests/integration/query/commits/simple_test.go @@ -104,7 +104,7 @@ func TestQueryCommitsMultipleDocs(t *testing.T) { testUtils.ExecuteTestCase(t, test) } -func TestQueryCommitsWithSchemaVersionIdField(t *testing.T) { +func TestQueryCommitsWithSchemaVersionIDField(t *testing.T) { test := testUtils.TestCase{ Description: "Simple commits query yielding schemaVersionId", Actions: []any{ diff --git a/tests/integration/query/commits/with_field_test.go b/tests/integration/query/commits/with_field_test.go index fa1886304b..6d4922d9b8 100644 --- a/tests/integration/query/commits/with_field_test.go +++ b/tests/integration/query/commits/with_field_test.go @@ -110,7 +110,7 @@ func TestQueryCommitsWithCompositeFieldId(t *testing.T) { // This test is for documentation reasons only. This is not // desired behaviour (Users should not be specifying field ids). -func TestQueryCommitsWithCompositeFieldIdWithReturnedSchemaVersionId(t *testing.T) { +func TestQueryCommitsWithCompositeFieldIdWithReturnedSchemaVersionID(t *testing.T) { test := testUtils.TestCase{ Description: "Simple all commits query with docID and field id", Actions: []any{ diff --git a/tests/integration/query/latest_commits/with_doc_id_test.go b/tests/integration/query/latest_commits/with_doc_id_test.go index 726c009cf7..290dea175d 100644 --- a/tests/integration/query/latest_commits/with_doc_id_test.go +++ b/tests/integration/query/latest_commits/with_doc_id_test.go @@ -56,7 +56,7 @@ func TestQueryLatestCommitsWithDocID(t *testing.T) { executeTestCase(t, test) } -func TestQueryLatestCommitsWithDocIDWithSchemaVersionIdField(t *testing.T) { +func TestQueryLatestCommitsWithDocIDWithSchemaVersionIDField(t *testing.T) { test := testUtils.RequestTestCase{ Description: "Simple latest commits query with docID and schema versiion id field", Request: `query { diff --git a/tests/integration/query/simple/with_version_test.go b/tests/integration/query/simple/with_version_test.go index 5baf65a0ae..ea7ac76a2b 100644 --- a/tests/integration/query/simple/with_version_test.go +++ b/tests/integration/query/simple/with_version_test.go @@ -66,7 +66,7 @@ func TestQuerySimpleWithEmbeddedLatestCommit(t *testing.T) { executeTestCase(t, test) } -func TestQuerySimpleWithEmbeddedLatestCommitWithSchemaVersionId(t *testing.T) { +func TestQuerySimpleWithEmbeddedLatestCommitWithSchemaVersionID(t *testing.T) { test := testUtils.RequestTestCase{ Description: "Embedded commits query within object query with schema version id", Request: `query { diff --git a/tests/integration/schema/updates/add/field/create_update_test.go b/tests/integration/schema/updates/add/field/create_update_test.go index d299b70e7f..cd3a0b1267 100644 --- a/tests/integration/schema/updates/add/field/create_update_test.go +++ b/tests/integration/schema/updates/add/field/create_update_test.go @@ -17,8 +17,8 @@ import ( ) func TestSchemaUpdatesAddFieldWithCreateWithUpdateAfterSchemaUpdateAndVersionJoin(t *testing.T) { - initialSchemaVersionId := "bafkreia3o3cetvcnnxyu5spucimoos77ifungfmacxdkva4zah2is3aooe" - updatedSchemaVersionId := "bafkreibz4g6rkxanzn6ro74ezmbwoe5hvcguwvi34judrk2kfuqqtk5ak4" + 
initialSchemaVersionID := "bafkreia3o3cetvcnnxyu5spucimoos77ifungfmacxdkva4zah2is3aooe" + updatedSchemaVersionID := "bafkreibz4g6rkxanzn6ro74ezmbwoe5hvcguwvi34judrk2kfuqqtk5ak4" test := testUtils.TestCase{ Description: "Test schema update, add field with update after schema update, version join", @@ -52,7 +52,7 @@ func TestSchemaUpdatesAddFieldWithCreateWithUpdateAfterSchemaUpdateAndVersionJoi "name": "John", "_version": []map[string]any{ { - "schemaVersionId": initialSchemaVersionId, + "schemaVersionId": initialSchemaVersionID, }, }, }, @@ -89,11 +89,11 @@ func TestSchemaUpdatesAddFieldWithCreateWithUpdateAfterSchemaUpdateAndVersionJoi "_version": []map[string]any{ { // Update commit - "schemaVersionId": updatedSchemaVersionId, + "schemaVersionId": updatedSchemaVersionID, }, { // Create commit - "schemaVersionId": initialSchemaVersionId, + "schemaVersionId": initialSchemaVersionID, }, }, }, @@ -105,8 +105,8 @@ func TestSchemaUpdatesAddFieldWithCreateWithUpdateAfterSchemaUpdateAndVersionJoi } func TestSchemaUpdatesAddFieldWithCreateWithUpdateAfterSchemaUpdateAndCommitQuery(t *testing.T) { - initialSchemaVersionId := "bafkreia3o3cetvcnnxyu5spucimoos77ifungfmacxdkva4zah2is3aooe" - updatedSchemaVersionId := "bafkreibz4g6rkxanzn6ro74ezmbwoe5hvcguwvi34judrk2kfuqqtk5ak4" + initialSchemaVersionID := "bafkreia3o3cetvcnnxyu5spucimoos77ifungfmacxdkva4zah2is3aooe" + updatedSchemaVersionID := "bafkreibz4g6rkxanzn6ro74ezmbwoe5hvcguwvi34judrk2kfuqqtk5ak4" test := testUtils.TestCase{ Description: "Test schema update, add field with update after schema update, commits query", @@ -147,11 +147,11 @@ func TestSchemaUpdatesAddFieldWithCreateWithUpdateAfterSchemaUpdateAndCommitQuer Results: []map[string]any{ { // Update commit - "schemaVersionId": updatedSchemaVersionId, + "schemaVersionId": updatedSchemaVersionID, }, { // Create commit - "schemaVersionId": initialSchemaVersionId, + "schemaVersionId": initialSchemaVersionID, }, }, }, From 9e68eba52208b886004e6ebb6e51a758777a993b Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Thu, 30 May 2024 12:38:53 -0400 Subject: [PATCH 02/15] remove net order tests --- .../peer/with_create_update_test.go | 0 .../replicator/with_create_test.go | 0 tests/integration/net/order/tcp_test.go | 170 -------- tests/integration/net/order/utils.go | 382 ------------------ .../simple/peer/crdt/pcounter_test.go | 0 .../simple/peer/crdt/pncounter_test.go | 0 .../subscribe/with_add_get_remove_test.go | 0 .../peer/subscribe/with_add_get_test.go | 0 .../peer/subscribe/with_add_remove_test.go | 0 .../simple/peer/subscribe/with_add_test.go | 0 .../simple/peer/subscribe/with_get_test.go | 0 .../simple/peer/with_create_add_field_test.go | 0 .../simple/peer/with_create_test.go | 0 .../simple/peer/with_delete_test.go | 0 .../simple/peer/with_update_add_field_test.go | 0 .../simple/peer/with_update_restart_test.go | 0 .../simple/peer/with_update_test.go | 0 .../peer_replicator/crdt/pcounter_test.go | 0 .../peer_replicator/crdt/pncounter_test.go | 0 .../peer_replicator/with_create_test.go | 0 .../peer_replicator/with_delete_test.go | 0 .../with_update_restart_test.go | 0 .../peer_replicator/with_update_test.go | 0 .../simple/replicator/crdt/pcounter_test.go | 0 .../simple/replicator/crdt/pncounter_test.go | 0 .../replicator/with_create_add_field_test.go | 0 .../replicator/with_create_restart_test.go | 0 .../simple/replicator/with_create_test.go | 0 .../replicator/with_create_update_test.go | 0 .../simple/replicator/with_delete_test.go | 0 .../replicator/with_update_add_field_test.go | 0 
.../simple/replicator/with_update_test.go | 0 32 files changed, 552 deletions(-) rename tests/integration/net/{state => }/one_to_many/peer/with_create_update_test.go (100%) rename tests/integration/net/{state => }/one_to_many/replicator/with_create_test.go (100%) delete mode 100644 tests/integration/net/order/tcp_test.go delete mode 100644 tests/integration/net/order/utils.go rename tests/integration/net/{state => }/simple/peer/crdt/pcounter_test.go (100%) rename tests/integration/net/{state => }/simple/peer/crdt/pncounter_test.go (100%) rename tests/integration/net/{state => }/simple/peer/subscribe/with_add_get_remove_test.go (100%) rename tests/integration/net/{state => }/simple/peer/subscribe/with_add_get_test.go (100%) rename tests/integration/net/{state => }/simple/peer/subscribe/with_add_remove_test.go (100%) rename tests/integration/net/{state => }/simple/peer/subscribe/with_add_test.go (100%) rename tests/integration/net/{state => }/simple/peer/subscribe/with_get_test.go (100%) rename tests/integration/net/{state => }/simple/peer/with_create_add_field_test.go (100%) rename tests/integration/net/{state => }/simple/peer/with_create_test.go (100%) rename tests/integration/net/{state => }/simple/peer/with_delete_test.go (100%) rename tests/integration/net/{state => }/simple/peer/with_update_add_field_test.go (100%) rename tests/integration/net/{state => }/simple/peer/with_update_restart_test.go (100%) rename tests/integration/net/{state => }/simple/peer/with_update_test.go (100%) rename tests/integration/net/{state => }/simple/peer_replicator/crdt/pcounter_test.go (100%) rename tests/integration/net/{state => }/simple/peer_replicator/crdt/pncounter_test.go (100%) rename tests/integration/net/{state => }/simple/peer_replicator/with_create_test.go (100%) rename tests/integration/net/{state => }/simple/peer_replicator/with_delete_test.go (100%) rename tests/integration/net/{state => }/simple/peer_replicator/with_update_restart_test.go (100%) rename tests/integration/net/{state => }/simple/peer_replicator/with_update_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/crdt/pcounter_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/crdt/pncounter_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/with_create_add_field_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/with_create_restart_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/with_create_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/with_create_update_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/with_delete_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/with_update_add_field_test.go (100%) rename tests/integration/net/{state => }/simple/replicator/with_update_test.go (100%) diff --git a/tests/integration/net/state/one_to_many/peer/with_create_update_test.go b/tests/integration/net/one_to_many/peer/with_create_update_test.go similarity index 100% rename from tests/integration/net/state/one_to_many/peer/with_create_update_test.go rename to tests/integration/net/one_to_many/peer/with_create_update_test.go diff --git a/tests/integration/net/state/one_to_many/replicator/with_create_test.go b/tests/integration/net/one_to_many/replicator/with_create_test.go similarity index 100% rename from tests/integration/net/state/one_to_many/replicator/with_create_test.go rename to tests/integration/net/one_to_many/replicator/with_create_test.go diff 
--git a/tests/integration/net/order/tcp_test.go b/tests/integration/net/order/tcp_test.go deleted file mode 100644 index ef18668d20..0000000000 --- a/tests/integration/net/order/tcp_test.go +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2022 Democratized Data Foundation -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package order - -import ( - "testing" - - "github.com/stretchr/testify/require" - - "github.com/sourcenetwork/defradb/client" - "github.com/sourcenetwork/defradb/net" - testUtils "github.com/sourcenetwork/defradb/tests/integration" -) - -// TestP2PWithSingleDocumentUpdatePerNode tests document syncing between two nodes with a single update per node -func TestP2PWithSingleDocumentUpdatePerNode(t *testing.T) { - test := P2PTestCase{ - NodeConfig: [][]net.NodeOpt{ - testUtils.RandomNetworkingConfig()(), - testUtils.RandomNetworkingConfig()(), - }, - NodePeers: map[int][]int{ - 1: { - 0, - }, - }, - SeedDocuments: []string{ - `{ - "Name": "John", - "Age": 21 - }`, - }, - Updates: map[int]map[int][]string{ - 1: { - 0: { - `{ - "Age": 45 - }`, - }, - }, - 0: { - 0: { - `{ - "Age": 60 - }`, - }, - }, - }, - Results: map[int]map[int]map[string]any{ - 0: { - 0: { - "Age": int64(45), - }, - }, - 1: { - 0: { - "Age": int64(60), - }, - }, - }, - } - - executeTestCase(t, test) -} - -// TestP2PWithMultipleDocumentUpdatesPerNode tests document syncing between two nodes with multiple updates per node. -func TestP2PWithMultipleDocumentUpdatesPerNode(t *testing.T) { - test := P2PTestCase{ - NodeConfig: [][]net.NodeOpt{ - testUtils.RandomNetworkingConfig()(), - testUtils.RandomNetworkingConfig()(), - }, - NodePeers: map[int][]int{ - 1: { - 0, - }, - }, - SeedDocuments: []string{ - `{ - "Name": "John", - "Age": 21 - }`, - }, - Updates: map[int]map[int][]string{ - 0: { - 0: { - `{ - "Age": 60 - }`, - `{ - "Age": 61 - }`, - `{ - "Age": 62 - }`, - }, - }, - 1: { - 0: { - `{ - "Age": 45 - }`, - `{ - "Age": 46 - }`, - `{ - "Age": 47 - }`, - }, - }, - }, - Results: map[int]map[int]map[string]any{ - 0: { - 0: { - "Age": int64(47), - }, - }, - 1: { - 0: { - "Age": int64(62), - }, - }, - }, - } - - executeTestCase(t, test) -} - -// TestP2FullPReplicator tests document syncing between a node and a replicator. 
-func TestP2FullPReplicator(t *testing.T) { - colDefMap, err := testUtils.ParseSDL(userCollectionGQLSchema) - require.NoError(t, err) - doc, err := client.NewDocFromJSON([]byte(`{ - "Name": "John", - "Age": 21 - }`), colDefMap[userCollection]) - require.NoError(t, err) - - test := P2PTestCase{ - NodeConfig: [][]net.NodeOpt{ - testUtils.RandomNetworkingConfig()(), - testUtils.RandomNetworkingConfig()(), - }, - NodeReplicators: map[int][]int{ - 0: { - 1, - }, - }, - DocumentsToReplicate: []*client.Document{ - doc, - }, - ReplicatorResult: map[int]map[string]map[string]any{ - 1: { - doc.ID().String(): { - "Age": int64(21), - }, - }, - }, - } - - executeTestCase(t, test) -} diff --git a/tests/integration/net/order/utils.go b/tests/integration/net/order/utils.go deleted file mode 100644 index c7075dae22..0000000000 --- a/tests/integration/net/order/utils.go +++ /dev/null @@ -1,382 +0,0 @@ -// Copyright 2022 Democratized Data Foundation -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package order - -import ( - "context" - "fmt" - "testing" - - "github.com/sourcenetwork/corelog" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/sourcenetwork/defradb/client" - "github.com/sourcenetwork/defradb/errors" - "github.com/sourcenetwork/defradb/net" - netutils "github.com/sourcenetwork/defradb/net/utils" - testutils "github.com/sourcenetwork/defradb/tests/integration" -) - -var ( - log = corelog.NewLogger("test.net") -) - -const ( - userCollectionGQLSchema = ` - type Users { - Name: String - Email: String - Age: Int - Height: Float - Verified: Boolean - } - ` - - userCollection = "Users" -) - -type P2PTestCase struct { - Query string - - // The identity for all requests. - // TODO-ACP: https://github.com/sourcenetwork/defradb/issues/2366 - Improve in ACP <> P2P implementation - Identity string - - // Configuration parameters for each peer - NodeConfig [][]net.NodeOpt - - // List of peers for each net. - // Only peers with lower index than the node can be used in the list of peers. - NodePeers map[int][]int - - // List of replicators for each net. - // Only peers with lower index than the node can be used in the list of peers. - NodeReplicators map[int][]int - - SeedDocuments []string - DocumentsToReplicate []*client.Document - - // node/docID/values - Updates map[int]map[int][]string - Results map[int]map[int]map[string]any - ReplicatorResult map[int]map[string]map[string]any -} - -func setupDefraNode( - t *testing.T, - opts []net.NodeOpt, - peers []string, - seeds []string, -) (*net.Node, []client.DocID, error) { - ctx := context.Background() - - log.InfoContext(ctx, "Building new memory store") - db, err := testutils.NewBadgerMemoryDB(ctx) - if err != nil { - return nil, nil, err - } - - if err := seedSchema(ctx, db); err != nil { - return nil, nil, err - } - - // seed the database with a set of documents - docIDs := []client.DocID{} - for _, document := range seeds { - docID, err := seedDocument(ctx, db, document) - require.NoError(t, err) - docIDs = append(docIDs, docID) - } - - // init the P2P node - var n *net.Node - n, err = net.NewNode(ctx, db, opts...) 
- if err != nil { - return nil, nil, errors.Wrap("failed to start P2P node", err) - } - - // parse peers and bootstrap - if len(peers) != 0 { - log.InfoContext(ctx, "Parsing bootstrap peers", corelog.Any("Peers", peers)) - addrs, err := netutils.ParsePeers(peers) - if err != nil { - return nil, nil, errors.Wrap(fmt.Sprintf("failed to parse bootstrap peers %v", peers), err) - } - log.InfoContext(ctx, "Bootstrapping with peers", corelog.Any("Addresses", addrs)) - n.Bootstrap(addrs) - } - - log.InfoContext(ctx, "Starting P2P node", corelog.Any("P2P addresses", n.PeerInfo().Addrs)) - if err := n.Start(); err != nil { - n.Close() - return nil, nil, errors.Wrap("unable to start P2P listeners", err) - } - - return n, docIDs, nil -} - -func seedSchema(ctx context.Context, db client.DB) error { - _, err := db.AddSchema(ctx, userCollectionGQLSchema) - return err -} - -func seedDocument( - ctx context.Context, - db client.DB, - document string, -) (client.DocID, error) { - col, err := db.GetCollectionByName(ctx, userCollection) - if err != nil { - return client.DocID{}, err - } - - doc, err := client.NewDocFromJSON([]byte(document), col.Definition()) - if err != nil { - return client.DocID{}, err - } - - err = col.Save(ctx, doc) - if err != nil { - return client.DocID{}, err - } - - return doc.ID(), nil -} - -func saveDocument( - ctx context.Context, - db client.DB, - document *client.Document, -) error { - col, err := db.GetCollectionByName(ctx, userCollection) - if err != nil { - return err - } - - return col.Save(ctx, document) -} - -func updateDocument( - ctx context.Context, - db client.DB, - docID client.DocID, - update string, -) error { - col, err := db.GetCollectionByName(ctx, userCollection) - if err != nil { - return err - } - - doc, err := getDocument(ctx, db, docID) - if err != nil { - return err - } - - if err := doc.SetWithJSON([]byte(update)); err != nil { - return err - } - - return col.Save(ctx, doc) -} - -func getDocument( - ctx context.Context, - db client.DB, - docID client.DocID, -) (*client.Document, error) { - col, err := db.GetCollectionByName(ctx, userCollection) - if err != nil { - return nil, err - } - - doc, err := col.Get(ctx, docID, false) - if err != nil { - return nil, err - } - return doc, err -} - -func executeTestCase(t *testing.T, test P2PTestCase) { - ctx := context.Background() - - docIDs := []client.DocID{} - nodes := []*net.Node{} - - for i, cfg := range test.NodeConfig { - log.InfoContext(ctx, fmt.Sprintf("Setting up node %d", i)) - var peerAddresses []string - if peers, ok := test.NodePeers[i]; ok { - for _, p := range peers { - if p >= len(nodes) { - log.InfoContext(ctx, "cannot set a peer that hasn't been started. 
Skipping to next peer") - continue - } - peerInfo := nodes[p].PeerInfo() - peerAddresses = append( - peerAddresses, - fmt.Sprintf("%s/p2p/%s", peerInfo.Addrs[0], peerInfo.ID), - ) - } - } - n, d, err := setupDefraNode( - t, - cfg, - peerAddresses, - test.SeedDocuments, - ) - require.NoError(t, err) - - if i == 0 { - docIDs = d - } - nodes = append(nodes, n) - } - - ////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////// - // PubSub related test logic - - // wait for peers to connect to each other - if len(test.NodePeers) > 0 { - for i, n := range nodes { - for j, p := range nodes { - if i == j { - continue - } - log.InfoContext(ctx, fmt.Sprintf("Waiting for node %d to connect with peer %d", i, j)) - err := n.WaitForPubSubEvent(p.PeerID()) - require.NoError(t, err) - log.InfoContext(ctx, fmt.Sprintf("Node %d connected to peer %d", i, j)) - } - } - } - - // update and sync peers - for n, updateMap := range test.Updates { - if n >= len(nodes) { - log.InfoContext(ctx, "cannot update a node that hasn't been started. Skipping to next node") - continue - } - - for d, updates := range updateMap { - for _, update := range updates { - log.InfoContext(ctx, fmt.Sprintf("Updating node %d with update %d", n, d)) - err := updateDocument( - ctx, - nodes[n].DB, - docIDs[d], - update, - ) - require.NoError(t, err) - - // wait for peers to sync - for n2, p := range nodes { - if n2 == n { - continue - } - log.InfoContext(ctx, fmt.Sprintf("Waiting for node %d to sync with peer %d", n2, n)) - err := p.WaitForPushLogByPeerEvent(nodes[n].PeerInfo().ID) - require.NoError(t, err) - log.InfoContext(ctx, fmt.Sprintf("Node %d synced", n2)) - } - } - } - - // check that peers actually received the update - for n2, resultsMap := range test.Results { - if n2 == n { - continue - } - if n2 >= len(nodes) { - log.InfoContext(ctx, "cannot check results of a node that hasn't been started. 
Skipping to next node") - continue - } - - for d, results := range resultsMap { - for field, result := range results { - doc, err := getDocument( - ctx, - nodes[n2].DB, - docIDs[d], - ) - require.NoError(t, err) - - val, err := doc.Get(field) - require.NoError(t, err) - - assert.Equal(t, result, val) - } - } - } - } - - ////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////// - // Replicator related test logic - - if len(test.NodeReplicators) > 0 { - for i, n := range nodes { - if reps, ok := test.NodeReplicators[i]; ok { - for _, r := range reps { - err := n.Peer.SetReplicator(ctx, client.Replicator{ - Info: nodes[r].PeerInfo(), - }) - require.NoError(t, err) - } - } - } - } - - if len(test.DocumentsToReplicate) > 0 { - for n, reps := range test.NodeReplicators { - for _, doc := range test.DocumentsToReplicate { - err := saveDocument( - ctx, - nodes[n].DB, - doc, - ) - require.NoError(t, err) - } - for _, rep := range reps { - log.InfoContext(ctx, fmt.Sprintf("Waiting for node %d to sync with peer %d", rep, n)) - err := nodes[rep].WaitForPushLogByPeerEvent(nodes[n].PeerID()) - require.NoError(t, err) - log.InfoContext(ctx, fmt.Sprintf("Node %d synced", rep)) - - for docID, results := range test.ReplicatorResult[rep] { - for field, result := range results { - d, err := client.NewDocIDFromString(docID) - require.NoError(t, err) - - doc, err := getDocument( - ctx, - nodes[rep].DB, - d, - ) - require.NoError(t, err) - - val, err := doc.Get(field) - require.NoError(t, err) - - assert.Equal(t, result, val) - } - } - } - } - } - - // clean up - for _, n := range nodes { - n.Close() - n.DB.Close() - } -} diff --git a/tests/integration/net/state/simple/peer/crdt/pcounter_test.go b/tests/integration/net/simple/peer/crdt/pcounter_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/crdt/pcounter_test.go rename to tests/integration/net/simple/peer/crdt/pcounter_test.go diff --git a/tests/integration/net/state/simple/peer/crdt/pncounter_test.go b/tests/integration/net/simple/peer/crdt/pncounter_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/crdt/pncounter_test.go rename to tests/integration/net/simple/peer/crdt/pncounter_test.go diff --git a/tests/integration/net/state/simple/peer/subscribe/with_add_get_remove_test.go b/tests/integration/net/simple/peer/subscribe/with_add_get_remove_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/subscribe/with_add_get_remove_test.go rename to tests/integration/net/simple/peer/subscribe/with_add_get_remove_test.go diff --git a/tests/integration/net/state/simple/peer/subscribe/with_add_get_test.go b/tests/integration/net/simple/peer/subscribe/with_add_get_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/subscribe/with_add_get_test.go rename to tests/integration/net/simple/peer/subscribe/with_add_get_test.go diff --git a/tests/integration/net/state/simple/peer/subscribe/with_add_remove_test.go b/tests/integration/net/simple/peer/subscribe/with_add_remove_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/subscribe/with_add_remove_test.go rename to tests/integration/net/simple/peer/subscribe/with_add_remove_test.go diff --git a/tests/integration/net/state/simple/peer/subscribe/with_add_test.go b/tests/integration/net/simple/peer/subscribe/with_add_test.go similarity index 100% rename from 
tests/integration/net/state/simple/peer/subscribe/with_add_test.go rename to tests/integration/net/simple/peer/subscribe/with_add_test.go diff --git a/tests/integration/net/state/simple/peer/subscribe/with_get_test.go b/tests/integration/net/simple/peer/subscribe/with_get_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/subscribe/with_get_test.go rename to tests/integration/net/simple/peer/subscribe/with_get_test.go diff --git a/tests/integration/net/state/simple/peer/with_create_add_field_test.go b/tests/integration/net/simple/peer/with_create_add_field_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/with_create_add_field_test.go rename to tests/integration/net/simple/peer/with_create_add_field_test.go diff --git a/tests/integration/net/state/simple/peer/with_create_test.go b/tests/integration/net/simple/peer/with_create_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/with_create_test.go rename to tests/integration/net/simple/peer/with_create_test.go diff --git a/tests/integration/net/state/simple/peer/with_delete_test.go b/tests/integration/net/simple/peer/with_delete_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/with_delete_test.go rename to tests/integration/net/simple/peer/with_delete_test.go diff --git a/tests/integration/net/state/simple/peer/with_update_add_field_test.go b/tests/integration/net/simple/peer/with_update_add_field_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/with_update_add_field_test.go rename to tests/integration/net/simple/peer/with_update_add_field_test.go diff --git a/tests/integration/net/state/simple/peer/with_update_restart_test.go b/tests/integration/net/simple/peer/with_update_restart_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/with_update_restart_test.go rename to tests/integration/net/simple/peer/with_update_restart_test.go diff --git a/tests/integration/net/state/simple/peer/with_update_test.go b/tests/integration/net/simple/peer/with_update_test.go similarity index 100% rename from tests/integration/net/state/simple/peer/with_update_test.go rename to tests/integration/net/simple/peer/with_update_test.go diff --git a/tests/integration/net/state/simple/peer_replicator/crdt/pcounter_test.go b/tests/integration/net/simple/peer_replicator/crdt/pcounter_test.go similarity index 100% rename from tests/integration/net/state/simple/peer_replicator/crdt/pcounter_test.go rename to tests/integration/net/simple/peer_replicator/crdt/pcounter_test.go diff --git a/tests/integration/net/state/simple/peer_replicator/crdt/pncounter_test.go b/tests/integration/net/simple/peer_replicator/crdt/pncounter_test.go similarity index 100% rename from tests/integration/net/state/simple/peer_replicator/crdt/pncounter_test.go rename to tests/integration/net/simple/peer_replicator/crdt/pncounter_test.go diff --git a/tests/integration/net/state/simple/peer_replicator/with_create_test.go b/tests/integration/net/simple/peer_replicator/with_create_test.go similarity index 100% rename from tests/integration/net/state/simple/peer_replicator/with_create_test.go rename to tests/integration/net/simple/peer_replicator/with_create_test.go diff --git a/tests/integration/net/state/simple/peer_replicator/with_delete_test.go b/tests/integration/net/simple/peer_replicator/with_delete_test.go similarity index 100% rename from 
tests/integration/net/state/simple/peer_replicator/with_delete_test.go rename to tests/integration/net/simple/peer_replicator/with_delete_test.go diff --git a/tests/integration/net/state/simple/peer_replicator/with_update_restart_test.go b/tests/integration/net/simple/peer_replicator/with_update_restart_test.go similarity index 100% rename from tests/integration/net/state/simple/peer_replicator/with_update_restart_test.go rename to tests/integration/net/simple/peer_replicator/with_update_restart_test.go diff --git a/tests/integration/net/state/simple/peer_replicator/with_update_test.go b/tests/integration/net/simple/peer_replicator/with_update_test.go similarity index 100% rename from tests/integration/net/state/simple/peer_replicator/with_update_test.go rename to tests/integration/net/simple/peer_replicator/with_update_test.go diff --git a/tests/integration/net/state/simple/replicator/crdt/pcounter_test.go b/tests/integration/net/simple/replicator/crdt/pcounter_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/crdt/pcounter_test.go rename to tests/integration/net/simple/replicator/crdt/pcounter_test.go diff --git a/tests/integration/net/state/simple/replicator/crdt/pncounter_test.go b/tests/integration/net/simple/replicator/crdt/pncounter_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/crdt/pncounter_test.go rename to tests/integration/net/simple/replicator/crdt/pncounter_test.go diff --git a/tests/integration/net/state/simple/replicator/with_create_add_field_test.go b/tests/integration/net/simple/replicator/with_create_add_field_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/with_create_add_field_test.go rename to tests/integration/net/simple/replicator/with_create_add_field_test.go diff --git a/tests/integration/net/state/simple/replicator/with_create_restart_test.go b/tests/integration/net/simple/replicator/with_create_restart_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/with_create_restart_test.go rename to tests/integration/net/simple/replicator/with_create_restart_test.go diff --git a/tests/integration/net/state/simple/replicator/with_create_test.go b/tests/integration/net/simple/replicator/with_create_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/with_create_test.go rename to tests/integration/net/simple/replicator/with_create_test.go diff --git a/tests/integration/net/state/simple/replicator/with_create_update_test.go b/tests/integration/net/simple/replicator/with_create_update_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/with_create_update_test.go rename to tests/integration/net/simple/replicator/with_create_update_test.go diff --git a/tests/integration/net/state/simple/replicator/with_delete_test.go b/tests/integration/net/simple/replicator/with_delete_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/with_delete_test.go rename to tests/integration/net/simple/replicator/with_delete_test.go diff --git a/tests/integration/net/state/simple/replicator/with_update_add_field_test.go b/tests/integration/net/simple/replicator/with_update_add_field_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/with_update_add_field_test.go rename to tests/integration/net/simple/replicator/with_update_add_field_test.go diff --git 
a/tests/integration/net/state/simple/replicator/with_update_test.go b/tests/integration/net/simple/replicator/with_update_test.go similarity index 100% rename from tests/integration/net/state/simple/replicator/with_update_test.go rename to tests/integration/net/simple/replicator/with_update_test.go From c75c57401d073a199ecfb72a50242850b9b4e9d6 Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Thu, 30 May 2024 21:00:38 -0400 Subject: [PATCH 03/15] fix leaking context done handler --- datastore/memory/memory.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datastore/memory/memory.go b/datastore/memory/memory.go index e650776623..8a17d79603 100644 --- a/datastore/memory/memory.go +++ b/datastore/memory/memory.go @@ -347,10 +347,14 @@ func (d *Datastore) executePurge(ctx context.Context) { } func (d *Datastore) handleContextDone(ctx context.Context) { - <-ctx.Done() - // It is safe to ignore the error since the only error that could occur is if the - // datastore is already closed, in which case the purpose of the `Close` call is already covered. - _ = d.Close() + select { + case <-d.closing: + return + case <-ctx.Done(): + // It is safe to ignore the error since the only error that could occur is if the + // datastore is already closed, in which case the purpose of the `Close` call is already covered. + _ = d.Close() + } } // commit commits the given transaction to the datastore. From b52d01542f03618b2eb5a799d695cd4ca0ba88be Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Thu, 30 May 2024 21:33:12 -0400 Subject: [PATCH 04/15] refactor dag sync --- cli/start.go | 1 + client/collection.go | 15 - events/dag_sync.go | 31 ++ events/events.go | 3 + go.sum | 2 - http/client_collection.go | 12 - internal/db/collection_index.go | 46 +-- internal/db/config.go | 11 +- internal/db/config_test.go | 8 +- internal/db/db.go | 11 + internal/db/errors.go | 2 + internal/db/merge.go | 361 ++++++++++++++++++++++++ internal/merkle/clock/clock.go | 2 +- net/client_test.go | 9 +- net/dag.go | 161 ----------- net/dag_test.go | 216 -------------- net/errors.go | 20 +- net/node.go | 2 +- net/peer.go | 51 +--- net/process.go | 259 ++++++----------- net/server.go | 181 +++--------- net/server_test.go | 19 +- tests/clients/cli/wrapper_collection.go | 12 - tests/integration/db.go | 1 + 24 files changed, 591 insertions(+), 845 deletions(-) create mode 100644 events/dag_sync.go create mode 100644 internal/db/merge.go delete mode 100644 net/dag.go delete mode 100644 net/dag_test.go diff --git a/cli/start.go b/cli/start.go index 9505fd7fff..4ae60b2bb0 100644 --- a/cli/start.go +++ b/cli/start.go @@ -116,6 +116,7 @@ func MakeStartCommand() *cobra.Command { node.WithPeers(peers...), // db options db.WithUpdateEvents(), + db.WithDAGMergeEvents(), db.WithMaxRetries(cfg.GetInt("datastore.MaxTxnRetries")), // net node options net.WithListenAddresses(cfg.GetStringSlice("net.p2pAddresses")...), diff --git a/client/collection.go b/client/collection.go index 38c309a0e8..b557e2e335 100644 --- a/client/collection.go +++ b/client/collection.go @@ -122,21 +122,6 @@ type Collection interface { // GetIndexes returns all the indexes that exist on the collection. GetIndexes(ctx context.Context) ([]IndexDescription, error) - - // CreateDocIndex creates an index for the given document. - // WARNING: This method is only for internal use and is not supposed to be called by the client - // as it might compromise the integrity of the database. 
This method will be removed in the future
-	CreateDocIndex(context.Context, *Document) error
-
-	// UpdateDocIndex updates the index for the given document.
-	// WARNING: This method is only for internal use and is not supposed to be called by the client
-	// as it might compromise the integrity of the database. This method will be removed in the future
-	UpdateDocIndex(ctx context.Context, oldDoc, newDoc *Document) error
-
-	// DeleteDocIndex deletes the index for the given document.
-	// WARNING: This method is only for internal use and is not supposed to be called by the client
-	// as it might compromise the integrity of the database. This method will be removed in the future
-	DeleteDocIndex(context.Context, *Document) error
}

// DocIDResult wraps the result of an attempt at a DocID retrieval operation.
diff --git a/events/dag_sync.go b/events/dag_sync.go
new file mode 100644
index 0000000000..0af06f49ea
--- /dev/null
+++ b/events/dag_sync.go
@@ -0,0 +1,31 @@
+// Copyright 2024 Democratized Data Foundation
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package events
+
+import (
+	"github.com/ipfs/go-cid"
+
+	"github.com/sourcenetwork/immutable"
+)
+
+// DAGMergeChannel is the bus onto which DAG merges are published.
+type DAGMergeChannel = immutable.Option[Channel[DAGMerge]]
+
+// DAGMerge is a notification that a merge can be performed up to the provided CID.
+type DAGMerge struct {
+	// Cid is the id of the composite commit that formed this update in the DAG.
+	Cid cid.Cid
+	// SchemaRoot is the root identifier of the schema that defined the shape of the document that was updated.
+	SchemaRoot string
+	// MergeCompleteChan is a channel that will be closed when the merge is complete,
+	// allowing the caller to optionally block until the merge is complete.
+	MergeCompleteChan chan struct{}
+}
diff --git a/events/events.go b/events/events.go
index 4f910ab454..ec29adc24c 100644
--- a/events/events.go
+++ b/events/events.go
@@ -49,4 +49,7 @@ func New[T any](commandBufferSize int, eventBufferSize int) Channel[T] {
type Events struct {
	// Updates publishes an `Update` for each document written to in the database.
	Updates UpdateChannel
+
+	// DAGMerges publishes a `DAGMerge` for each completed DAG sync process over P2P.
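`MergeCompleteChan` is what lets the publishing side block until a merge has actually been applied. A minimal sketch of the producer pattern, assuming a live `Channel[DAGMerge]` named `bus`; the variable and surrounding wiring here are hypothetical, the real publisher being this patch's net server changes:

    done := make(chan struct{})
    bus.Publish(DAGMerge{
        Cid:               compositeCid, // head CID received from the remote peer
        SchemaRoot:        schemaRoot,
        MergeCompleteChan: done,
    })
    <-done // released once db.executeMerge closes the channel

Because `executeMerge` closes the channel in a defer, waiters are released even when the merge itself errors.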
+ DAGMerges DAGMergeChannel } diff --git a/go.sum b/go.sum index 3203ff8b9c..e6246a269d 100644 --- a/go.sum +++ b/go.sum @@ -1166,8 +1166,6 @@ github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49u github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= github.com/vito/go-sse v1.0.0 h1:e6/iTrrvy8BRrOwJwmQmlndlil+TLdxXvHi55ZDzH6M= github.com/vito/go-sse v1.0.0/go.mod h1:2wkcaQ+jtlZ94Uve8gYZjFpL68luAjssTINA2hpgcZs= -github.com/warpfork/go-testmark v0.12.1 h1:rMgCpJfwy1sJ50x0M0NgyphxYYPMOODIJHhsXyEHU0s= -github.com/warpfork/go-testmark v0.12.1/go.mod h1:kHwy7wfvGSPh1rQJYKayD4AbtNaeyZdcGi9tNJTaa5Y= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/wasmerio/wasmer-go v1.0.4 h1:MnqHoOGfiQ8MMq2RF6wyCeebKOe84G88h5yv+vmxJgs= diff --git a/http/client_collection.go b/http/client_collection.go index 59b2cf79b6..ee614c1dba 100644 --- a/http/client_collection.go +++ b/http/client_collection.go @@ -413,15 +413,3 @@ func (c *Collection) GetIndexes(ctx context.Context) ([]client.IndexDescription, } return indexes, nil } - -func (c *Collection) CreateDocIndex(context.Context, *client.Document) error { - return ErrMethodIsNotImplemented -} - -func (c *Collection) UpdateDocIndex(ctx context.Context, oldDoc, newDoc *client.Document) error { - return ErrMethodIsNotImplemented -} - -func (c *Collection) DeleteDocIndex(context.Context, *client.Document) error { - return ErrMethodIsNotImplemented -} diff --git a/internal/db/collection_index.go b/internal/db/collection_index.go index c2f02bf3bf..c606cc45b7 100644 --- a/internal/db/collection_index.go +++ b/internal/db/collection_index.go @@ -109,53 +109,13 @@ func (db *db) fetchCollectionIndexDescriptions( return indexDescriptions, nil } -func (c *collection) CreateDocIndex(ctx context.Context, doc *client.Document) error { - ctx, txn, err := ensureContextTxn(ctx, c.db, false) - if err != nil { - return err - } - defer txn.Discard(ctx) - - err = c.indexNewDoc(ctx, doc) - if err != nil { - return err - } - - return txn.Commit(ctx) -} - -func (c *collection) UpdateDocIndex(ctx context.Context, oldDoc, newDoc *client.Document) error { - ctx, txn, err := ensureContextTxn(ctx, c.db, false) - if err != nil { - return err - } - defer txn.Discard(ctx) - - err = c.deleteIndexedDoc(ctx, oldDoc) - if err != nil { - return err - } - err = c.indexNewDoc(ctx, newDoc) - if err != nil { - return err - } - - return txn.Commit(ctx) -} - -func (c *collection) DeleteDocIndex(ctx context.Context, doc *client.Document) error { - ctx, txn, err := ensureContextTxn(ctx, c.db, false) +func (c *collection) updateDocIndex(ctx context.Context, oldDoc, newDoc *client.Document) error { + err := c.deleteIndexedDoc(ctx, oldDoc) if err != nil { return err } - defer txn.Discard(ctx) - err = c.deleteIndexedDoc(ctx, doc) - if err != nil { - return err - } - - return txn.Commit(ctx) + return c.indexNewDoc(ctx, newDoc) } func (c *collection) indexNewDoc(ctx context.Context, doc *client.Document) error { diff --git a/internal/db/config.go b/internal/db/config.go index 1364cab09b..2debf41df9 100644 --- a/internal/db/config.go +++ b/internal/db/config.go @@ -27,9 +27,14 @@ type Option func(*db) // WithUpdateEvents enables the update events channel. 
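The rewrite of `WithUpdateEvents` just below is subtle but worth calling out: each option now assigns only its own field on the shared `events.Events` struct instead of replacing the struct wholesale, so enabling one channel no longer clobbers another that was enabled earlier, and the options compose in any order. A tiny sketch extending the pattern of the updated config tests (the two-option composition is illustrative, not taken from the tests):

    d := &db{}
    WithUpdateEvents()(d)
    WithDAGMergeEvents()(d)
    // d.events.Updates and d.events.DAGMerges are now both populated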
func WithUpdateEvents() Option { return func(db *db) { - db.events = events.Events{ - Updates: immutable.Some(events.New[events.Update](0, updateEventBufferSize)), - } + db.events.Updates = immutable.Some(events.New[events.Update](0, updateEventBufferSize)) + } +} + +// WithDAGMergeEvents enables the dag merge events channel. +func WithDAGMergeEvents() Option { + return func(db *db) { + db.events.DAGMerges = immutable.Some(events.New[events.DAGMerge](0, updateEventBufferSize)) } } diff --git a/internal/db/config_test.go b/internal/db/config_test.go index f80e538b4f..a08bd7815d 100644 --- a/internal/db/config_test.go +++ b/internal/db/config_test.go @@ -19,7 +19,13 @@ import ( func TestWithUpdateEvents(t *testing.T) { d := &db{} WithUpdateEvents()(d) - assert.NotNil(t, d.events) + assert.NotNil(t, d.events.Updates) +} + +func TestWithDAGMergeEvents(t *testing.T) { + d := &db{} + WithDAGMergeEvents()(d) + assert.NotNil(t, d.events.DAGMerges) } func TestWithMaxRetries(t *testing.T) { diff --git a/internal/db/db.go b/internal/db/db.go index 979626034c..a04dee5123 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -118,6 +118,14 @@ func newDB( return nil, err } + if db.events.DAGMerges.HasValue() { + merges, err := db.events.DAGMerges.Value().Subscribe() + if err != nil { + return nil, err + } + go db.handleMerges(ctx, merges) + } + return db, nil } @@ -262,6 +270,9 @@ func (db *db) Close() { if db.events.Updates.HasValue() { db.events.Updates.Value().Close() } + if db.events.DAGMerges.HasValue() { + db.events.DAGMerges.Value().Close() + } err := db.rootstore.Close() if err != nil { diff --git a/internal/db/errors.go b/internal/db/errors.go index fcb4baf13f..8d3c770bd8 100644 --- a/internal/db/errors.go +++ b/internal/db/errors.go @@ -93,6 +93,7 @@ const ( errCanNotHavePolicyWithoutACP string = "can not specify policy on collection, without acp" errSecondaryFieldOnSchema string = "secondary relation fields cannot be defined on the schema" errRelationMissingField string = "relation missing field" + errNoTransactionInContext string = "no transaction in context" ) var ( @@ -126,6 +127,7 @@ var ( ErrSecondaryFieldOnSchema = errors.New(errSecondaryFieldOnSchema) ErrRelationMissingField = errors.New(errRelationMissingField) ErrMultipleRelationPrimaries = errors.New("relation can only have a single field set as primary") + ErrNoTransactionInContext = errors.New(errNoTransactionInContext) ) // NewErrFailedToGetHeads returns a new error indicating that the heads of a document diff --git a/internal/db/merge.go b/internal/db/merge.go new file mode 100644 index 0000000000..9fb7374c16 --- /dev/null +++ b/internal/db/merge.go @@ -0,0 +1,361 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
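The body of the new internal/db/merge.go that follows is the heart of this patch: DAG merging moves out of the networking layer and into the database itself. `handleMerges` drains the DAGMerges bus, spawning a goroutine per notification, and `executeMerge` performs the whole merge inside a single transaction: load the document's current local heads, walk the remote DAG backwards from the incoming composite CID until it reaches blocks it already knows, replay the collected composites oldest-first, then refresh the document's secondary indexes before committing. As a concrete trace, with heights assumed for illustration: given a local head at priority 5 and an incoming head at priority 8, the walk front-pushes the blocks at priorities 8, 7 and 6 onto the work list, stops at the already-merged head, and the replay then processes them in the order 6, 7, 8.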
+
+package db
+
+import (
+	"container/list"
+	"context"
+
+	"github.com/ipfs/go-cid"
+	"github.com/ipld/go-ipld-prime/linking"
+	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+
+	"github.com/sourcenetwork/corelog"
+	"github.com/sourcenetwork/defradb/client"
+	"github.com/sourcenetwork/defradb/datastore"
+	"github.com/sourcenetwork/defradb/errors"
+	"github.com/sourcenetwork/defradb/events"
+	"github.com/sourcenetwork/defradb/internal/core"
+	coreblock "github.com/sourcenetwork/defradb/internal/core/block"
+	"github.com/sourcenetwork/defradb/internal/db/base"
+	"github.com/sourcenetwork/defradb/internal/merkle/clock"
+	merklecrdt "github.com/sourcenetwork/defradb/internal/merkle/crdt"
+	"github.com/sourcenetwork/immutable"
+)
+
+func (db *db) handleMerges(ctx context.Context, merges events.Subscription[events.DAGMerge]) {
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case merge, ok := <-merges:
+			if !ok {
+				return
+			}
+			go func() {
+				err := db.executeMerge(ctx, merge)
+				if err != nil {
+					log.ErrorContextE(
+						ctx,
+						"Failed to execute merge",
+						err,
+						corelog.String("cid", merge.Cid.String()),
+					)
+				}
+			}()
+		}
+	}
+}
+
+func (db *db) executeMerge(ctx context.Context, dagMerge events.DAGMerge) error {
+	defer func() {
+		// Notify the caller that the merge is complete.
+		if dagMerge.MergeCompleteChan != nil {
+			close(dagMerge.MergeCompleteChan)
+		}
+	}()
+	ctx, txn, err := ensureContextTxn(ctx, db, false)
+	if err != nil {
+		return err
+	}
+	defer txn.Discard(ctx)
+	mp, err := db.newMergeProcessor(ctx, dagMerge.Cid, dagMerge.SchemaRoot)
+	if err != nil {
+		return err
+	}
+	mt, err := mp.getHeads(ctx)
+	if err != nil {
+		return err
+	}
+	err = mp.getComposites(ctx, dagMerge.Cid, mt)
+	if err != nil {
+		return err
+	}
+	err = mp.merge(ctx)
+	if err != nil {
+		return err
+	}
+	err = mp.syncIndexedDocs(ctx)
+	if err != nil {
+		return err
+	}
+	return txn.Commit(ctx)
+}
+
+type mergeProcessor struct {
+	ctx              context.Context
+	txn              datastore.Txn
+	ls               linking.LinkSystem
+	docID            client.DocID
+	mCRDTs           map[uint32]merklecrdt.MerkleCRDT
+	col              *collection
+	schemaVersionKey core.CollectionSchemaVersionKey
+	dsKey            core.DataStoreKey
+	composites       *list.List
+}
+
+func (db *db) newMergeProcessor(ctx context.Context, cid cid.Cid, rootSchema string) (*mergeProcessor, error) {
+	txn, ok := TryGetContextTxn(ctx)
+	if !ok {
+		return nil, ErrNoTransactionInContext
+	}
+
+	ls := cidlink.DefaultLinkSystem()
+	ls.SetReadStorage(txn.DAGstore().AsIPLDStorage())
+	nd, err := ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: cid}, coreblock.SchemaPrototype)
+	if err != nil {
+		return nil, err
+	}
+
+	block, err := coreblock.GetFromNode(nd)
+	if err != nil {
+		return nil, err
+	}
+
+	cols, err := db.getCollections(
+		ctx,
+		client.CollectionFetchOptions{
+			SchemaRoot: immutable.Some(rootSchema),
+		},
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	col := cols[0].(*collection)
+	docID, err := client.NewDocIDFromString(string(block.Delta.GetDocID()))
+	if err != nil {
+		return nil, err
+	}
+
+	return &mergeProcessor{
+		ctx:    ctx,
+		txn:    txn,
+		ls:     ls,
+		docID:  docID,
+		mCRDTs: make(map[uint32]merklecrdt.MerkleCRDT),
+		col:    col,
+		schemaVersionKey: core.CollectionSchemaVersionKey{
+			SchemaVersionID: col.Schema().VersionID,
+			CollectionID:    col.ID(),
+		},
+		dsKey:      base.MakeDataStoreKeyWithCollectionAndDocID(col.Description(), docID.String()),
+		composites: list.New(),
+	}, nil
+}
+
+type mergeTarget struct {
+	heads      map[cid.Cid]*coreblock.Block
+	headHeight uint64
+}
+
+func newMergeTarget() mergeTarget {
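+	// headHeight starts at its zero value here: a target with no known heads
+	// (e.g. a document never seen locally) must accept every block, and since
+	// priority >= 0 always holds for a uint64, the walk runs back to genesis.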
+	return mergeTarget{
+		heads: make(map[cid.Cid]*coreblock.Block),
+	}
+}
+
+// getComposites retrieves the composite blocks for the given document until it reaches a
+// block that has already been merged or until it reaches the genesis block.
+func (mp *mergeProcessor) getComposites(ctx context.Context, blockCid cid.Cid, mt mergeTarget) error {
+	nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: blockCid}, coreblock.SchemaPrototype)
+	if err != nil {
+		return err
+	}
+
+	block, err := coreblock.GetFromNode(nd)
+	if err != nil {
+		return err
+	}
+
+	if _, ok := mt.heads[blockCid]; ok {
+		// We've already processed this block.
+		return nil
+	}
+
+	if block.Delta.GetPriority() >= mt.headHeight {
+		mp.composites.PushFront(block)
+		for _, link := range block.Links {
+			if link.Name == core.HEAD {
+				err := mp.getComposites(ctx, link.Cid, mt)
+				if err != nil {
+					return err
+				}
+			}
+		}
+	} else {
+		newMT := newMergeTarget()
+		for _, b := range mt.heads {
+			for _, link := range b.Links {
+				nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype)
+				if err != nil {
+					return err
+				}
+
+				childBlock, err := coreblock.GetFromNode(nd)
+				if err != nil {
+					return err
+				}
+
+				newMT.heads[link.Cid] = childBlock
+				newMT.headHeight = childBlock.Delta.GetPriority()
+			}
+		}
+		return mp.getComposites(ctx, blockCid, newMT)
+	}
+	return nil
+}
+
+// getHeads retrieves the heads of the composite DAG for the given document.
+func (mp *mergeProcessor) getHeads(ctx context.Context) (mergeTarget, error) {
+	headset := clock.NewHeadSet(
+		mp.txn.Headstore(),
+		mp.dsKey.WithFieldId(core.COMPOSITE_NAMESPACE).ToHeadStoreKey(),
+	)
+
+	cids, _, err := headset.List(ctx)
+	if err != nil {
+		return mergeTarget{}, err
+	}
+
+	mt := newMergeTarget()
+	for _, cid := range cids {
+		b, err := mp.txn.DAGstore().Get(ctx, cid)
+		if err != nil {
+			return mergeTarget{}, err
+		}
+
+		block, err := coreblock.GetFromBytes(b.RawData())
+		if err != nil {
+			return mergeTarget{}, err
+		}
+
+		mt.heads[cid] = block
+		// All heads have the same height so overwriting is ok.
+		mt.headHeight = block.Delta.GetPriority()
+	}
+	return mt, nil
+}
+
+func (mp *mergeProcessor) merge(ctx context.Context) error {
+	for e := mp.composites.Front(); e != nil; e = e.Next() {
+		block := e.Value.(*coreblock.Block)
+		link, err := block.GenerateLink()
+		if err != nil {
+			return err
+		}
+		err = mp.processBlock(ctx, block, link)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// processBlock merges the block and its children into the datastore and sets the head accordingly.
+func (mp *mergeProcessor) processBlock(
+	ctx context.Context,
+	block *coreblock.Block,
+	blockLink cidlink.Link,
+) error {
+	crdt, err := mp.initCRDTForType(block.Delta.GetFieldName())
+	if err != nil {
+		return err
+	}
+
+	// If the CRDT is nil, it means the field is not part
+	// of the schema and we can safely ignore it.
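+	// (This can happen when the sending peer is on a newer schema version whose
+	// added fields have not been migrated locally yet; skipping them lets the
+	// rest of the merge proceed instead of failing outright.)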
+ if crdt == nil { + return nil + } + + err = crdt.Clock().ProcessBlock(ctx, block, blockLink) + if err != nil { + return err + } + + for _, link := range block.Links { + if link.Name == core.HEAD { + continue + } + + b, err := mp.txn.DAGstore().Get(ctx, link.Cid) + if err != nil { + return err + } + + childBlock, err := coreblock.GetFromBytes(b.RawData()) + if err != nil { + return err + } + + if err := mp.processBlock(ctx, childBlock, link.Link); err != nil { + return err + } + } + + return nil +} + +func (mp *mergeProcessor) initCRDTForType( + field string, +) (merklecrdt.MerkleCRDT, error) { + if field == "" { + return merklecrdt.NewMerkleCompositeDAG( + mp.txn, + mp.schemaVersionKey, + mp.dsKey.WithFieldId(core.COMPOSITE_NAMESPACE), + "", + ), nil + } + + fd, ok := mp.col.Definition().GetFieldByName(field) + if !ok { + // If the field is not part of the schema, we can safely ignore it. + return nil, nil + } + + return merklecrdt.InstanceWithStore( + mp.txn, + mp.schemaVersionKey, + fd.Typ, + fd.Kind, + mp.dsKey.WithFieldId(fd.ID.String()), + field, + ) +} + +func (mp *mergeProcessor) syncIndexedDocs( + ctx context.Context, +) error { + // remove transaction from old context + oldCtx := SetContextTxn(ctx, nil) + + oldDoc, err := mp.col.Get(oldCtx, mp.docID, false) + isNewDoc := errors.Is(err, client.ErrDocumentNotFoundOrNotAuthorized) + if !isNewDoc && err != nil { + return err + } + + doc, err := mp.col.Get(ctx, mp.docID, false) + isDeletedDoc := errors.Is(err, client.ErrDocumentNotFoundOrNotAuthorized) + if !isDeletedDoc && err != nil { + return err + } + + if isDeletedDoc { + return mp.col.deleteIndexedDoc(ctx, oldDoc) + } else if isNewDoc { + return mp.col.indexNewDoc(ctx, doc) + } else { + return mp.col.updateDocIndex(ctx, oldDoc, doc) + } +} diff --git a/internal/merkle/clock/clock.go b/internal/merkle/clock/clock.go index 087ba76804..06cccb6467 100644 --- a/internal/merkle/clock/clock.go +++ b/internal/merkle/clock/clock.go @@ -125,7 +125,7 @@ func (mc *MerkleClock) ProcessBlock( // check if we have any HEAD links hasHeads := false for _, l := range block.Links { - if l.Name == "_head" { + if l.Name == core.HEAD { hasHeads = true break } diff --git a/net/client_test.go b/net/client_test.go index e074947213..6e85a516be 100644 --- a/net/client_test.go +++ b/net/client_test.go @@ -129,14 +129,17 @@ func TestPushlogW_WithValidPeerID_NoError(t *testing.T) { err = col.Save(ctx, doc) require.NoError(t, err) - cid, err := createCID(doc) + headCID, err := getHead(ctx, n1.db, doc.ID()) + require.NoError(t, err) + + b, err := n1.db.Blockstore().AsIPLDStorage().Get(ctx, headCID.KeyString()) require.NoError(t, err) err = n1.server.pushLog(ctx, events.Update{ DocID: doc.ID().String(), - Cid: cid, + Cid: headCID, SchemaRoot: col.SchemaRoot(), - Block: emptyBlock(), + Block: b, }, n2.PeerInfo().ID) require.NoError(t, err) } diff --git a/net/dag.go b/net/dag.go deleted file mode 100644 index 7718db6c27..0000000000 --- a/net/dag.go +++ /dev/null @@ -1,161 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package net - -import ( - "sync" - "time" - - "github.com/ipfs/go-cid" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" - - "github.com/sourcenetwork/corelog" - - coreblock "github.com/sourcenetwork/defradb/internal/core/block" -) - -var ( - DAGSyncTimeout = time.Second * 60 -) - -type dagJob struct { - session *sync.WaitGroup // A waitgroup to wait for all related jobs to conclude - bp *blockProcessor // the block processor to use - cid cid.Cid // the cid of the block to fetch from the P2P network - - // OLD FIELDS - // root cid.Cid // the root of the branch we are walking down - // rootPrio uint64 // the priority of the root delta - // delta core.Delta // the current delta -} - -// the only purpose of this worker is to be able to orderly shut-down job -// workers without races by becoming the only sender for the store.jobQueue -// channel. -func (p *Peer) sendJobWorker() { - // The DAG sync process for a document is handled over a single transaction, it is possible that a single - // document ends up using all workers. Since the transaction uses a mutex to guarantee thread safety, some - // operations in those workers may temporarily blocked which would leave a concurrent document sync process - // hanging waiting for some workers to free up. To eliviate this problem, we add new workers dedicated to a - // document and discard them once the process is completed. - docWorkerQueue := make(map[string]chan *dagJob) - for { - select { - case <-p.ctx.Done(): - for _, job := range docWorkerQueue { - close(job) - } - return - - case newJob := <-p.sendJobs: - jobs, ok := docWorkerQueue[newJob.bp.dsKey.DocID] - if !ok { - jobs = make(chan *dagJob, numWorkers) - for i := 0; i < numWorkers; i++ { - go p.dagWorker(jobs) - } - docWorkerQueue[newJob.bp.dsKey.DocID] = jobs - } - jobs <- newJob - - case docID := <-p.closeJob: - if jobs, ok := docWorkerQueue[docID]; ok { - close(jobs) - delete(docWorkerQueue, docID) - } - } - } -} - -// dagWorker should run in its own goroutine. Workers are launched during -// initialization in New(). -func (p *Peer) dagWorker(jobs chan *dagJob) { - for job := range jobs { - select { - case <-p.ctx.Done(): - // drain jobs from queue when we are done - job.session.Done() - continue - default: - } - - go func(j *dagJob) { - if j.bp.dagSyncer != nil && j.cid.Defined() { - // BlockOfType will return the block if it is already in the store or fetch it from the network - // if it is not. This is a blocking call and will wait for the block to be fetched. - // It uses the LinkSystem to fetch the block. Blocks retrieved from the network will - // also be stored in the blockstore in the same call. - // Blocks have to match the coreblock.SchemaPrototype to be returned. 
- nd, err := j.bp.dagSyncer.BlockOfType(p.ctx, cidlink.Link{Cid: j.cid}, coreblock.SchemaPrototype) - if err != nil { - log.ErrorContextE( - p.ctx, - "Failed to get node", - err, - corelog.Any("CID", j.cid)) - j.session.Done() - return - } - block, err := coreblock.GetFromNode(nd) - if err != nil { - log.ErrorContextE( - p.ctx, - "Failed to convert ipld node to block", - err, - corelog.Any("CID", j.cid)) - } - j.bp.handleChildBlocks( - p.ctx, - j.session, - block, - ) - } - p.queuedChildren.Remove(j.cid) - j.session.Done() - }(job) - } -} - -type cidSafeSet struct { - set map[cid.Cid]struct{} - mux sync.Mutex -} - -func newCidSafeSet() *cidSafeSet { - return &cidSafeSet{ - set: make(map[cid.Cid]struct{}), - } -} - -// Visit checks if we can visit this node, or -// if its already being visited -func (s *cidSafeSet) Visit(c cid.Cid) bool { - var b bool - s.mux.Lock() - { - if _, ok := s.set[c]; !ok { - s.set[c] = struct{}{} - b = true - } - } - s.mux.Unlock() - return b -} - -func (s *cidSafeSet) Remove(c cid.Cid) { - s.mux.Lock() - { - delete(s.set, c) - } - s.mux.Unlock() -} diff --git a/net/dag_test.go b/net/dag_test.go deleted file mode 100644 index 976f43653a..0000000000 --- a/net/dag_test.go +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright 2023 Democratized Data Foundation -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package net - -import ( - "context" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/sourcenetwork/defradb/client" - "github.com/sourcenetwork/defradb/internal/core" - coreblock "github.com/sourcenetwork/defradb/internal/core/block" - "github.com/sourcenetwork/defradb/internal/merkle/clock" - netutils "github.com/sourcenetwork/defradb/net/utils" -) - -const timeout = 5 * time.Second - -func TestSendJobWorker_ExitOnContextClose_NoError(t *testing.T) { - ctx := context.Background() - _, n := newTestNode(ctx, t) - done := make(chan struct{}) - go func() { - n.sendJobWorker() - close(done) - }() - n.Close() - select { - case <-done: - case <-time.After(timeout): - t.Error("failed to close sendJobWorker") - } -} - -func TestSendJobWorker_WithNewJob_NoError(t *testing.T) { - ctx := context.Background() - db, n := newTestNode(ctx, t) - done := make(chan struct{}) - go func() { - n.sendJobWorker() - close(done) - }() - _, err := db.AddSchema(ctx, `type User { - name: String - age: Int - }`) - require.NoError(t, err) - - col, err := db.GetCollectionByName(ctx, "User") - require.NoError(t, err) - - doc, err := client.NewDocFromJSON([]byte(`{"name": "John", "age": 30}`), col.Definition()) - require.NoError(t, err) - dsKey := core.DataStoreKeyFromDocID(doc.ID()) - - txn, err := db.NewTxn(ctx, false) - require.NoError(t, err) - - wg := sync.WaitGroup{} - wg.Add(1) - - n.sendJobs <- &dagJob{ - session: &wg, - bp: &blockProcessor{ - dsKey: dsKey, - txn: txn, - }, - } - // Give the jobworker time to process the job. 
- time.Sleep(100 * time.Microsecond) - n.Close() - select { - case <-done: - case <-time.After(timeout): - t.Error("failed to close sendJobWorker") - } -} - -func TestSendJobWorker_WithCloseJob_NoError(t *testing.T) { - ctx := context.Background() - db, n := newTestNode(ctx, t) - done := make(chan struct{}) - go func() { - n.sendJobWorker() - close(done) - }() - _, err := db.AddSchema(ctx, `type User { - name: String - age: Int - }`) - require.NoError(t, err) - - col, err := db.GetCollectionByName(ctx, "User") - require.NoError(t, err) - - doc, err := client.NewDocFromJSON([]byte(`{"name": "John", "age": 30}`), col.Definition()) - require.NoError(t, err) - dsKey := core.DataStoreKeyFromDocID(doc.ID()) - - txn, err := db.NewTxn(ctx, false) - require.NoError(t, err) - - wg := sync.WaitGroup{} - wg.Add(1) - - n.sendJobs <- &dagJob{ - session: &wg, - bp: &blockProcessor{ - dsKey: dsKey, - txn: txn, - }, - } - - n.closeJob <- dsKey.DocID - - n.Close() - select { - case <-done: - case <-time.After(timeout): - t.Error("failed to close sendJobWorker") - } -} - -func TestSendJobWorker_WithPeer_NoError(t *testing.T) { - ctx := context.Background() - db1, n1 := newTestNode(ctx, t) - db2, n2 := newTestNode(ctx, t) - - addrs, err := netutils.ParsePeers([]string{n1.host.Addrs()[0].String() + "/p2p/" + n1.PeerID().String()}) - require.NoError(t, err) - n2.Bootstrap(addrs) - - err = n1.WaitForPeerConnectionEvent(n2.PeerID()) - require.NoError(t, err) - err = n2.WaitForPeerConnectionEvent(n1.PeerID()) - require.NoError(t, err) - done := make(chan struct{}) - go func() { - n2.sendJobWorker() - close(done) - }() - - _, err = db1.AddSchema(ctx, `type User { - name: String - age: Int - }`) - require.NoError(t, err) - _, err = db2.AddSchema(ctx, `type User { - name: String - age: Int - }`) - require.NoError(t, err) - - col, err := db1.GetCollectionByName(ctx, "User") - require.NoError(t, err) - - doc, err := client.NewDocFromJSON([]byte(`{"name": "John", "age": 30}`), col.Definition()) - require.NoError(t, err) - dsKey := core.DataStoreKeyFromDocID(doc.ID()) - - err = col.Create(ctx, doc) - require.NoError(t, err) - - txn1, _ := db1.NewTxn(ctx, false) - heads, _, err := clock.NewHeadSet(txn1.Headstore(), dsKey.ToHeadStoreKey().WithFieldId(core.COMPOSITE_NAMESPACE)).List(ctx) - require.NoError(t, err) - txn1.Discard(ctx) - - txn2, err := db2.NewTxn(ctx, false) - require.NoError(t, err) - - wg := sync.WaitGroup{} - wg.Add(1) - - fetcher := n2.Peer.newDAGSyncerTxn(txn2) - - n2.sendJobs <- &dagJob{ - bp: newBlockProcessor(n2.Peer, txn2, col, dsKey, fetcher), - session: &wg, - cid: heads[0], - } - wg.Wait() - - err = txn2.Commit(ctx) - require.NoError(t, err) - - b, err := n1.db.Blockstore().Get(ctx, heads[0]) - require.NoError(t, err) - block, err := coreblock.GetFromBytes(b.RawData()) - require.NoError(t, err) - - for _, link := range block.Links { - exists, err := n2.db.Blockstore().Has(ctx, link.Cid) - require.NoError(t, err) - require.True(t, exists) - } - - n1.Close() - n2.Close() - select { - case <-done: - case <-time.After(timeout): - t.Error("failed to close sendJobWorker") - } -} diff --git a/net/errors.go b/net/errors.go index 773eb8765d..eb53a8e2a5 100644 --- a/net/errors.go +++ b/net/errors.go @@ -19,13 +19,14 @@ import ( ) const ( - errPushLog = "failed to push log" - errFailedToGetDocID = "failed to get DocID from broadcast message" - errPublishingToDocIDTopic = "can't publish log %s for docID %s" - errPublishingToSchemaTopic = "can't publish log %s for schema %s" - errReplicatorExists = "replicator 
already exists for %s with peerID %s" - errReplicatorDocID = "failed to get docID for replicator %s with peerID %s" - errReplicatorCollections = "failed to get collections for replicator" + errPushLog = "failed to push log" + errFailedToGetDocID = "failed to get DocID from broadcast message" + errPublishingToDocIDTopic = "can't publish log %s for docID %s" + errPublishingToSchemaTopic = "can't publish log %s for schema %s" + errReplicatorExists = "replicator already exists for %s with peerID %s" + errReplicatorDocID = "failed to get docID for replicator %s with peerID %s" + errReplicatorCollections = "failed to get collections for replicator" + errCheckingForExistingBlock = "failed to check for existing block" ) var ( @@ -38,6 +39,7 @@ var ( ErrNilDB = errors.New("database object can't be nil") ErrNilUpdateChannel = errors.New("tried to subscribe to update channel, but update channel is nil") ErrSelfTargetForReplicator = errors.New("can't target ourselves as a replicator") + ErrCheckingForExistingBlock = errors.New(errCheckingForExistingBlock) ) func NewErrPushLog(inner error, kv ...errors.KV) error { @@ -67,3 +69,7 @@ func NewErrReplicatorDocID(inner error, collection string, peerID peer.ID, kv .. func NewErrReplicatorCollections(inner error, kv ...errors.KV) error { return errors.Wrap(errReplicatorCollections, inner, kv...) } + +func NewErrCheckingForExistingBlock(inner error, cid string) error { + return errors.Wrap(errCheckingForExistingBlock, inner, errors.NewKV("cid", cid)) +} diff --git a/net/node.go b/net/node.go index 7683d3fb8f..ffd60e52fb 100644 --- a/net/node.go +++ b/net/node.go @@ -36,8 +36,8 @@ import ( "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/routing" - "github.com/multiformats/go-multiaddr" + "github.com/sourcenetwork/corelog" "github.com/sourcenetwork/go-libp2p-pubsub-rpc/finalizer" diff --git a/net/peer.go b/net/peer.go index 3d728a1d87..fc49aec7ec 100644 --- a/net/peer.go +++ b/net/peer.go @@ -21,8 +21,6 @@ import ( "github.com/ipfs/boxo/bitswap/network" "github.com/ipfs/boxo/blockservice" exchange "github.com/ipfs/boxo/exchange" - dagsyncer "github.com/ipfs/boxo/fetcher" - dagsyncerbs "github.com/ipfs/boxo/fetcher/impl/blockservice" "github.com/ipfs/go-cid" ds "github.com/ipfs/go-datastore" gostream "github.com/libp2p/go-libp2p-gostream" @@ -44,10 +42,6 @@ import ( pb "github.com/sourcenetwork/defradb/net/pb" ) -var ( - numWorkers = 5 -) - // Peer is a DefraDB Peer node which exposes all the LibP2P host/peer functionality // to the underlying DefraDB instance. type Peer struct { @@ -63,20 +57,11 @@ type Peer struct { server *server p2pRPC *grpc.Server // rpc server over the P2P network - // Used to close the dagWorker pool for a given document. - // The string represents a docID. 
- closeJob chan string - sendJobs chan *dagJob - - // outstanding log request currently being processed - queuedChildren *cidSafeSet - // replicators is a map from collectionName => peerId replicators map[string]map[peer.ID]struct{} mu sync.Mutex // peer DAG service - dagsyncerbs.FetcherConfig exch exchange.Interface bserv blockservice.BlockService @@ -100,20 +85,17 @@ func NewPeer( ctx, cancel := context.WithCancel(ctx) p := &Peer{ - host: h, - dht: dht, - ps: ps, - db: db, - p2pRPC: grpc.NewServer(serverOptions...), - ctx: ctx, - cancel: cancel, - closeJob: make(chan string), - sendJobs: make(chan *dagJob), - replicators: make(map[string]map[peer.ID]struct{}), - queuedChildren: newCidSafeSet(), + host: h, + dht: dht, + ps: ps, + db: db, + p2pRPC: grpc.NewServer(serverOptions...), + ctx: ctx, + cancel: cancel, + replicators: make(map[string]map[peer.ID]struct{}), } var err error - p.server, err = newServer(p, db, dialOptions...) + p.server, err = newServer(p, dialOptions...) if err != nil { return nil, err } @@ -124,7 +106,6 @@ func NewPeer( } p.setupBlockService() - p.setupDAGService() return p, nil } @@ -190,9 +171,6 @@ func (p *Peer) Start() error { } }() - // start sendJobWorker - go p.sendJobWorker() - return nil } @@ -496,17 +474,6 @@ func (p *Peer) setupBlockService() { p.exch = bswap } -func (p *Peer) setupDAGService() { - p.FetcherConfig = dagsyncerbs.NewFetcherConfig(p.bserv) -} - -func (p *Peer) newDAGSyncerTxn(txn datastore.Txn) dagsyncer.Fetcher { - return p.FetcherWithSession( - p.ctx, - blockservice.NewSession(p.ctx, blockservice.New(txn.DAGstore(), p.exch)), - ) -} - func stopGRPCServer(ctx context.Context, server *grpc.Server) { stopped := make(chan struct{}) go func() { diff --git a/net/process.go b/net/process.go index 882c29c360..bc99216022 100644 --- a/net/process.go +++ b/net/process.go @@ -13,229 +13,146 @@ package net import ( - "container/list" "context" - "fmt" "sync" + "time" - dagsyncer "github.com/ipfs/boxo/fetcher" + "github.com/ipfs/boxo/blockservice" + "github.com/ipfs/go-cid" + "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/sourcenetwork/corelog" - "github.com/sourcenetwork/defradb/client" - "github.com/sourcenetwork/defradb/datastore" - "github.com/sourcenetwork/defradb/errors" - "github.com/sourcenetwork/defradb/internal/core" coreblock "github.com/sourcenetwork/defradb/internal/core/block" - "github.com/sourcenetwork/defradb/internal/db/base" - merklecrdt "github.com/sourcenetwork/defradb/internal/merkle/crdt" +) + +var ( + dagSyncTimeout = time.Second * 60 ) type blockProcessor struct { *Peer - txn datastore.Txn - col client.Collection - dsKey core.DataStoreKey - dagSyncer dagsyncer.Fetcher - // List of composite blocks to eventually merge - composites *list.List + wg *sync.WaitGroup + bsSession *blockservice.Session + queuedChildren *cidSafeSet } func newBlockProcessor( + ctx context.Context, p *Peer, - txn datastore.Txn, - col client.Collection, - dsKey core.DataStoreKey, - dagSyncer dagsyncer.Fetcher, ) *blockProcessor { return &blockProcessor{ - Peer: p, - composites: list.New(), - txn: txn, - col: col, - dsKey: dsKey, - dagSyncer: dagSyncer, - } -} - -// mergeBlock runs trough the list of composite blocks and sends them for processing. 
-func (bp *blockProcessor) mergeBlocks(ctx context.Context) { - for e := bp.composites.Front(); e != nil; e = e.Next() { - block := e.Value.(*coreblock.Block) - link, _ := block.GenerateLink() - err := bp.processBlock(ctx, block, link, "") - if err != nil { - log.ErrorContextE( - ctx, - "Failed to process block", - err, - corelog.String("DocID", bp.dsKey.DocID), - corelog.Any("CID", link.Cid), - ) - } - } -} - -// processBlock merges the block and its children to the datastore and sets the head accordingly. -func (bp *blockProcessor) processBlock( - ctx context.Context, - block *coreblock.Block, - blockLink cidlink.Link, - field string, -) error { - crdt, err := initCRDTForType(bp.txn, bp.col, bp.dsKey, field) - if err != nil { - return err - } - - err = crdt.Clock().ProcessBlock(ctx, block, blockLink) - if err != nil { - return err - } - - for _, link := range block.Links { - if link.Name == core.HEAD { - continue - } - - b, err := bp.txn.DAGstore().Get(ctx, link.Cid) - if err != nil { - return err - } - - childBlock, err := coreblock.GetFromBytes(b.RawData()) - if err != nil { - return err - } - - if err := bp.processBlock(ctx, childBlock, link.Link, link.Name); err != nil { - log.ErrorContextE( - ctx, - "Failed to process block", - err, - corelog.String("DocID", bp.dsKey.DocID), - corelog.Any("CID", link.Cid), - ) - } - } - - return nil -} - -func initCRDTForType( - txn datastore.Txn, - col client.Collection, - dsKey core.DataStoreKey, - field string, -) (merklecrdt.MerkleCRDT, error) { - var key core.DataStoreKey - var ctype client.CType - description := col.Description() - if field == "" { // empty field name implies composite type - key = base.MakeDataStoreKeyWithCollectionDescription( - description, - ).WithInstanceInfo( - dsKey, - ).WithFieldId( - core.COMPOSITE_NAMESPACE, - ) - - return merklecrdt.NewMerkleCompositeDAG( - txn, - core.NewCollectionSchemaVersionKey(col.Schema().VersionID, col.ID()), - key, - field, - ), nil + Peer: p, + wg: &sync.WaitGroup{}, + bsSession: blockservice.NewSession(ctx, p.bserv), + queuedChildren: newCidSafeSet(), } - - fd, ok := col.Definition().GetFieldByName(field) - if !ok { - return nil, errors.New(fmt.Sprintf("Couldn't find field %s for doc %s", field, dsKey.ToString())) - } - ctype = fd.Typ - fieldID := fd.ID.String() - key = base.MakeDataStoreKeyWithCollectionDescription(description).WithInstanceInfo(dsKey).WithFieldId(fieldID) - - return merklecrdt.InstanceWithStore( - txn, - core.NewCollectionSchemaVersionKey(col.Schema().VersionID, col.ID()), - ctype, - fd.Kind, - key, - field, - ) } // processRemoteBlock stores the block in the DAG store and initiates a sync of the block's children. 
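+// Note that, unlike the replaced flow, no merging happens at this stage: the
+// block and its children are only persisted, and the merge itself is driven
+// afterwards by the DAGMerge event published from PushLog.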
func (bp *blockProcessor) processRemoteBlock( ctx context.Context, - session *sync.WaitGroup, block *coreblock.Block, ) error { - link, err := block.GenerateLink() - if err != nil { - return err - } - - b, err := block.Marshal() + // Store the block in the DAG store + lsys := cidlink.DefaultLinkSystem() + lsys.SetWriteStorage(bp.db.Blockstore().AsIPLDStorage()) + _, err := lsys.Store(linking.LinkContext{Ctx: ctx}, coreblock.GetLinkPrototype(), block.GenerateNode()) if err != nil { return err } - - if err := bp.txn.DAGstore().AsIPLDStorage().Put(ctx, link.Binary(), b); err != nil { - return err - } - - bp.handleChildBlocks(ctx, session, block) + // Initiate a sync of the block's children + bp.wg.Add(1) + bp.handleChildBlocks(ctx, block) return nil } func (bp *blockProcessor) handleChildBlocks( ctx context.Context, - session *sync.WaitGroup, block *coreblock.Block, ) { - if block.Delta.IsComposite() { - bp.composites.PushFront(block) - } + defer bp.wg.Done() if len(block.Links) == 0 { return } - ctx, cancel := context.WithTimeout(ctx, DAGSyncTimeout) - defer cancel() - + links := make([]cid.Cid, 0, len(block.Links)) for _, link := range block.Links { - if !bp.queuedChildren.Visit(link.Cid) { // reserve for processing - continue - } - - exist, err := bp.txn.DAGstore().Has(ctx, link.Cid) + exists, err := bp.db.Blockstore().Has(ctx, link.Cid) if err != nil { - log.ErrorContext( + log.ErrorContextE( ctx, - "Failed to check for existing block", + "Failed to check if block exists", + err, corelog.Any("CID", link.Cid), - corelog.Any("ERROR", err), ) + continue } - if exist { + if exists { continue } + if bp.queuedChildren.Visit(link.Cid) { + links = append(links, link.Cid) + } + } + + getCtx, cancel := context.WithTimeout(ctx, dagSyncTimeout) + defer cancel() - session.Add(1) - job := &dagJob{ - session: session, - cid: link.Cid, - bp: bp, + childBlocks := bp.bsSession.GetBlocks(getCtx, links) + + for rawBlock := range childBlocks { + block, err := coreblock.GetFromBytes(rawBlock.RawData()) + if err != nil { + log.ErrorContextE( + ctx, + "Failed to get block from bytes", + err, + corelog.Any("CID", rawBlock.Cid()), + ) + continue } + bp.wg.Add(1) + go bp.handleChildBlocks(ctx, block) + } - select { - case bp.sendJobs <- job: - case <-bp.ctx.Done(): - return // jump out + for _, link := range links { + bp.queuedChildren.Remove(link) + } +} + +type cidSafeSet struct { + set map[cid.Cid]struct{} + mux sync.Mutex +} + +func newCidSafeSet() *cidSafeSet { + return &cidSafeSet{ + set: make(map[cid.Cid]struct{}), + } +} + +// Visit checks if we can visit this node, or +// if its already being visited +func (s *cidSafeSet) Visit(c cid.Cid) bool { + var b bool + s.mux.Lock() + { + if _, ok := s.set[c]; !ok { + s.set[c] = struct{}{} + b = true } } + s.mux.Unlock() + return b +} + +func (s *cidSafeSet) Remove(c cid.Cid) { + s.mux.Lock() + { + delete(s.set, c) + } + s.mux.Unlock() } diff --git a/net/server.go b/net/server.go index 1cd9910856..a44acfa20a 100644 --- a/net/server.go +++ b/net/server.go @@ -22,18 +22,15 @@ import ( libpeer "github.com/libp2p/go-libp2p/core/peer" "github.com/sourcenetwork/corelog" rpc "github.com/sourcenetwork/go-libp2p-pubsub-rpc" - "github.com/sourcenetwork/immutable" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" grpcpeer "google.golang.org/grpc/peer" "google.golang.org/protobuf/proto" "github.com/sourcenetwork/defradb/client" - "github.com/sourcenetwork/defradb/datastore/badger/v4" "github.com/sourcenetwork/defradb/errors" - 
"github.com/sourcenetwork/defradb/internal/core" + "github.com/sourcenetwork/defradb/events" coreblock "github.com/sourcenetwork/defradb/internal/core/block" - "github.com/sourcenetwork/defradb/internal/db" pb "github.com/sourcenetwork/defradb/net/pb" ) @@ -45,7 +42,6 @@ import ( type server struct { peer *Peer opts []grpc.DialOption - db client.DB topics map[string]pubsubTopic mu sync.Mutex @@ -72,12 +68,11 @@ type pubsubTopic struct { // newServer creates a new network server that handle/directs RPC requests to the // underlying DB instance. -func newServer(p *Peer, db client.DB, opts ...grpc.DialOption) (*server, error) { +func newServer(p *Peer, opts ...grpc.DialOption) (*server, error) { s := &server{ peer: p, conns: make(map[libpeer.ID]*grpc.ClientConn), topics: make(map[string]pubsubTopic), - db: db, docQueue: &docQueue{ docs: make(map[string]chan struct{}), }, @@ -97,7 +92,7 @@ func newServer(p *Peer, db client.DB, opts ...grpc.DialOption) (*server, error) } // Get all DocIDs across all collections in the DB - cols, err := s.db.GetCollections(s.peer.ctx, client.CollectionFetchOptions{}) + cols, err := s.peer.db.GetCollections(s.peer.ctx, client.CollectionFetchOptions{}) if err != nil { return nil, err } @@ -224,157 +219,51 @@ func (s *server) PushLog(ctx context.Context, req *pb.PushLogRequest) (*pb.PushL } }() - // make sure were not processing twice - if canVisit := s.peer.queuedChildren.Visit(cid); !canVisit { - return &pb.PushLogReply{}, nil - } - defer s.peer.queuedChildren.Remove(cid) - // check if we already have this block - exists, err := s.db.Blockstore().Has(ctx, cid) + exists, err := s.peer.db.Blockstore().Has(ctx, cid) if err != nil { - return nil, errors.Wrap(fmt.Sprintf("failed to check for existing block %s", cid), err) + return nil, NewErrCheckingForExistingBlock(err, cid.String()) } if exists { return &pb.PushLogReply{}, nil } - dsKey := core.DataStoreKeyFromDocID(docID) - - var txnErr error - for retry := 0; retry < s.peer.db.MaxTxnRetries(); retry++ { - // To prevent a potential deadlock on DAG sync if an error occures mid process, we handle - // each process on a single transaction. - txn, err := s.db.NewConcurrentTxn(ctx, false) - if err != nil { - return nil, err - } - defer txn.Discard(ctx) - - // use a transaction for all operations - ctx = db.SetContextTxn(ctx, txn) - - // Currently a schema is the best way we have to link a push log request to a collection, - // this will change with https://github.com/sourcenetwork/defradb/issues/1085 - col, err := s.getActiveCollection(ctx, s.db, string(req.Body.SchemaRoot)) - if err != nil { - return nil, err - } - - // Create a new DAG service with the current transaction - dagSyncer := s.peer.newDAGSyncerTxn(txn) - - // handleComposite - block, err := coreblock.GetFromBytes(req.Body.Log.Block) - if err != nil { - return nil, errors.Wrap("failed to decode block", err) - } - - var wg sync.WaitGroup - bp := newBlockProcessor(s.peer, txn, col, dsKey, dagSyncer) - err = bp.processRemoteBlock(ctx, &wg, block) - if err != nil { - log.ErrorContextE( - ctx, - "Failed to process remote block", - err, - corelog.String("DocID", dsKey.DocID), - corelog.Any("CID", cid), - ) - } - wg.Wait() - bp.mergeBlocks(ctx) - - err = s.syncIndexedDocs(ctx, col, docID) - if err != nil { - return nil, err - } - - // dagWorkers specific to the DocID will have been spawned within handleChildBlocks. - // Once we are done with the dag syncing process, we can get rid of those workers. 
- if s.peer.closeJob != nil {
- s.peer.closeJob <- dsKey.DocID
- }
-
- if txnErr = txn.Commit(ctx); txnErr != nil {
- if errors.Is(txnErr, badger.ErrTxnConflict) {
- continue
- }
- return &pb.PushLogReply{}, txnErr
- }
-
- // Once processed, subscribe to the DocID topic on the pubsub network unless we already
- // suscribe to the collection.
- if !s.hasPubSubTopic(col.SchemaRoot()) {
- err = s.addPubSubTopic(dsKey.DocID, true)
- if err != nil {
- return nil, err
- }
- }
- return &pb.PushLogReply{}, nil
- }
-
- return &pb.PushLogReply{}, client.NewErrMaxTxnRetries(txnErr)
-}
-
-func (*server) getActiveCollection(
- ctx context.Context,
- store client.Store,
- schemaRoot string,
-) (client.Collection, error) {
- cols, err := store.GetCollections(
- ctx,
- client.CollectionFetchOptions{
- SchemaRoot: immutable.Some(schemaRoot),
- },
- )
+ block, err := coreblock.GetFromBytes(req.Body.Log.Block)
 if err != nil {
- return nil, errors.Wrap(fmt.Sprintf("Failed to get collection from schemaRoot %s", schemaRoot), err)
- }
- if len(cols) == 0 {
- return nil, client.NewErrCollectionNotFoundForSchema(schemaRoot)
- }
- var col client.Collection
- for _, c := range cols {
- if col != nil && col.Name().HasValue() && !c.Name().HasValue() {
- continue
- }
- col = c
- }
- return col, nil
-}
-
-func (s *server) syncIndexedDocs(
- ctx context.Context,
- col client.Collection,
- docID client.DocID,
-) error {
- // remove transaction from old context
- oldCtx := db.SetContextTxn(ctx, nil)
-
- //TODO-ACP: https://github.com/sourcenetwork/defradb/issues/2365
- // Resolve while handling acp <> secondary indexes.
- oldDoc, err := col.Get(oldCtx, docID, false)
- isNewDoc := errors.Is(err, client.ErrDocumentNotFoundOrNotAuthorized)
- if !isNewDoc && err != nil {
- return err
+ return nil, err
 }

- //TODO-ACP: https://github.com/sourcenetwork/defradb/issues/2365
- // Resolve while handling acp <> secondary indexes.
- doc, err := col.Get(ctx, docID, false)
- isDeletedDoc := errors.Is(err, client.ErrDocumentNotFoundOrNotAuthorized)
- if !isDeletedDoc && err != nil {
- return err
+ bp := newBlockProcessor(ctx, s.peer)
+ err = bp.processRemoteBlock(ctx, block)
+ if err != nil {
+ log.ErrorContextE(
+ ctx,
+ "Failed to process remote block",
+ err,
+ corelog.String("DocID", docID.String()),
+ corelog.Any("CID", cid),
+ )
+ }
+ bp.wg.Wait()
+ if s.peer.db.Events().DAGMerges.HasValue() {
+ mergeCompleteChan := make(chan struct{})
+ s.peer.db.Events().DAGMerges.Value().Publish(events.DAGMerge{
+ Cid: cid,
+ SchemaRoot: string(req.Body.SchemaRoot),
+ MergeCompleteChan: mergeCompleteChan,
+ })
+ <-mergeCompleteChan
 }

- if isDeletedDoc {
- return col.DeleteDocIndex(oldCtx, oldDoc)
- } else if isNewDoc {
- return col.CreateDocIndex(ctx, doc)
- } else {
- return col.UpdateDocIndex(ctx, oldDoc, doc)
+ // Once processed, subscribe to the DocID topic on the pubsub network unless we already
+ // subscribe to the collection.
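+ // A collection-level subscription already delivers updates for every
+ // document under that schema root, so a per-document topic would be
+ // redundant in that case.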
+ if !s.hasPubSubTopic(string(req.Body.SchemaRoot)) { + err = s.addPubSubTopic(docID.String(), true) + if err != nil { + return nil, err + } } + return &pb.PushLogReply{}, nil } // GetHeadLog receives a get head log request diff --git a/net/server_test.go b/net/server_test.go index 93d7d8130f..47d6a68aa8 100644 --- a/net/server_test.go +++ b/net/server_test.go @@ -32,8 +32,8 @@ import ( func TestNewServerSimple(t *testing.T) { ctx := context.Background() - db, n := newTestNode(ctx, t) - _, err := newServer(n.Peer, db) + _, n := newTestNode(ctx, t) + _, err := newServer(n.Peer) require.NoError(t, err) } @@ -42,7 +42,7 @@ func TestNewServerWithDBClosed(t *testing.T) { db, n := newTestNode(ctx, t) db.Close() - _, err := newServer(n.Peer, db) + _, err := newServer(n.Peer) require.ErrorIs(t, err, memory.ErrClosed) } @@ -60,7 +60,8 @@ func TestNewServerWithGetAllCollectionError(t *testing.T) { ctx := context.Background() db, n := newTestNode(ctx, t) mDB := mockDBColError{db} - _, err := newServer(n.Peer, &mDB) + n.Peer.db = &mDB + _, err := newServer(n.Peer) require.ErrorIs(t, err, mockError) } @@ -80,7 +81,7 @@ func TestNewServerWithCollectionSubscribed(t *testing.T) { err = n.AddP2PCollections(ctx, []string{col.SchemaRoot()}) require.NoError(t, err) - _, err = newServer(n.Peer, db) + _, err = newServer(n.Peer) require.NoError(t, err) } @@ -118,8 +119,8 @@ func TestNewServerWithGetAllDocIDsError(t *testing.T) { require.NoError(t, err) mDB := mockDBDocIDsError{db} - - _, err = newServer(n.Peer, &mDB) + n.Peer.db = &mDB + _, err = newServer(n.Peer) require.ErrorIs(t, err, mockError) } @@ -145,7 +146,7 @@ func TestNewServerWithAddTopicError(t *testing.T) { _, err = rpc.NewTopic(ctx, n.Peer.ps, n.Peer.host.ID(), doc.ID().String(), true) require.NoError(t, err) - _, err = newServer(n.Peer, db) + _, err = newServer(n.Peer) require.ErrorContains(t, err, "topic already exists") } @@ -190,7 +191,7 @@ func TestNewServerWithEmitterError(t *testing.T) { n.Peer.host = &mockHost{n.Peer.host} - _, err = newServer(n.Peer, db) + _, err = newServer(n.Peer) require.NoError(t, err) } diff --git a/tests/clients/cli/wrapper_collection.go b/tests/clients/cli/wrapper_collection.go index 618d9491d2..62458dae99 100644 --- a/tests/clients/cli/wrapper_collection.go +++ b/tests/clients/cli/wrapper_collection.go @@ -377,15 +377,3 @@ func (c *Collection) GetIndexes(ctx context.Context) ([]client.IndexDescription, } return indexes, nil } - -func (c *Collection) CreateDocIndex(context.Context, *client.Document) error { - return ErrMethodIsNotImplemented -} - -func (c *Collection) UpdateDocIndex(ctx context.Context, oldDoc, newDoc *client.Document) error { - return ErrMethodIsNotImplemented -} - -func (c *Collection) DeleteDocIndex(context.Context, *client.Document) error { - return ErrMethodIsNotImplemented -} diff --git a/tests/integration/db.go b/tests/integration/db.go index c473e4cdd0..1e8fe82731 100644 --- a/tests/integration/db.go +++ b/tests/integration/db.go @@ -105,6 +105,7 @@ func NewBadgerFileDB(ctx context.Context, t testing.TB) (client.DB, error) { func setupDatabase(s *state) (client.DB, string, error) { opts := []node.Option{ db.WithUpdateEvents(), + db.WithDAGMergeEvents(), node.WithLensPoolSize(lensPoolSize), // The test framework sets this up elsewhere when required so that it may be wrapped // into a [client.DB]. From ccfc92ff60cf3f1d869b3f277b126faabfbb12b0 Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Fri, 31 May 2024 00:33:05 -0400 Subject: [PATCH 05/15] reduce number of leaked go routines. 
This helps the current race condition a lot. --- net/node.go | 76 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/net/node.go b/net/node.go index ffd60e52fb..7b403720e6 100644 --- a/net/node.go +++ b/net/node.go @@ -70,8 +70,9 @@ type Node struct { // receives an event when a pushLog request has been processed. pushLogEvent chan EvtReceivedPushLog - ctx context.Context - cancel context.CancelFunc + ctx context.Context + cancel context.CancelFunc + dhtClose func() error } // NewNode creates a new network node instance of DefraDB, wired into libp2p. @@ -101,8 +102,11 @@ func NewNode( fin := finalizer.NewFinalizer() + ctx, cancel := context.WithCancel(ctx) + peerstore, err := pstoreds.NewPeerstore(ctx, db.Peerstore(), pstoreds.DefaultOpts()) if err != nil { + cancel() return nil, fin.Cleanup(err) } fin.Add(peerstore) @@ -111,6 +115,7 @@ func NewNode( // generate an ephemeral private key key, err := crypto.GenerateEd25519() if err != nil { + cancel() return nil, fin.Cleanup(err) } options.PrivateKey = key @@ -119,6 +124,7 @@ func NewNode( // unmarshal the private key bytes privateKey, err := libp2pCrypto.UnmarshalEd25519PrivateKey(options.PrivateKey) if err != nil { + cancel() return nil, fin.Cleanup(err) } @@ -149,6 +155,7 @@ func NewNode( h, err := libp2p.New(libp2pOpts...) if err != nil { + cancel() return nil, fin.Cleanup(err) } log.InfoContext( @@ -167,12 +174,10 @@ func NewNode( pubsub.WithFloodPublish(true), ) if err != nil { + cancel() return nil, fin.Cleanup(err) } } - - ctx, cancel := context.WithCancel(ctx) - peer, err := NewPeer( ctx, db, @@ -201,6 +206,7 @@ func NewNode( DB: db, ctx: ctx, cancel: cancel, + dhtClose: ddht.Close, } n.subscribeToPeerConnectionEvents() @@ -268,12 +274,21 @@ func (n *Node) subscribeToPeerConnectionEvents() { return } go func() { - for e := range sub.Out() { + for { select { - case n.peerEvent <- e.(event.EvtPeerConnectednessChanged): - default: - <-n.peerEvent - n.peerEvent <- e.(event.EvtPeerConnectednessChanged) + case <-n.ctx.Done(): + sub.Close() + return + case e, ok := <-sub.Out(): + if !ok { + return + } + select { + case n.peerEvent <- e.(event.EvtPeerConnectednessChanged): + default: + <-n.peerEvent + n.peerEvent <- e.(event.EvtPeerConnectednessChanged) + } } } }() @@ -290,12 +305,21 @@ func (n *Node) subscribeToPubSubEvents() { return } go func() { - for e := range sub.Out() { + for { select { - case n.pubSubEvent <- e.(EvtPubSub): - default: - <-n.pubSubEvent - n.pubSubEvent <- e.(EvtPubSub) + case <-n.ctx.Done(): + sub.Close() + return + case e, ok := <-sub.Out(): + if !ok { + return + } + select { + case n.pubSubEvent <- e.(EvtPubSub): + default: + <-n.pubSubEvent + n.pubSubEvent <- e.(EvtPubSub) + } } } }() @@ -312,12 +336,21 @@ func (n *Node) subscribeToPushLogEvents() { return } go func() { - for e := range sub.Out() { + for { select { - case n.pushLogEvent <- e.(EvtReceivedPushLog): - default: - <-n.pushLogEvent - n.pushLogEvent <- e.(EvtReceivedPushLog) + case <-n.ctx.Done(): + sub.Close() + return + case e, ok := <-sub.Out(): + if !ok { + return + } + select { + case n.pushLogEvent <- e.(EvtReceivedPushLog): + default: + <-n.pushLogEvent + n.pushLogEvent <- e.(EvtReceivedPushLog) + } } } }() @@ -428,5 +461,8 @@ func (n Node) Close() { if n.Peer != nil { n.Peer.Close() } + if n.dhtClose != nil { + n.dhtClose() + } n.DB.Close() } From bc6089151ce5bd90370179b5b17246e3a53fc79d Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Fri, 31 May 2024 00:38:49 -0400 Subject: 
[PATCH 06/15] fix lint --- internal/db/merge.go | 3 ++- net/node.go | 32 ++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/internal/db/merge.go b/internal/db/merge.go index 9fb7374c16..f35a3269dd 100644 --- a/internal/db/merge.go +++ b/internal/db/merge.go @@ -19,6 +19,8 @@ import ( cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/sourcenetwork/corelog" + "github.com/sourcenetwork/immutable" + "github.com/sourcenetwork/defradb/client" "github.com/sourcenetwork/defradb/datastore" "github.com/sourcenetwork/defradb/errors" @@ -28,7 +30,6 @@ import ( "github.com/sourcenetwork/defradb/internal/db/base" "github.com/sourcenetwork/defradb/internal/merkle/clock" merklecrdt "github.com/sourcenetwork/defradb/internal/merkle/crdt" - "github.com/sourcenetwork/immutable" ) func (db *db) handleMerges(ctx context.Context, merges events.Subscription[events.DAGMerge]) { diff --git a/net/node.go b/net/node.go index 7b403720e6..e8ac0f1348 100644 --- a/net/node.go +++ b/net/node.go @@ -277,7 +277,14 @@ func (n *Node) subscribeToPeerConnectionEvents() { for { select { case <-n.ctx.Done(): - sub.Close() + err := sub.Close() + if err != nil { + log.ErrorContextE( + n.ctx, + "Failed to close peer connectedness changed event subscription", + err, + ) + } return case e, ok := <-sub.Out(): if !ok { @@ -308,7 +315,14 @@ func (n *Node) subscribeToPubSubEvents() { for { select { case <-n.ctx.Done(): - sub.Close() + err := sub.Close() + if err != nil { + log.ErrorContextE( + n.ctx, + "Failed to close pubsub event subscription", + err, + ) + } return case e, ok := <-sub.Out(): if !ok { @@ -339,7 +353,14 @@ func (n *Node) subscribeToPushLogEvents() { for { select { case <-n.ctx.Done(): - sub.Close() + err := sub.Close() + if err != nil { + log.ErrorContextE( + n.ctx, + "Failed to close push log event subscription", + err, + ) + } return case e, ok := <-sub.Out(): if !ok { @@ -462,7 +483,10 @@ func (n Node) Close() { n.Peer.Close() } if n.dhtClose != nil { - n.dhtClose() + err := n.dhtClose() + if err != nil { + log.ErrorContextE(n.ctx, "Failed to close DHT", err) + } } n.DB.Close() } From c009869e29e0d1b47e6fdd739210feb7853c279b Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Fri, 31 May 2024 13:52:33 -0400 Subject: [PATCH 07/15] apply feedback --- internal/db/merge.go | 278 ++++++++++++++++++++++++++----------------- 1 file changed, 167 insertions(+), 111 deletions(-) diff --git a/internal/db/merge.go b/internal/db/merge.go index f35a3269dd..820be8b72d 100644 --- a/internal/db/merge.go +++ b/internal/db/merge.go @@ -23,6 +23,7 @@ import ( "github.com/sourcenetwork/defradb/client" "github.com/sourcenetwork/defradb/datastore" + "github.com/sourcenetwork/defradb/datastore/badger/v4" "github.com/sourcenetwork/defradb/errors" "github.com/sourcenetwork/defradb/events" "github.com/sourcenetwork/defradb/internal/core" @@ -48,7 +49,8 @@ func (db *db) handleMerges(ctx context.Context, merges events.Subscription[event ctx, "Failed to execute merge", err, - corelog.String("cid", merge.Cid.String()), + corelog.String("CID", merge.Cid.String()), + corelog.String("Error", err.Error()), ) } }() @@ -68,87 +70,86 @@ func (db *db) executeMerge(ctx context.Context, dagMerge events.DAGMerge) error return err } defer txn.Discard(ctx) - mp, err := db.newMergeProcessor(ctx, dagMerge.Cid, dagMerge.SchemaRoot) - if err != nil { - return err - } - mt, err := mp.getHeads(ctx) + + col, err := getCollectionFromRootSchema(ctx, db, dagMerge.SchemaRoot) if err != nil { return err } - err = 
mp.getComposites(ctx, dagMerge.Cid, mt) + + ls := cidlink.DefaultLinkSystem() + ls.SetReadStorage(txn.DAGstore().AsIPLDStorage()) + + docID, err := getDocIDFromBlock(ctx, ls, dagMerge.Cid) if err != nil { return err } - err = mp.merge(ctx) + dsKey := base.MakeDataStoreKeyWithCollectionAndDocID(col.Description(), docID.String()) + + mp, err := db.newMergeProcessor(txn, ls, col, dsKey) if err != nil { return err } - err = mp.syncIndexedDocs(ctx) + + mt, err := getHeadsAsMergeTarget(ctx, txn, dsKey) if err != nil { return err } - return txn.Commit(ctx) -} - -type mergeProcessor struct { - ctx context.Context - txn datastore.Txn - ls linking.LinkSystem - docID client.DocID - mCRDTs map[uint32]merklecrdt.MerkleCRDT - col *collection - schemaVersionKey core.CollectionSchemaVersionKey - dsKey core.DataStoreKey - composites *list.List -} -func (db *db) newMergeProcessor(ctx context.Context, cid cid.Cid, rootSchema string) (*mergeProcessor, error) { - txn, ok := TryGetContextTxn(ctx) - if !ok { - return nil, ErrNoTransactionInContext - } - - ls := cidlink.DefaultLinkSystem() - ls.SetReadStorage(txn.DAGstore().AsIPLDStorage()) - nd, err := ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: cid}, coreblock.SchemaPrototype) + err = mp.loadComposites(ctx, dagMerge.Cid, mt) if err != nil { - return nil, err + return err } - block, err := coreblock.GetFromNode(nd) - if err != nil { - return nil, err + for retry := 0; retry < db.MaxTxnRetries(); retry++ { + err := mp.mergeComposites(ctx) + if err != nil { + return err + } + err = syncIndexedDoc(ctx, docID, col) + if err != nil { + return err + } + err = txn.Commit(ctx) + if err != nil { + if errors.Is(err, badger.ErrTxnConflict) { + txn, err = db.NewTxn(ctx, false) + if err != nil { + return err + } + ctx = SetContextTxn(ctx, txn) + mp.txn = txn + mp.ls.SetReadStorage(txn.DAGstore().AsIPLDStorage()) + continue + } + return err + } + break } - cols, err := db.getCollections( - ctx, - client.CollectionFetchOptions{ - SchemaRoot: immutable.Some(rootSchema), - }, - ) - if err != nil { - return nil, err - } + return nil +} - col := cols[0].(*collection) - docID, err := client.NewDocIDFromString(string(block.Delta.GetDocID())) - if err != nil { - return nil, err - } +type mergeProcessor struct { + txn datastore.Txn + ls linking.LinkSystem + mCRDTs map[string]merklecrdt.MerkleCRDT + col *collection + dsKey core.DataStoreKey + composites *list.List +} +func (db *db) newMergeProcessor( + txn datastore.Txn, + ls linking.LinkSystem, + col *collection, + dsKey core.DataStoreKey, +) (*mergeProcessor, error) { return &mergeProcessor{ - ctx: ctx, - txn: txn, - ls: ls, - docID: docID, - mCRDTs: make(map[uint32]merklecrdt.MerkleCRDT), - col: col, - schemaVersionKey: core.CollectionSchemaVersionKey{ - SchemaVersionID: col.Schema().VersionID, - CollectionID: col.ID(), - }, - dsKey: base.MakeDataStoreKeyWithCollectionAndDocID(col.Description(), docID.String()), + txn: txn, + ls: ls, + mCRDTs: make(map[string]merklecrdt.MerkleCRDT), + col: col, + dsKey: dsKey, composites: list.New(), }, nil } @@ -164,9 +165,13 @@ func newMergeTarget() mergeTarget { } } -// getComposites retrieves the composite blocks for the given document until it reaches a -// block that has already been merged or until we reach the genesis block. 
-func (mp *mergeProcessor) getComposites(ctx context.Context, blockCid cid.Cid, mt mergeTarget) error { +// loadComposites retrieves and stores into the merge processor the composite blocks for the given +// document until it reaches a block that has already been merged or until we reach the genesis block. +func (mp *mergeProcessor) loadComposites( + ctx context.Context, + blockCid cid.Cid, + mt mergeTarget, +) error { nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: blockCid}, coreblock.SchemaPrototype) if err != nil { return err @@ -186,7 +191,7 @@ func (mp *mergeProcessor) getComposites(ctx context.Context, blockCid cid.Cid, m mp.composites.PushFront(block) for _, link := range block.Links { if link.Name == core.HEAD { - err := mp.getComposites(ctx, link.Cid, mt) + err := mp.loadComposites(ctx, link.Cid, mt) if err != nil { return err } @@ -210,43 +215,12 @@ func (mp *mergeProcessor) getComposites(ctx context.Context, blockCid cid.Cid, m newMT.headHeigth = childBlock.Delta.GetPriority() } } - return mp.getComposites(ctx, blockCid, newMT) + return mp.loadComposites(ctx, blockCid, newMT) } return nil } -// getHeads retrieves the heads of the composite DAG for the given document. -func (mp *mergeProcessor) getHeads(ctx context.Context) (mergeTarget, error) { - headset := clock.NewHeadSet( - mp.txn.Headstore(), - mp.dsKey.WithFieldId(core.COMPOSITE_NAMESPACE).ToHeadStoreKey(), - ) - - cids, _, err := headset.List(ctx) - if err != nil { - return mergeTarget{}, err - } - - mt := newMergeTarget() - for _, cid := range cids { - b, err := mp.txn.DAGstore().Get(ctx, cid) - if err != nil { - return mergeTarget{}, err - } - - block, err := coreblock.GetFromBytes(b.RawData()) - if err != nil { - return mergeTarget{}, err - } - - mt.heads[cid] = block - // All heads have the same height so overwriting is ok. 
- mt.headHeigth = block.Delta.GetPriority() - } - return mt, nil -} - -func (mp *mergeProcessor) merge(ctx context.Context) error { +func (mp *mergeProcessor) mergeComposites(ctx context.Context) error { for e := mp.composites.Front(); e != nil; e = e.Next() { block := e.Value.(*coreblock.Block) link, err := block.GenerateLink() @@ -288,12 +262,12 @@ func (mp *mergeProcessor) processBlock( continue } - b, err := mp.txn.DAGstore().Get(ctx, link.Cid) + nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype) if err != nil { return err } - childBlock, err := coreblock.GetFromBytes(b.RawData()) + childBlock, err := coreblock.GetFromNode(nd) if err != nil { return err } @@ -309,13 +283,25 @@ func (mp *mergeProcessor) processBlock( func (mp *mergeProcessor) initCRDTForType( field string, ) (merklecrdt.MerkleCRDT, error) { + mcrdt, exists := mp.mCRDTs[field] + if exists { + return mcrdt, nil + } + + schemaVersionKey := core.CollectionSchemaVersionKey{ + SchemaVersionID: mp.col.Schema().VersionID, + CollectionID: mp.col.ID(), + } + if field == "" { - return merklecrdt.NewMerkleCompositeDAG( + mcrdt = merklecrdt.NewMerkleCompositeDAG( mp.txn, - mp.schemaVersionKey, + schemaVersionKey, mp.dsKey.WithFieldId(core.COMPOSITE_NAMESPACE), "", - ), nil + ) + mp.mCRDTs[field] = mcrdt + return mcrdt, nil } fd, ok := mp.col.Definition().GetFieldByName(field) @@ -324,39 +310,109 @@ func (mp *mergeProcessor) initCRDTForType( return nil, nil } - return merklecrdt.InstanceWithStore( + mcrdt, err := merklecrdt.InstanceWithStore( mp.txn, - mp.schemaVersionKey, + schemaVersionKey, fd.Typ, fd.Kind, mp.dsKey.WithFieldId(fd.ID.String()), field, ) + if err != nil { + return nil, err + } + + mp.mCRDTs[field] = mcrdt + return mcrdt, nil +} + +func getDocIDFromBlock(ctx context.Context, ls linking.LinkSystem, cid cid.Cid) (client.DocID, error) { + nd, err := ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: cid}, coreblock.SchemaPrototype) + if err != nil { + return client.DocID{}, err + } + block, err := coreblock.GetFromNode(nd) + if err != nil { + return client.DocID{}, err + } + return client.NewDocIDFromString(string(block.Delta.GetDocID())) +} + +func getCollectionFromRootSchema(ctx context.Context, db *db, rootSchema string) (*collection, error) { + cols, err := db.getCollections( + ctx, + client.CollectionFetchOptions{ + SchemaRoot: immutable.Some(rootSchema), + }, + ) + if err != nil { + return nil, err + } + if len(cols) == 0 { + return nil, client.NewErrCollectionNotFoundForSchema(rootSchema) + } + // We currently only support one active collection per root schema + // so it is safe to return the first one. + return cols[0].(*collection), nil +} + +// getHeadsAsMergeTarget retrieves the heads of the composite DAG for the given document +// and returns them as a merge target. +func getHeadsAsMergeTarget(ctx context.Context, txn datastore.Txn, dsKey core.DataStoreKey) (mergeTarget, error) { + headset := clock.NewHeadSet( + txn.Headstore(), + dsKey.WithFieldId(core.COMPOSITE_NAMESPACE).ToHeadStoreKey(), + ) + + cids, _, err := headset.List(ctx) + if err != nil { + return mergeTarget{}, err + } + + mt := newMergeTarget() + for _, cid := range cids { + b, err := txn.DAGstore().Get(ctx, cid) + if err != nil { + return mergeTarget{}, err + } + + block, err := coreblock.GetFromBytes(b.RawData()) + if err != nil { + return mergeTarget{}, err + } + + mt.heads[cid] = block + // All heads have the same height so overwriting is ok. 
+ mt.headHeigth = block.Delta.GetPriority() + } + return mt, nil } -func (mp *mergeProcessor) syncIndexedDocs( +func syncIndexedDoc( ctx context.Context, + docID client.DocID, + col *collection, ) error { // remove transaction from old context oldCtx := SetContextTxn(ctx, nil) - oldDoc, err := mp.col.Get(oldCtx, mp.docID, false) + oldDoc, err := col.Get(oldCtx, docID, false) isNewDoc := errors.Is(err, client.ErrDocumentNotFoundOrNotAuthorized) if !isNewDoc && err != nil { return err } - doc, err := mp.col.Get(ctx, mp.docID, false) + doc, err := col.Get(ctx, docID, false) isDeletedDoc := errors.Is(err, client.ErrDocumentNotFoundOrNotAuthorized) if !isDeletedDoc && err != nil { return err } if isDeletedDoc { - return mp.col.deleteIndexedDoc(ctx, oldDoc) + return col.deleteIndexedDoc(ctx, oldDoc) } else if isNewDoc { - return mp.col.indexNewDoc(ctx, doc) + return col.indexNewDoc(ctx, doc) } else { - return mp.col.updateDocIndex(ctx, oldDoc, doc) + return col.updateDocIndex(ctx, oldDoc, doc) } } From 7a31ca9792cf9ac3eaa51a639aa0e6d9664ab757 Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Mon, 3 Jun 2024 09:29:52 -0400 Subject: [PATCH 08/15] fix CRDTs using old transaction --- internal/db/merge.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/db/merge.go b/internal/db/merge.go index 820be8b72d..1720960ccd 100644 --- a/internal/db/merge.go +++ b/internal/db/merge.go @@ -119,6 +119,8 @@ func (db *db) executeMerge(ctx context.Context, dagMerge events.DAGMerge) error ctx = SetContextTxn(ctx, txn) mp.txn = txn mp.ls.SetReadStorage(txn.DAGstore().AsIPLDStorage()) + // Reset the CRDTs to avoid reusing the old transaction. + mp.mCRDTs = make(map[string]merklecrdt.MerkleCRDT) continue } return err From dcd77af55543fb83e7a2a46f578dafe8fea34bf3 Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Mon, 3 Jun 2024 09:45:39 -0400 Subject: [PATCH 09/15] apply feedback --- internal/db/merge.go | 8 ++++---- net/process.go | 42 ++++-------------------------------------- 2 files changed, 8 insertions(+), 42 deletions(-) diff --git a/internal/db/merge.go b/internal/db/merge.go index 1720960ccd..69d001784f 100644 --- a/internal/db/merge.go +++ b/internal/db/merge.go @@ -158,7 +158,7 @@ func (db *db) newMergeProcessor( type mergeTarget struct { heads map[cid.Cid]*coreblock.Block - headHeigth uint64 + headHeight uint64 } func newMergeTarget() mergeTarget { @@ -189,7 +189,7 @@ func (mp *mergeProcessor) loadComposites( return nil } - if block.Delta.GetPriority() >= mt.headHeigth { + if block.Delta.GetPriority() >= mt.headHeight { mp.composites.PushFront(block) for _, link := range block.Links { if link.Name == core.HEAD { @@ -214,7 +214,7 @@ func (mp *mergeProcessor) loadComposites( } newMT.heads[link.Cid] = childBlock - newMT.headHeigth = childBlock.Delta.GetPriority() + newMT.headHeight = childBlock.Delta.GetPriority() } } return mp.loadComposites(ctx, blockCid, newMT) @@ -385,7 +385,7 @@ func getHeadsAsMergeTarget(ctx context.Context, txn datastore.Txn, dsKey core.Da mt.heads[cid] = block // All heads have the same height so overwriting is ok. 
- mt.headHeigth = block.Delta.GetPriority()
+ mt.headHeight = block.Delta.GetPriority()
 }
 return mt, nil
}
diff --git a/net/process.go b/net/process.go
index bc99216022..b4f85134fb 100644
--- a/net/process.go
+++ b/net/process.go
@@ -34,7 +34,7 @@ type blockProcessor struct {
 *Peer
 wg *sync.WaitGroup
 bsSession *blockservice.Session
- queuedChildren *cidSafeSet
+ queuedChildren *sync.Map
}

 func newBlockProcessor(
@@ -45,7 +45,7 @@ func newBlockProcessor(
 Peer: p,
 wg: &sync.WaitGroup{},
 bsSession: blockservice.NewSession(ctx, p.bserv),
- queuedChildren: newCidSafeSet(),
+ queuedChildren: &sync.Map{},
 }
}

@@ -93,7 +93,7 @@ func (bp *blockProcessor) handleChildBlocks(
 if exists {
 continue
 }
- if bp.queuedChildren.Visit(link.Cid) {
+ if _, loaded := bp.queuedChildren.LoadOrStore(link.Cid, struct{}{}); !loaded {
 links = append(links, link.Cid)
 }
 }
@@ -119,40 +119,6 @@ func (bp *blockProcessor) handleChildBlocks(
 }

 for _, link := range links {
- bp.queuedChildren.Remove(link)
+ bp.queuedChildren.Delete(link)
 }
}
-
-type cidSafeSet struct {
- set map[cid.Cid]struct{}
- mux sync.Mutex
-}
-
-func newCidSafeSet() *cidSafeSet {
- return &cidSafeSet{
- set: make(map[cid.Cid]struct{}),
- }
-}
-
-// Visit checks if we can visit this node, or
-// if its already being visited
-func (s *cidSafeSet) Visit(c cid.Cid) bool {
- var b bool
- s.mux.Lock()
- {
- if _, ok := s.set[c]; !ok {
- s.set[c] = struct{}{}
- b = true
- }
- }
- s.mux.Unlock()
- return b
-}
-
-func (s *cidSafeSet) Remove(c cid.Cid) {
- s.mux.Lock()
- {
- delete(s.set, c)
- }
- s.mux.Unlock()
-}
From d779a7fa337bb2435ec4888760c69b053c7a025f Mon Sep 17 00:00:00 2001
From: Fred Carle 
Date: Thu, 6 Jun 2024 17:39:24 -0400
Subject: [PATCH 10/15] apply feedback

---
 events/dag_sync.go | 6 ++++--
 internal/db/db_test.go | 6 ++++++
 internal/db/merge.go | 41 +++++++++++++++++++++++------------------
 net/server.go | 10 +++++-----
 4 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/events/dag_sync.go b/events/dag_sync.go
index 0af06f49ea..4ab568b7d0 100644
--- a/events/dag_sync.go
+++ b/events/dag_sync.go
@@ -11,6 +11,8 @@
 package events

 import (
+ "sync"
+
 "github.com/ipfs/go-cid"

 "github.com/sourcenetwork/immutable"
@@ -25,7 +27,7 @@ type DAGMerge struct {
 Cid cid.Cid
 // SchemaRoot is the root identifier of the schema that defined the shape of the document that was updated.
 SchemaRoot string
- // MergeCompleteChan is a channel that will be closed when the merge is complete
+ // Wg is a wait group that can be used to synchronize the merge,
+ // allowing the caller to optionally block until the merge is complete.
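+ // The publisher is expected to call Wg.Add(1) before publishing and may
+ // then block on Wg.Wait(); the merge handler calls Wg.Done() once the
+ // merge completes.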
- MergeCompleteChan chan struct{} + Wg *sync.WaitGroup } diff --git a/internal/db/db_test.go b/internal/db/db_test.go index 6f5a03e809..3aca97e480 100644 --- a/internal/db/db_test.go +++ b/internal/db/db_test.go @@ -18,6 +18,7 @@ import ( "github.com/sourcenetwork/defradb/acp" badgerds "github.com/sourcenetwork/defradb/datastore/badger/v4" + "github.com/sourcenetwork/defradb/datastore/memory" ) func newMemoryDB(ctx context.Context) (*db, error) { @@ -29,6 +30,11 @@ func newMemoryDB(ctx context.Context) (*db, error) { return newDB(ctx, rootstore, acp.NoACP, nil) } +func newDefraMemoryDB(ctx context.Context) (*db, error) { + rootstore := memory.NewDatastore(ctx) + return newDB(ctx, rootstore, acp.NoACP) +} + func TestNewDB(t *testing.T) { ctx := context.Background() opts := badgerds.Options{Options: badger.DefaultOptions("").WithInMemory(true)} diff --git a/internal/db/merge.go b/internal/db/merge.go index 69d001784f..b78e5b165e 100644 --- a/internal/db/merge.go +++ b/internal/db/merge.go @@ -61,8 +61,8 @@ func (db *db) handleMerges(ctx context.Context, merges events.Subscription[event func (db *db) executeMerge(ctx context.Context, dagMerge events.DAGMerge) error { defer func() { // Notify the caller that the merge is complete. - if dagMerge.MergeCompleteChan != nil { - close(dagMerge.MergeCompleteChan) + if dagMerge.Wg != nil { + dagMerge.Wg.Done() } }() ctx, txn, err := ensureContextTxn(ctx, db, false) @@ -174,6 +174,11 @@ func (mp *mergeProcessor) loadComposites( blockCid cid.Cid, mt mergeTarget, ) error { + if _, ok := mt.heads[blockCid]; ok { + // We've already processed this block. + return nil + } + nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: blockCid}, coreblock.SchemaPrototype) if err != nil { return err @@ -184,11 +189,9 @@ func (mp *mergeProcessor) loadComposites( return err } - if _, ok := mt.heads[blockCid]; ok { - // We've already processed this block. - return nil - } - + // In the simplest case, the new block or its children will link to the current head/heads (merge target) + // of the composite DAG. However, the new block and its children might have branched off from an older block. + // In this case, we also need to walk back the merge target's DAG until we reach a common block. 
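+ //
+ // For example, given the following composite DAG (heights in parentheses):
+ //
+ //   genesis(1) <- B1(2) <- B2(3) <- B3(4)   B3 is the local head
+ //                    \
+ //                     <- C2(3)              C2 is the incoming block
+ //
+ // C2's height is below the merge target's (B3), so the target is rebuilt
+ // one generation back (at B2) and the comparison retried; the walk stops
+ // once the common ancestor (B1), which is already known locally, is reached.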
if block.Delta.GetPriority() >= mt.headHeight { mp.composites.PushFront(block) for _, link := range block.Links { @@ -203,18 +206,20 @@ func (mp *mergeProcessor) loadComposites( newMT := newMergeTarget() for _, b := range mt.heads { for _, link := range b.Links { - nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype) - if err != nil { - return err - } - - childBlock, err := coreblock.GetFromNode(nd) - if err != nil { - return err + if link.Name == core.HEAD { + nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype) + if err != nil { + return err + } + + childBlock, err := coreblock.GetFromNode(nd) + if err != nil { + return err + } + + newMT.heads[link.Cid] = childBlock + newMT.headHeight = childBlock.Delta.GetPriority() } - - newMT.heads[link.Cid] = childBlock - newMT.headHeight = childBlock.Delta.GetPriority() } } return mp.loadComposites(ctx, blockCid, newMT) diff --git a/net/server.go b/net/server.go index a44acfa20a..7c673c261d 100644 --- a/net/server.go +++ b/net/server.go @@ -246,13 +246,13 @@ func (s *server) PushLog(ctx context.Context, req *pb.PushLogRequest) (*pb.PushL } bp.wg.Wait() if s.peer.db.Events().DAGMerges.HasValue() { - mergeCompleteChan := make(chan struct{}) + wg := &sync.WaitGroup{} s.peer.db.Events().DAGMerges.Value().Publish(events.DAGMerge{ - Cid: cid, - SchemaRoot: string(req.Body.SchemaRoot), - MergeCompleteChan: mergeCompleteChan, + Cid: cid, + SchemaRoot: string(req.Body.SchemaRoot), + Wg: wg, }) - <-mergeCompleteChan + wg.Wait() } // Once processed, subscribe to the DocID topic on the pubsub network unless we already From d2eebb7e5eb683cc2e423960468fc35a0920b1af Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Thu, 6 Jun 2024 20:02:42 -0400 Subject: [PATCH 11/15] add tests for merge --- internal/db/db_test.go | 2 +- internal/db/merge.go | 14 +- internal/db/merge_test.go | 313 ++++++++++++++++++++++++++++++++++++++ net/node.go | 13 +- 4 files changed, 327 insertions(+), 15 deletions(-) create mode 100644 internal/db/merge_test.go diff --git a/internal/db/db_test.go b/internal/db/db_test.go index 3aca97e480..387be0154d 100644 --- a/internal/db/db_test.go +++ b/internal/db/db_test.go @@ -32,7 +32,7 @@ func newMemoryDB(ctx context.Context) (*db, error) { func newDefraMemoryDB(ctx context.Context) (*db, error) { rootstore := memory.NewDatastore(ctx) - return newDB(ctx, rootstore, acp.NoACP) + return newDB(ctx, rootstore, acp.NoACP, nil) } func TestNewDB(t *testing.T) { diff --git a/internal/db/merge.go b/internal/db/merge.go index b78e5b165e..323f7ae92c 100644 --- a/internal/db/merge.go +++ b/internal/db/merge.go @@ -118,7 +118,7 @@ func (db *db) executeMerge(ctx context.Context, dagMerge events.DAGMerge) error } ctx = SetContextTxn(ctx, txn) mp.txn = txn - mp.ls.SetReadStorage(txn.DAGstore().AsIPLDStorage()) + mp.lsys.SetReadStorage(txn.DAGstore().AsIPLDStorage()) // Reset the CRDTs to avoid reusing the old transaction. 
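+ // Each cached MerkleCRDT instance captures the transaction it was created
+ // with, so reusing the cache would keep writing through the discarded
+ // transaction.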
mp.mCRDTs = make(map[string]merklecrdt.MerkleCRDT) continue @@ -133,7 +133,7 @@ func (db *db) executeMerge(ctx context.Context, dagMerge events.DAGMerge) error type mergeProcessor struct { txn datastore.Txn - ls linking.LinkSystem + lsys linking.LinkSystem mCRDTs map[string]merklecrdt.MerkleCRDT col *collection dsKey core.DataStoreKey @@ -142,13 +142,13 @@ type mergeProcessor struct { func (db *db) newMergeProcessor( txn datastore.Txn, - ls linking.LinkSystem, + lsys linking.LinkSystem, col *collection, dsKey core.DataStoreKey, ) (*mergeProcessor, error) { return &mergeProcessor{ txn: txn, - ls: ls, + lsys: lsys, mCRDTs: make(map[string]merklecrdt.MerkleCRDT), col: col, dsKey: dsKey, @@ -179,7 +179,7 @@ func (mp *mergeProcessor) loadComposites( return nil } - nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: blockCid}, coreblock.SchemaPrototype) + nd, err := mp.lsys.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: blockCid}, coreblock.SchemaPrototype) if err != nil { return err } @@ -207,7 +207,7 @@ func (mp *mergeProcessor) loadComposites( for _, b := range mt.heads { for _, link := range b.Links { if link.Name == core.HEAD { - nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype) + nd, err := mp.lsys.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype) if err != nil { return err } @@ -269,7 +269,7 @@ func (mp *mergeProcessor) processBlock( continue } - nd, err := mp.ls.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype) + nd, err := mp.lsys.Load(linking.LinkContext{Ctx: ctx}, link.Link, coreblock.SchemaPrototype) if err != nil { return err } diff --git a/internal/db/merge_test.go b/internal/db/merge_test.go new file mode 100644 index 0000000000..0fb5b8704b --- /dev/null +++ b/internal/db/merge_test.go @@ -0,0 +1,313 @@ +// Copyright 2024 Democratized Data Foundation +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package db + +import ( + "context" + "testing" + + "github.com/ipld/go-ipld-prime/linking" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/stretchr/testify/require" + + "github.com/sourcenetwork/defradb/client" + "github.com/sourcenetwork/defradb/events" + "github.com/sourcenetwork/defradb/internal/core" + coreblock "github.com/sourcenetwork/defradb/internal/core/block" + "github.com/sourcenetwork/defradb/internal/db/base" + "github.com/sourcenetwork/defradb/internal/merkle/clock" +) + +const userSchema = ` +type User { + name: String + age: Int + email: String + points: Int +} +` + +func TestMerge_NoError(t *testing.T) { + // Test that a merge can be performed up to the provided CID. 
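+ // A document is created on two in-memory databases, updated once on the remote, and the + // remote blocks are then synced and merged into the local node.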
+ ctx := context.Background() + + // Setup the "local" database + localDB, err := newDefraMemoryDB(ctx) + require.NoError(t, err) + _, err = localDB.AddSchema(ctx, userSchema) + require.NoError(t, err) + localCol, err := localDB.GetCollectionByName(ctx, "User") + require.NoError(t, err) + docMap := map[string]any{ + "name": "Alice", + "age": 30, + } + doc, err := client.NewDocFromMap(docMap, localCol.Definition()) + require.NoError(t, err) + + err = localCol.Create(ctx, doc) + require.NoError(t, err) + + // Setup the "remote" database + remoteDB, err := newDefraMemoryDB(ctx) + require.NoError(t, err) + _, err = remoteDB.AddSchema(ctx, userSchema) + require.NoError(t, err) + remoteCol, err := remoteDB.GetCollectionByName(ctx, "User") + require.NoError(t, err) + doc, err = client.NewDocFromMap(docMap, localCol.Definition()) + require.NoError(t, err) + err = remoteCol.Create(ctx, doc) + require.NoError(t, err) + + // Add a few changes to the remote node + err = doc.Set("points", 100) + require.NoError(t, err) + err = remoteCol.Update(ctx, doc) + require.NoError(t, err) + + // Sync the remote blocks to the local node + err = syncAndMerge(ctx, remoteDB, localDB, remoteCol, localCol, doc.ID().String()) + require.NoError(t, err) + + // verify the local node has the same data as the remote node + localDoc, err := localCol.Get(ctx, doc.ID(), false) + require.NoError(t, err) + localDocString, err := localDoc.String() + require.NoError(t, err) + remoteDoc, err := remoteCol.Get(ctx, doc.ID(), false) + require.NoError(t, err) + remoteDocString, err := remoteDoc.String() + require.NoError(t, err) + require.Equal(t, remoteDocString, localDocString) +} + +func TestMerge_DelayedSync_NoError(t *testing.T) { + // Test that a merge can be performed up to the provided CID. 
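+ // Several remote updates are applied before a single sync, so the merge has to process a + // longer chain of composite blocks at once.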
+ ctx := context.Background() + + // Setup the "local" database + localDB, err := newDefraMemoryDB(ctx) + require.NoError(t, err) + _, err = localDB.AddSchema(ctx, userSchema) + require.NoError(t, err) + localCol, err := localDB.GetCollectionByName(ctx, "User") + require.NoError(t, err) + docMap := map[string]any{ + "name": "Alice", + "age": 30, + } + doc, err := client.NewDocFromMap(docMap, localCol.Definition()) + require.NoError(t, err) + + err = localCol.Create(ctx, doc) + require.NoError(t, err) + + // Setup the "remote" database + remoteDB, err := newDefraMemoryDB(ctx) + require.NoError(t, err) + _, err = remoteDB.AddSchema(ctx, userSchema) + require.NoError(t, err) + remoteCol, err := remoteDB.GetCollectionByName(ctx, "User") + require.NoError(t, err) + doc, err = client.NewDocFromMap(docMap, localCol.Definition()) + require.NoError(t, err) + err = remoteCol.Create(ctx, doc) + require.NoError(t, err) + + // Add a few changes to the remote node + err = doc.Set("points", 100) + require.NoError(t, err) + err = remoteCol.Update(ctx, doc) + require.NoError(t, err) + + err = doc.Set("age", 31) + require.NoError(t, err) + err = remoteCol.Update(ctx, doc) + require.NoError(t, err) + + err = doc.Set("email", "alice@yahoo.com") + require.NoError(t, err) + err = remoteCol.Update(ctx, doc) + require.NoError(t, err) + + // Sync the remote blocks to the local node + err = syncAndMerge(ctx, remoteDB, localDB, remoteCol, localCol, doc.ID().String()) + require.NoError(t, err) + + // verify the local node has the same data as the remote node + localDoc, err := localCol.Get(ctx, doc.ID(), false) + require.NoError(t, err) + localDocString, err := localDoc.String() + require.NoError(t, err) + remoteDoc, err := remoteCol.Get(ctx, doc.ID(), false) + require.NoError(t, err) + remoteDocString, err := remoteDoc.String() + require.NoError(t, err) + require.Equal(t, remoteDocString, localDocString) +} + +func TestMerge_DelayedSyncTwoBranches_NoError(t *testing.T) { + // Test that a merge can be performed up to the provided CID. 
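+ // Two remote nodes update the same document independently, producing two DAG branches that + // are merged one into the other before reaching the local node.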
+ ctx := context.Background() + + // Setup the "local" database + localDB, err := newDefraMemoryDB(ctx) + require.NoError(t, err) + _, err = localDB.AddSchema(ctx, userSchema) + require.NoError(t, err) + localCol, err := localDB.GetCollectionByName(ctx, "User") + require.NoError(t, err) + docMap := map[string]interface{}{ + "name": "Alice", + "age": 30, + } + doc, err := client.NewDocFromMap(docMap, localCol.Definition()) + require.NoError(t, err) + + err = localCol.Create(ctx, doc) + require.NoError(t, err) + + // Setup the "remote" database + remoteDB1, err := newDefraMemoryDB(ctx) + require.NoError(t, err) + _, err = remoteDB1.AddSchema(ctx, userSchema) + require.NoError(t, err) + remoteCol1, err := remoteDB1.GetCollectionByName(ctx, "User") + require.NoError(t, err) + doc, err = client.NewDocFromMap(docMap, remoteCol1.Definition()) + require.NoError(t, err) + err = remoteCol1.Create(ctx, doc) + require.NoError(t, err) + + // Setup the second "remote" database + remoteDB2, err := newDefraMemoryDB(ctx) + require.NoError(t, err) + _, err = remoteDB2.AddSchema(ctx, userSchema) + require.NoError(t, err) + remoteCol2, err := remoteDB2.GetCollectionByName(ctx, "User") + require.NoError(t, err) + doc2, err := client.NewDocFromMap(docMap, remoteCol2.Definition()) + require.NoError(t, err) + err = remoteCol2.Create(ctx, doc2) + require.NoError(t, err) + + // Add a few changes to the remote nodes creating two branches + err = doc.Set("points", 200) + require.NoError(t, err) + err = remoteCol1.Update(ctx, doc) + require.NoError(t, err) + + err = doc2.Set("points", 100) + require.NoError(t, err) + err = remoteCol2.Update(ctx, doc2) + require.NoError(t, err) + + err = doc.Set("age", 31) + require.NoError(t, err) + err = remoteCol1.Update(ctx, doc) + require.NoError(t, err) + + err = doc2.Set("age", 32) + require.NoError(t, err) + err = remoteCol2.Update(ctx, doc2) + require.NoError(t, err) + + err = doc.Set("email", "alice@yahoo.com") + require.NoError(t, err) + err = remoteCol1.Update(ctx, doc) + require.NoError(t, err) + + err = doc2.Set("email", "alice-in-wonderland@yahoo.com") + require.NoError(t, err) + err = remoteCol2.Update(ctx, doc2) + require.NoError(t, err) + + // Sync the remote blocks to the local node + err = syncAndMerge(ctx, remoteDB2, remoteDB1, remoteCol2, remoteCol1, doc.ID().String()) + require.NoError(t, err) + err = syncAndMerge(ctx, remoteDB1, localDB, remoteCol1, localCol, doc.ID().String()) + require.NoError(t, err) + + // verify the local node has the same data as the remote node + localDoc, err := localCol.Get(ctx, doc.ID(), false) + require.NoError(t, err) + localDocString, err := localDoc.String() + require.NoError(t, err) + remoteDoc1, err := remoteCol1.Get(ctx, doc.ID(), false) + require.NoError(t, err) + remoteDocString1, err := remoteDoc1.String() + require.NoError(t, err) + require.Equal(t, remoteDocString1, localDocString) +} + +func syncAndMerge(ctx context.Context, from, to *db, fromCol, toCol client.Collection, docID string) error { + dsKey := base.MakeDataStoreKeyWithCollectionAndDocID(fromCol.Description(), docID) + headset := clock.NewHeadSet( + from.multistore.Headstore(), + dsKey.WithFieldId(core.COMPOSITE_NAMESPACE).ToHeadStoreKey(), + ) + + cids, _, err := headset.List(ctx) + if err != nil { + return err + } + + for _, cid := range cids { + blockBytes, err := from.multistore.DAGstore().AsIPLDStorage().Get(ctx, cid.KeyString()) + if err != nil { + return err + } + block, err := coreblock.GetFromBytes(blockBytes) + if err != nil { + return err + } + err = 
syncDAG(ctx, from, to, block) + if err != nil { + return err + } + err = to.executeMerge(ctx, events.DAGMerge{ + Cid: cid, + SchemaRoot: toCol.SchemaRoot(), + }) + if err != nil { + return err + } + } + return nil +} + +func syncDAG(ctx context.Context, from, to *db, block *coreblock.Block) error { + lsys := cidlink.DefaultLinkSystem() + lsys.SetWriteStorage(to.multistore.DAGstore().AsIPLDStorage()) + _, err := lsys.Store(linking.LinkContext{Ctx: ctx}, coreblock.GetLinkPrototype(), block.GenerateNode()) + if err != nil { + return err + } + + for _, link := range block.Links { + lsys := cidlink.DefaultLinkSystem() + lsys.SetReadStorage(from.multistore.DAGstore().AsIPLDStorage()) + nd, err := lsys.Load(linking.LinkContext{Ctx: ctx}, link, coreblock.SchemaPrototype) + if err != nil { + return err + } + block, err := coreblock.GetFromNode(nd) + if err != nil { + return err + } + err = syncDAG(ctx, from, to, block) + if err != nil { + return err + } + } + return nil +} diff --git a/net/node.go b/net/node.go index e8ac0f1348..1fa8da6316 100644 --- a/net/node.go +++ b/net/node.go @@ -80,7 +80,7 @@ func NewNode( ctx context.Context, db client.DB, opts ...NodeOpt, -) (*Node, error) { +) (node *Node, err error) { options := DefaultOptions() for _, opt := range opts { opt(options) @@ -103,10 +103,14 @@ func NewNode( fin := finalizer.NewFinalizer() ctx, cancel := context.WithCancel(ctx) + defer func() { + if node == nil { + cancel() + } + }() peerstore, err := pstoreds.NewPeerstore(ctx, db.Peerstore(), pstoreds.DefaultOpts()) if err != nil { - cancel() return nil, fin.Cleanup(err) } fin.Add(peerstore) @@ -115,7 +119,6 @@ func NewNode( // generate an ephemeral private key key, err := crypto.GenerateEd25519() if err != nil { - cancel() return nil, fin.Cleanup(err) } options.PrivateKey = key @@ -124,7 +127,6 @@ func NewNode( // unmarshal the private key bytes privateKey, err := libp2pCrypto.UnmarshalEd25519PrivateKey(options.PrivateKey) if err != nil { - cancel() return nil, fin.Cleanup(err) } @@ -155,7 +157,6 @@ func NewNode( h, err := libp2p.New(libp2pOpts...) 
if err != nil { - cancel() return nil, fin.Cleanup(err) } log.InfoContext( @@ -174,7 +175,6 @@ func NewNode( pubsub.WithFloodPublish(true), ) if err != nil { - cancel() return nil, fin.Cleanup(err) } } @@ -188,7 +188,6 @@ func NewNode( options.GRPCDialOptions, ) if err != nil { - cancel() return nil, fin.Cleanup(err) } From 9a817eb4b1b996b1b1c66695badfdfb2ff7effb6 Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Thu, 6 Jun 2024 21:16:19 -0400 Subject: [PATCH 12/15] fix missing wait group increment --- net/server.go | 1 + 1 file changed, 1 insertion(+) diff --git a/net/server.go b/net/server.go index 7c673c261d..94d791854f 100644 --- a/net/server.go +++ b/net/server.go @@ -247,6 +247,7 @@ func (s *server) PushLog(ctx context.Context, req *pb.PushLogRequest) (*pb.PushL bp.wg.Wait() if s.peer.db.Events().DAGMerges.HasValue() { wg := &sync.WaitGroup{} + wg.Add(1) s.peer.db.Events().DAGMerges.Value().Publish(events.DAGMerge{ Cid: cid, SchemaRoot: string(req.Body.SchemaRoot), From efdc1ed99b79a6d6c96e268dc2391aff22b8bb8c Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Fri, 7 Jun 2024 13:18:05 -0400 Subject: [PATCH 13/15] make test more explicitly about the dag --- internal/db/merge_test.go | 376 +++++++++++++++++--------------------- 1 file changed, 171 insertions(+), 205 deletions(-) diff --git a/internal/db/merge_test.go b/internal/db/merge_test.go index 0fb5b8704b..bacaddd40f 100644 --- a/internal/db/merge_test.go +++ b/internal/db/merge_test.go @@ -14,6 +14,8 @@ import ( "context" "testing" + "github.com/fxamacker/cbor/v2" + "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/stretchr/testify/require" @@ -22,292 +24,256 @@ import ( "github.com/sourcenetwork/defradb/events" "github.com/sourcenetwork/defradb/internal/core" coreblock "github.com/sourcenetwork/defradb/internal/core/block" - "github.com/sourcenetwork/defradb/internal/db/base" - "github.com/sourcenetwork/defradb/internal/merkle/clock" + "github.com/sourcenetwork/defradb/internal/core/crdt" ) const userSchema = ` type User { name: String age: Int - email: String - points: Int } ` -func TestMerge_NoError(t *testing.T) { - // Test that a merge can be performed up to the provided CID. 
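+// TestMerge_SingleBranch_NoError builds a linear chain of two composite updates directly in the +// DAG store and verifies that executeMerge applies the latest state to the document.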
+func TestMerge_SingleBranch_NoError(t *testing.T) { ctx := context.Background() - // Setup the "local" database - localDB, err := newDefraMemoryDB(ctx) - require.NoError(t, err) - _, err = localDB.AddSchema(ctx, userSchema) - require.NoError(t, err) - localCol, err := localDB.GetCollectionByName(ctx, "User") - require.NoError(t, err) - docMap := map[string]any{ - "name": "Alice", - "age": 30, - } - doc, err := client.NewDocFromMap(docMap, localCol.Definition()) + db, err := newDefraMemoryDB(ctx) require.NoError(t, err) - err = localCol.Create(ctx, doc) + _, err = db.AddSchema(ctx, userSchema) require.NoError(t, err) - // Setup the "remote" database - remoteDB, err := newDefraMemoryDB(ctx) - require.NoError(t, err) - _, err = remoteDB.AddSchema(ctx, userSchema) - require.NoError(t, err) - remoteCol, err := remoteDB.GetCollectionByName(ctx, "User") - require.NoError(t, err) - doc, err = client.NewDocFromMap(docMap, localCol.Definition()) - require.NoError(t, err) - err = remoteCol.Create(ctx, doc) + col, err := db.GetCollectionByName(ctx, "User") require.NoError(t, err) - // Add a few changes to the remote node - err = doc.Set("points", 100) + lsys := cidlink.DefaultLinkSystem() + lsys.SetWriteStorage(db.multistore.DAGstore().AsIPLDStorage()) + + initialDocState := map[string]any{ + "name": "John", + } + d, docID := newDagBuilder(col, initialDocState) + compInfo, err := d.generateCompositeUpdate(&lsys, initialDocState, compositeInfo{}) require.NoError(t, err) - err = remoteCol.Update(ctx, doc) + compInfo2, err := d.generateCompositeUpdate(&lsys, map[string]any{"name": "Johny"}, compInfo) require.NoError(t, err) - // Sync the remote blocks to the local node - err = syncAndMerge(ctx, remoteDB, localDB, remoteCol, localCol, doc.ID().String()) + err = db.executeMerge(ctx, events.DAGMerge{ + Cid: compInfo2.link.Cid, + SchemaRoot: col.SchemaRoot(), + }) require.NoError(t, err) - // verify the local node has the same data as the remote node - localDoc, err := localCol.Get(ctx, doc.ID(), false) - require.NoError(t, err) - localDocString, err := localDoc.String() + // Verify the document was created with the expected values + doc, err := col.Get(ctx, docID, false) require.NoError(t, err) - remoteDoc, err := remoteCol.Get(ctx, doc.ID(), false) + docMap, err := doc.ToMap() require.NoError(t, err) - remoteDocString, err := remoteDoc.String() - require.NoError(t, err) - require.Equal(t, remoteDocString, localDocString) + + expectedDocMap := map[string]any{ + "_docID": docID.String(), + "name": "Johny", + } + + require.Equal(t, expectedDocMap, docMap) } -func TestMerge_DelayedSync_NoError(t *testing.T) { - // Test that a merge can be performed up to the provided CID. 
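+// TestMerge_DualBranch_NoError merges two sibling composite updates that share the initial block +// as their common ancestor, verifying that both field updates survive the merge.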
+func TestMerge_DualBranch_NoError(t *testing.T) { ctx := context.Background() - // Setup the "local" database - localDB, err := newDefraMemoryDB(ctx) - require.NoError(t, err) - _, err = localDB.AddSchema(ctx, userSchema) - require.NoError(t, err) - localCol, err := localDB.GetCollectionByName(ctx, "User") - require.NoError(t, err) - docMap := map[string]any{ - "name": "Alice", - "age": 30, - } - doc, err := client.NewDocFromMap(docMap, localCol.Definition()) + db, err := newDefraMemoryDB(ctx) require.NoError(t, err) - err = localCol.Create(ctx, doc) + _, err = db.AddSchema(ctx, userSchema) require.NoError(t, err) - // Setup the "remote" database - remoteDB, err := newDefraMemoryDB(ctx) - require.NoError(t, err) - _, err = remoteDB.AddSchema(ctx, userSchema) - require.NoError(t, err) - remoteCol, err := remoteDB.GetCollectionByName(ctx, "User") - require.NoError(t, err) - doc, err = client.NewDocFromMap(docMap, localCol.Definition()) - require.NoError(t, err) - err = remoteCol.Create(ctx, doc) + col, err := db.GetCollectionByName(ctx, "User") require.NoError(t, err) - // Add a few changes to the remote node - err = doc.Set("points", 100) - require.NoError(t, err) - err = remoteCol.Update(ctx, doc) - require.NoError(t, err) + lsys := cidlink.DefaultLinkSystem() + lsys.SetWriteStorage(db.multistore.DAGstore().AsIPLDStorage()) - err = doc.Set("age", 31) + initialDocState := map[string]any{ + "name": "John", + } + d, docID := newDagBuilder(col, initialDocState) + compInfo, err := d.generateCompositeUpdate(&lsys, initialDocState, compositeInfo{}) require.NoError(t, err) - err = remoteCol.Update(ctx, doc) + compInfo2, err := d.generateCompositeUpdate(&lsys, map[string]any{"name": "Johny"}, compInfo) require.NoError(t, err) - err = doc.Set("email", "alice@yahoo.com") - require.NoError(t, err) - err = remoteCol.Update(ctx, doc) + err = db.executeMerge(ctx, events.DAGMerge{ + Cid: compInfo2.link.Cid, + SchemaRoot: col.SchemaRoot(), + }) require.NoError(t, err) - // Sync the remote blocks to the local node - err = syncAndMerge(ctx, remoteDB, localDB, remoteCol, localCol, doc.ID().String()) + compInfo3, err := d.generateCompositeUpdate(&lsys, map[string]any{"age": 30}, compInfo) require.NoError(t, err) - // verify the local node has the same data as the remote node - localDoc, err := localCol.Get(ctx, doc.ID(), false) + err = db.executeMerge(ctx, events.DAGMerge{ + Cid: compInfo3.link.Cid, + SchemaRoot: col.SchemaRoot(), + }) require.NoError(t, err) - localDocString, err := localDoc.String() - require.NoError(t, err) - remoteDoc, err := remoteCol.Get(ctx, doc.ID(), false) + + // Verify the document was created with the expected values + doc, err := col.Get(ctx, docID, false) require.NoError(t, err) - remoteDocString, err := remoteDoc.String() + docMap, err := doc.ToMap() require.NoError(t, err) - require.Equal(t, remoteDocString, localDocString) + + expectedDocMap := map[string]any{ + "_docID": docID.String(), + "age": int64(30), + "name": "Johny", + } + + require.Equal(t, expectedDocMap, docMap) } -func TestMerge_DelayedSyncTwoBranches_NoError(t *testing.T) { - // Test that a merge can be performed up to the provided CID. 
+func TestMerge_DualBranchWithOneIncomplete_CouldNotFindCID(t *testing.T) { ctx := context.Background() - // Setup the "local" database - localDB, err := newDefraMemoryDB(ctx) - require.NoError(t, err) - _, err = localDB.AddSchema(ctx, userSchema) - require.NoError(t, err) - localCol, err := localDB.GetCollectionByName(ctx, "User") - require.NoError(t, err) - docMap := map[string]interface{}{ - "name": "Alice", - "age": 30, - } - doc, err := client.NewDocFromMap(docMap, localCol.Definition()) + db, err := newDefraMemoryDB(ctx) require.NoError(t, err) - err = localCol.Create(ctx, doc) + _, err = db.AddSchema(ctx, userSchema) require.NoError(t, err) - // Setup the "remote" database - remoteDB1, err := newDefraMemoryDB(ctx) - require.NoError(t, err) - _, err = remoteDB1.AddSchema(ctx, userSchema) - require.NoError(t, err) - remoteCol1, err := remoteDB1.GetCollectionByName(ctx, "User") - require.NoError(t, err) - doc, err = client.NewDocFromMap(docMap, remoteCol1.Definition()) - require.NoError(t, err) - err = remoteCol1.Create(ctx, doc) + col, err := db.GetCollectionByName(ctx, "User") require.NoError(t, err) - // Setup the second "remote" database - remoteDB2, err := newDefraMemoryDB(ctx) - require.NoError(t, err) - _, err = remoteDB2.AddSchema(ctx, userSchema) - require.NoError(t, err) - remoteCol2, err := remoteDB2.GetCollectionByName(ctx, "User") - require.NoError(t, err) - doc2, err := client.NewDocFromMap(docMap, remoteCol2.Definition()) - require.NoError(t, err) - err = remoteCol2.Create(ctx, doc2) - require.NoError(t, err) - - // Add a few changes to the remote nodes creating two branches - err = doc.Set("points", 200) - require.NoError(t, err) - err = remoteCol1.Update(ctx, doc) - require.NoError(t, err) + lsys := cidlink.DefaultLinkSystem() + lsys.SetWriteStorage(db.multistore.DAGstore().AsIPLDStorage()) - err = doc2.Set("points", 100) + initialDocState := map[string]any{ + "name": "John", + } + d, _ := newDagBuilder(col, initialDocState) + compInfo, err := d.generateCompositeUpdate(&lsys, initialDocState, compositeInfo{}) require.NoError(t, err) - err = remoteCol2.Update(ctx, doc2) + compInfo2, err := d.generateCompositeUpdate(&lsys, map[string]any{"name": "Johny"}, compInfo) require.NoError(t, err) - err = doc.Set("age", 31) - require.NoError(t, err) - err = remoteCol1.Update(ctx, doc) + err = db.executeMerge(ctx, events.DAGMerge{ + Cid: compInfo2.link.Cid, + SchemaRoot: col.SchemaRoot(), + }) require.NoError(t, err) - err = doc2.Set("age", 32) - require.NoError(t, err) - err = remoteCol2.Update(ctx, doc2) + someUnknownBlock := coreblock.Block{Delta: crdt.CRDT{CompositeDAGDelta: &crdt.CompositeDAGDelta{Status: 1}}} + someUnknownLink, err := coreblock.GetLinkFromNode(someUnknownBlock.GenerateNode()) require.NoError(t, err) - err = doc.Set("email", "alice@yahoo.com") - require.NoError(t, err) - err = remoteCol1.Update(ctx, doc) - require.NoError(t, err) + compInfoUnknown := compositeInfo{ + link: someUnknownLink, + height: 2, + } - err = doc2.Set("email", "alice-in-wonderland@yahoo.com") - require.NoError(t, err) - err = remoteCol2.Update(ctx, doc2) + compInfo3, err := d.generateCompositeUpdate(&lsys, map[string]any{"name": "Johny"}, compInfoUnknown) require.NoError(t, err) - // Sync the remote blocks to the local node - err = syncAndMerge(ctx, remoteDB2, remoteDB1, remoteCol2, remoteCol1, doc.ID().String()) - require.NoError(t, err) - err = syncAndMerge(ctx, remoteDB1, localDB, remoteCol1, localCol, doc.ID().String()) - require.NoError(t, err) + err = db.executeMerge(ctx, 
events.DAGMerge{ + Cid: compInfo3.link.Cid, + SchemaRoot: col.SchemaRoot(), + }) + require.ErrorContains(t, err, "could not find bafyreichk7jctbxhrodk5au3r4c4iqm627d4fi2cii2beseu4h6caoiwla") +} - // verify the local node has the same data as the remote node - localDoc, err := localCol.Get(ctx, doc.ID(), false) - require.NoError(t, err) - localDocString, err := localDoc.String() - require.NoError(t, err) - remoteDoc1, err := remoteCol1.Get(ctx, doc.ID(), false) - require.NoError(t, err) - remoteDocString1, err := remoteDoc1.String() - require.NoError(t, err) - require.Equal(t, remoteDocString1, localDocString) +type dagBuilder struct { + fieldsHeight map[string]uint64 + docID []byte + col client.Collection } -func syncAndMerge(ctx context.Context, from, to *db, fromCol, toCol client.Collection, docID string) error { - dsKey := base.MakeDataStoreKeyWithCollectionAndDocID(fromCol.Description(), docID) - headset := clock.NewHeadSet( - from.multistore.Headstore(), - dsKey.WithFieldId(core.COMPOSITE_NAMESPACE).ToHeadStoreKey(), +func newDagBuilder(col client.Collection, initialDocState map[string]any) (*dagBuilder, client.DocID) { + doc, err := client.NewDocFromMap( + initialDocState, + col.Definition(), ) - - cids, _, err := headset.List(ctx) if err != nil { - return err + panic(err) } + return &dagBuilder{ + fieldsHeight: make(map[string]uint64), + docID: []byte(doc.ID().String()), + col: col, + }, doc.ID() +} - for _, cid := range cids { - blockBytes, err := from.multistore.DAGstore().AsIPLDStorage().Get(ctx, cid.KeyString()) - if err != nil { - return err - } - block, err := coreblock.GetFromBytes(blockBytes) - if err != nil { - return err +type compositeInfo struct { + link cidlink.Link + height uint64 +} + +func (d *dagBuilder) generateCompositeUpdate(lsys *linking.LinkSystem, fields map[string]any, from compositeInfo) (compositeInfo, error) { + links := []coreblock.DAGLink{} + newPriority := from.height + 1 + if from.link.ByteLen() != 0 { + links = append(links, coreblock.DAGLink{ + Name: core.HEAD, + Link: from.link, + }) + } + for field, val := range fields { + d.fieldsHeight[field]++ + // Generate a new field block and save it to lsys + fieldBlock := coreblock.Block{ + Delta: crdt.CRDT{ + LWWRegDelta: &crdt.LWWRegDelta{ + DocID: d.docID, + FieldName: field, + Priority: d.fieldsHeight[field], + SchemaVersionID: d.col.Schema().VersionID, + Data: encodeValue(val), + }, + }, } - err = syncDAG(ctx, from, to, block) + fieldBlockLink, err := lsys.Store(ipld.LinkContext{}, coreblock.GetLinkPrototype(), fieldBlock.GenerateNode()) if err != nil { - return err + return compositeInfo{}, err } - err = to.executeMerge(ctx, events.DAGMerge{ - Cid: cid, - SchemaRoot: toCol.SchemaRoot(), + links = append(links, coreblock.DAGLink{ + Name: field, + Link: fieldBlockLink.(cidlink.Link), }) - if err != nil { - return err - } } - return nil -} -func syncDAG(ctx context.Context, from, to *db, block *coreblock.Block) error { - lsys := cidlink.DefaultLinkSystem() - lsys.SetWriteStorage(to.multistore.DAGstore().AsIPLDStorage()) - _, err := lsys.Store(linking.LinkContext{Ctx: ctx}, coreblock.GetLinkPrototype(), block.GenerateNode()) + compositeBlock := coreblock.Block{ + Delta: crdt.CRDT{ + CompositeDAGDelta: &crdt.CompositeDAGDelta{ + DocID: d.docID, + FieldName: "", + Priority: newPriority, + SchemaVersionID: d.col.Schema().VersionID, + Status: 1, + }, + }, + Links: links, + } + + compositeBlockLink, err := lsys.Store(ipld.LinkContext{}, coreblock.GetLinkPrototype(), compositeBlock.GenerateNode()) if err != nil { - return 
err + return compositeInfo{}, err } - for _, link := range block.Links { - lsys := cidlink.DefaultLinkSystem() - lsys.SetReadStorage(from.multistore.DAGstore().AsIPLDStorage()) - nd, err := lsys.Load(linking.LinkContext{Ctx: ctx}, link, coreblock.SchemaPrototype) - if err != nil { - return err - } - block, err := coreblock.GetFromNode(nd) - if err != nil { - return err - } - err = syncDAG(ctx, from, to, block) - if err != nil { - return err - } + return compositeInfo{ + link: compositeBlockLink.(cidlink.Link), + height: newPriority, + }, nil +} + +func encodeValue(val any) []byte { + em, err := cbor.EncOptions{Time: cbor.TimeRFC3339}.EncMode() + if err != nil { + // safe to panic here as this is a test + panic(err) + } + b, err := em.Marshal(val) + if err != nil { + // safe to panic here as this is a test + panic(err) } - return nil + return b } From cfdb655bb76197605e46885e5ff10af07c1986f2 Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Fri, 7 Jun 2024 13:18:38 -0400 Subject: [PATCH 14/15] update mocks --- client/mocks/collection.go | 130 ------------------------------------- 1 file changed, 130 deletions(-) diff --git a/client/mocks/collection.go b/client/mocks/collection.go index 7c227edd2b..9e1cf9b654 100644 --- a/client/mocks/collection.go +++ b/client/mocks/collection.go @@ -68,49 +68,6 @@ func (_c *Collection_Create_Call) RunAndReturn(run func(context.Context, *client return _c } -// CreateDocIndex provides a mock function with given fields: _a0, _a1 -func (_m *Collection) CreateDocIndex(_a0 context.Context, _a1 *client.Document) error { - ret := _m.Called(_a0, _a1) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, *client.Document) error); ok { - r0 = rf(_a0, _a1) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Collection_CreateDocIndex_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'CreateDocIndex' -type Collection_CreateDocIndex_Call struct { - *mock.Call -} - -// CreateDocIndex is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *client.Document -func (_e *Collection_Expecter) CreateDocIndex(_a0 interface{}, _a1 interface{}) *Collection_CreateDocIndex_Call { - return &Collection_CreateDocIndex_Call{Call: _e.mock.On("CreateDocIndex", _a0, _a1)} -} - -func (_c *Collection_CreateDocIndex_Call) Run(run func(_a0 context.Context, _a1 *client.Document)) *Collection_CreateDocIndex_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(*client.Document)) - }) - return _c -} - -func (_c *Collection_CreateDocIndex_Call) Return(_a0 error) *Collection_CreateDocIndex_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *Collection_CreateDocIndex_Call) RunAndReturn(run func(context.Context, *client.Document) error) *Collection_CreateDocIndex_Call { - _c.Call.Return(run) - return _c -} - // CreateIndex provides a mock function with given fields: _a0, _a1 func (_m *Collection) CreateIndex(_a0 context.Context, _a1 client.IndexDescription) (client.IndexDescription, error) { ret := _m.Called(_a0, _a1) @@ -301,49 +258,6 @@ func (_c *Collection_Delete_Call) RunAndReturn(run func(context.Context, client. 
return _c } -// DeleteDocIndex provides a mock function with given fields: _a0, _a1 -func (_m *Collection) DeleteDocIndex(_a0 context.Context, _a1 *client.Document) error { - ret := _m.Called(_a0, _a1) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, *client.Document) error); ok { - r0 = rf(_a0, _a1) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Collection_DeleteDocIndex_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteDocIndex' -type Collection_DeleteDocIndex_Call struct { - *mock.Call -} - -// DeleteDocIndex is a helper method to define mock.On call -// - _a0 context.Context -// - _a1 *client.Document -func (_e *Collection_Expecter) DeleteDocIndex(_a0 interface{}, _a1 interface{}) *Collection_DeleteDocIndex_Call { - return &Collection_DeleteDocIndex_Call{Call: _e.mock.On("DeleteDocIndex", _a0, _a1)} -} - -func (_c *Collection_DeleteDocIndex_Call) Run(run func(_a0 context.Context, _a1 *client.Document)) *Collection_DeleteDocIndex_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(*client.Document)) - }) - return _c -} - -func (_c *Collection_DeleteDocIndex_Call) Return(_a0 error) *Collection_DeleteDocIndex_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *Collection_DeleteDocIndex_Call) RunAndReturn(run func(context.Context, *client.Document) error) *Collection_DeleteDocIndex_Call { - _c.Call.Return(run) - return _c -} - // DeleteWithFilter provides a mock function with given fields: ctx, filter func (_m *Collection) DeleteWithFilter(ctx context.Context, filter interface{}) (*client.DeleteResult, error) { ret := _m.Called(ctx, filter) @@ -950,50 +864,6 @@ func (_c *Collection_Update_Call) RunAndReturn(run func(context.Context, *client return _c } -// UpdateDocIndex provides a mock function with given fields: ctx, oldDoc, newDoc -func (_m *Collection) UpdateDocIndex(ctx context.Context, oldDoc *client.Document, newDoc *client.Document) error { - ret := _m.Called(ctx, oldDoc, newDoc) - - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, *client.Document, *client.Document) error); ok { - r0 = rf(ctx, oldDoc, newDoc) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// Collection_UpdateDocIndex_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'UpdateDocIndex' -type Collection_UpdateDocIndex_Call struct { - *mock.Call -} - -// UpdateDocIndex is a helper method to define mock.On call -// - ctx context.Context -// - oldDoc *client.Document -// - newDoc *client.Document -func (_e *Collection_Expecter) UpdateDocIndex(ctx interface{}, oldDoc interface{}, newDoc interface{}) *Collection_UpdateDocIndex_Call { - return &Collection_UpdateDocIndex_Call{Call: _e.mock.On("UpdateDocIndex", ctx, oldDoc, newDoc)} -} - -func (_c *Collection_UpdateDocIndex_Call) Run(run func(ctx context.Context, oldDoc *client.Document, newDoc *client.Document)) *Collection_UpdateDocIndex_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(*client.Document), args[2].(*client.Document)) - }) - return _c -} - -func (_c *Collection_UpdateDocIndex_Call) Return(_a0 error) *Collection_UpdateDocIndex_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *Collection_UpdateDocIndex_Call) RunAndReturn(run func(context.Context, *client.Document, *client.Document) error) *Collection_UpdateDocIndex_Call { - _c.Call.Return(run) - return _c -} - // UpdateWithFilter provides a mock function with given fields: ctx, filter, 
updater func (_m *Collection) UpdateWithFilter(ctx context.Context, filter interface{}, updater string) (*client.UpdateResult, error) { ret := _m.Called(ctx, filter, updater) From 6aaeb51697c793be3e6e05d3ee6db3f34531eaa3 Mon Sep 17 00:00:00 2001 From: Fred Carle Date: Fri, 7 Jun 2024 14:07:44 -0400 Subject: [PATCH 15/15] add test documentation --- internal/db/merge_test.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/internal/db/merge_test.go b/internal/db/merge_test.go index bacaddd40f..b8671a5171 100644 --- a/internal/db/merge_test.go +++ b/internal/db/merge_test.go @@ -132,6 +132,8 @@ func TestMerge_DualBranch_NoError(t *testing.T) { require.Equal(t, expectedDocMap, docMap) } +// This test is not something we can reproduce with integration tests. +// Until we introduce partial DAG syncs to integration tests, this should not be removed. func TestMerge_DualBranchWithOneIncomplete_CouldNotFindCID(t *testing.T) { ctx := context.Background() @@ -150,7 +152,7 @@ func TestMerge_DualBranchWithOneIncomplete_CouldNotFindCID(t *testing.T) { initialDocState := map[string]any{ "name": "John", } - d, _ := newDagBuilder(col, initialDocState) + d, docID := newDagBuilder(col, initialDocState) compInfo, err := d.generateCompositeUpdate(&lsys, initialDocState, compositeInfo{}) require.NoError(t, err) compInfo2, err := d.generateCompositeUpdate(&lsys, map[string]any{"name": "Johny"}, compInfo) @@ -179,6 +181,19 @@ func TestMerge_DualBranchWithOneIncomplete_CouldNotFindCID(t *testing.T) { SchemaRoot: col.SchemaRoot(), }) require.ErrorContains(t, err, "could not find bafyreichk7jctbxhrodk5au3r4c4iqm627d4fi2cii2beseu4h6caoiwla") + + // Verify the document was created with the expected values + doc, err := col.Get(ctx, docID, false) + require.NoError(t, err) + docMap, err := doc.ToMap() + require.NoError(t, err) + + expectedDocMap := map[string]any{ + "_docID": docID.String(), + "name": "Johny", + } + + require.Equal(t, expectedDocMap, docMap) } type dagBuilder struct {