From 1a144d1fe1fb98599b853be7f552b8d458140c4e Mon Sep 17 00:00:00 2001
From: Daniel Olshansky <olshansky.daniel@gmail.com>
Date: Mon, 3 Jun 2024 18:30:23 -0700
Subject: [PATCH] Reply to harry's comments

---
 hasher.go        |  9 ++++++++-
 node_encoders.go |  8 +++++---
 proofs.go        |  2 +-
 smst.go          | 15 ++++++++++-----
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/hasher.go b/hasher.go
index 8930bf0..6cc9574 100644
--- a/hasher.go
+++ b/hasher.go
@@ -59,7 +59,9 @@ func NewTrieHasher(hasher hash.Hash) *trieHasher {
 	return &th
 }
 
-func NewNilPathHasher(hasherSize int) PathHasher {
+// newNilPathHasher returns a new nil path hasher with the given hash size.
+// It is not exported because the validation logic for the ClosestProof automatically handles this.
+func newNilPathHasher(hasherSize int) PathHasher {
 	return &nilPathHasher{hashSize: hasherSize}
 }
 
@@ -112,12 +114,14 @@ func (th *trieHasher) digestLeafNode(path, data []byte) (digest, value []byte) {
 	return
 }
 
+// digestInnerNode returns the encoded inner node data as well as its hash (i.e. digest)
 func (th *trieHasher) digestInnerNode(leftData, rightData []byte) (digest, value []byte) {
 	value = encodeInnerNode(leftData, rightData)
 	digest = th.digestData(value)
 	return
 }
 
+// digestSumLeafNode returns the encoded leaf node data as well as its hash (i.e. digest)
 func (th *trieHasher) digestSumLeafNode(path, data []byte) (digest, value []byte) {
 	value = encodeLeafNode(path, data)
 	digest = th.digestData(value)
@@ -125,6 +129,7 @@ func (th *trieHasher) digestSumLeafNode(path, data []byte) (digest, value []byte
 	return
 }
 
+// digestSumInnerNode returns the encoded inner node data as well as its hash (i.e. digest)
 func (th *trieHasher) digestSumInnerNode(leftData, rightData []byte) (digest, value []byte) {
 	value = encodeSumInnerNode(leftData, rightData)
 	digest = th.digestData(value)
@@ -132,12 +137,14 @@ func (th *trieHasher) digestSumInnerNode(leftData, rightData []byte) (digest, va
 	return
 }
 
+// parseInnerNode returns the encoded left and right nodes
 func (th *trieHasher) parseInnerNode(data []byte) (leftData, rightData []byte) {
 	leftData = data[len(innerNodePrefix) : th.hashSize()+len(innerNodePrefix)]
 	rightData = data[len(innerNodePrefix)+th.hashSize():]
 	return
 }
 
+// parseSumInnerNode returns the encoded left and right nodes as well as the sum of the current node
 func (th *trieHasher) parseSumInnerNode(data []byte) (leftData, rightData []byte, sum uint64) {
 	// Extract the sum from the encoded node data
 	var sumBz [sumSizeBits]byte
diff --git a/node_encoders.go b/node_encoders.go
index 65ef604..27bf24d 100644
--- a/node_encoders.go
+++ b/node_encoders.go
@@ -8,9 +8,11 @@ import (
 // TODO_TECHDEBT: All of the parsing, encoding and checking functions in this file
 // can be abstracted out into the `trieNode` interface.
 
-// TODO_IMPROVE: We should create well-defined types & structs for every type of node
-// (e.g. protobufs) to streamline the process of encoding & encoding and to improve
-// readability.
+// TODO_IMPROVE: We should create well-defined structs for every type of node
+// to streamline the process of encoding & decoding and to improve readability.
+// If decoding needs to be language agnostic (to implement POKT clients in other
+// languages), protobufs should be considered. If decoding does not need to be
+// language agnostic, we can use Go's gob package for more efficient serialization.
 
 // NB: In this file, all references to the variable `data` should be treated as `encodedNodeData`.
 // It was abbreviated to `data` for brevity.
diff --git a/proofs.go b/proofs.go
index 3b43dbc..c8f0497 100644
--- a/proofs.go
+++ b/proofs.go
@@ -353,7 +353,7 @@ func VerifyClosestProof(proof *SparseMerkleClosestProof, root []byte, spec *Trie
 	// will invalidate the proof.
 	nilSpec := &TrieSpec{
 		th:      spec.th,
-		ph:      NewNilPathHasher(spec.ph.PathSize()),
+		ph:      newNilPathHasher(spec.ph.PathSize()),
 		vh:      spec.vh,
 		sumTrie: spec.sumTrie,
 	}
diff --git a/smst.go b/smst.go
index 27d6c28..b87647e 100644
--- a/smst.go
+++ b/smst.go
@@ -32,9 +32,14 @@ func NewSparseMerkleSumTrie(
 		option(&trieSpec)
 	}
 
-	// Initialize a non-sum SMT and modify it to have a nil value hasher
-	// TODO_UPNEXT(@Olshansk): Understand the purpose of the nilValueHasher and
-	// why we're not applying it to the smst but we need it for the smt.
+	// Initialize a non-sum SMT and modify it to have a nil value hasher.
+	// NB: We are using a nil value hasher because the SMST pre-hashes its paths.
+	//     This results in double path hashing because the SMST is a wrapper
+	//     around the SMT. The reason the SMST uses its own path hashing logic is
+	//     to account for the additional sum in the encoding/decoding process.
+	//     Therefore, the underlying SMT needs a nil path hasher, while
+	//     the outer SMST does all the (non nil) path hashing itself.
+	// TODO_TECHDEBT(@Olshansk): Look for ways to simplify / cleanup the above.
 	smt := &SMT{
 		TrieSpec: trieSpec,
 		nodes:    nodes,
@@ -146,8 +151,8 @@ func (smst *SMST) Root() MerkleRoot {
 // If the tree is not a sum tree, it will panic.
 func (smst *SMST) Sum() uint64 {
 	rootDigest := smst.Root()
-	if len(rootDigest) != smst.th.hashSize()+sumSizeBits {
-		panic("roo#sum: not a merkle sum trie")
+	if !smst.Spec().sumTrie {
+		panic("SMST: not a merkle sum trie")
 	}
 	var sumbz [sumSizeBits]byte
 	copy(sumbz[:], []byte(rootDigest)[len([]byte(rootDigest))-sumSizeBits:])