feat: implement merkleizeBlockArray (#421)
* feat: implement merkleizeBlockArray

* chore: add comments

* chore: more docs for doMerkleizeBlockArray()
twoeths authored Nov 9, 2024
1 parent cf3e1f0 commit e58781f
Showing 7 changed files with 261 additions and 37 deletions.
15 changes: 12 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/as-sha256.ts
@@ -8,14 +8,23 @@ import {
import type {Hasher} from "./types";
import {Node} from "../node";
import type {HashComputationLevel} from "../hashComputation";
import {doDigestNLevel, doMerkleizeInto} from "./util";
import {BLOCK_SIZE, doDigestNLevel, doMerkleizeBlockArray, doMerkleizeBlocksBytes} from "./util";

/**
* The hashInto() function of as-sha256 loops through the input in 256-byte batches,
* so the buffer below is sized to match the hashInto() implementation of as-sha256 https://github.com/ChainSafe/ssz/blob/cf3e1f038c8bf7cba1bb27c38540e50b0391d0e6/packages/as-sha256/src/index.ts#L270
*/
const buffer = new Uint8Array(4 * BLOCK_SIZE);

export const hasher: Hasher = {
name: "as-sha256",
digest64: digest2Bytes32,
digest64HashObjects: digest64HashObjectsInto,
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeInto(data, padFor, output, offset, hashInto);
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto);
},
merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) {
return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, buffer);
},
digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return doDigestNLevel(data, nLevel, hashInto);
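The comment above explains the 4 * BLOCK_SIZE staging buffer: as-sha256's hashInto() consumes input in 256-byte batches, and doMerkleizeBlockArray() (in util.ts further down) derives its batching purely from the length of the buffer it is handed. Below is a small illustrative sketch of that relationship, using the as-sha256 buffer and, for comparison, the 16 * 64-byte hashtree buffer mentioned in util.ts; the constant names are illustrative only.

// Sketch: batch parameters doMerkleizeBlockArray() derives from each hasher's staging buffer.
const BLOCK_SIZE = 64; // one SHA256 block, as in util.ts

const asSha256Buffer = new Uint8Array(4 * BLOCK_SIZE); // 256 bytes, one hashInto() batch
const hashtreeBuffer = new Uint8Array(16 * BLOCK_SIZE); // 1024 bytes, per the util.ts comment

for (const buffer of [asSha256Buffer, hashtreeBuffer]) {
  const batchSize = Math.floor(buffer.length / BLOCK_SIZE); // blocks hashed per hashInto() call: 4 or 16
  const halfBatchSize = Math.floor(batchSize / 2); // blocks produced per hashInto() call: 2 or 8
  console.log({bufferBytes: buffer.length, batchSize, halfBatchSize});
}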
9 changes: 6 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/hashtree.ts
Expand Up @@ -3,7 +3,7 @@ import {Hasher, HashObject} from "./types";
import {Node} from "../node";
import type {HashComputationLevel} from "../hashComputation";
import {byteArrayIntoHashObject} from "@chainsafe/as-sha256/lib/hashObject";
import {doDigestNLevel, doMerkleizeInto} from "./util";
import {doDigestNLevel, doMerkleizeBlockArray, doMerkleizeBlocksBytes} from "./util";

/**
* Best SIMD implementation is in 512 bits = 64 bytes
@@ -40,8 +40,11 @@ export const hasher: Hasher = {
hashInto(hash64Input, hash64Output);
byteArrayIntoHashObject(hash64Output, 0, parent);
},
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeInto(data, padFor, output, offset, hashInto);
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto);
},
merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) {
return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, uint8Input);
},
digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return doDigestNLevel(data, nLevel, hashInto);
19 changes: 17 additions & 2 deletions packages/persistent-merkle-tree/src/hasher/index.ts
@@ -27,8 +27,23 @@ export function digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return hasher.digestNLevel(data, nLevel);
}

export function merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
hasher.merkleizeInto(data, padFor, output, offset);
export function merkleizeBlocksBytes(
blocksBytes: Uint8Array,
padFor: number,
output: Uint8Array,
offset: number
): void {
hasher.merkleizeBlocksBytes(blocksBytes, padFor, output, offset);
}

export function merkleizeBlockArray(
blocks: Uint8Array[],
blockLimit: number,
padFor: number,
output: Uint8Array,
offset: number
): void {
hasher.merkleizeBlockArray(blocks, blockLimit, padFor, output, offset);
}

export function executeHashComputations(hashComputations: HashComputationLevel[]): void {
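A usage sketch for the merkleizeBlockArray() export above, keyed to the blockLimit parameter: a consumer can keep a pre-allocated pool of 64-byte blocks and hash only the first blockLimit of them. The relative import path is an assumption (adjust it to wherever the hasher module is exposed), the data is arbitrary, and the function mutates the blocks it touches.

import {merkleizeBlockArray} from "./index";

// A reusable pool of 8 blocks (64 bytes each); only the first 3 hold data this time.
const blockPool = Array.from({length: 8}, () => new Uint8Array(64));
blockPool[0].fill(1);
blockPool[1].fill(2);
blockPool[2].fill(3);

// 3 blocks = 6 chunks, padded up to the type's maxChunkCount of 8.
const padFor = 8;
const root = new Uint8Array(32);
merkleizeBlockArray(blockPool, 3, padFor, root, 0); // the leading blocks are overwritten with intermediate hashes

console.log(Array.from(root)); // 32-byte root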
18 changes: 15 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/noble.ts
@@ -1,7 +1,13 @@
import {sha256} from "@noble/hashes/sha256";
import {digest64HashObjects, byteArrayIntoHashObject} from "@chainsafe/as-sha256";
import type {Hasher} from "./types";
import {doDigestNLevel, doMerkleizeInto, hashObjectToUint8Array} from "./util";
import {
BLOCK_SIZE,
doDigestNLevel,
doMerkleizeBlockArray,
doMerkleizeBlocksBytes,
hashObjectToUint8Array,
} from "./util";

const digest64 = (a: Uint8Array, b: Uint8Array): Uint8Array => sha256.create().update(a).update(b).digest();
const hashInto = (input: Uint8Array, output: Uint8Array): void => {
@@ -22,14 +28,20 @@ const hashInto = (input: Uint8Array, output: Uint8Array): void => {
}
};

/** should be a multiple of 64 bytes; kept the same size as as-sha256 */
const buffer = new Uint8Array(4 * BLOCK_SIZE);

export const hasher: Hasher = {
name: "noble",
digest64,
digest64HashObjects: (left, right, parent) => {
byteArrayIntoHashObject(digest64(hashObjectToUint8Array(left), hashObjectToUint8Array(right)), 0, parent);
},
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeInto(data, padFor, output, offset, hashInto);
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto);
},
merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) {
return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, buffer);
},
digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return doDigestNLevel(data, nLevel, hashInto);
19 changes: 16 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/types.ts
@@ -15,11 +15,24 @@ export type Hasher = {
*/
digest64HashObjects(left: HashObject, right: HashObject, parent: HashObject): void;
/**
* Merkleize n chunk of data, 32 bytes each
* Merkleize n SHA256 blocks in a single Uint8Array, each block being 64 bytes
* padFor is maxChunkCount; it is used to compute the number of layers to hash
* data is mutated after the function
* blocksBytes is mutated by this function
*/
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void;
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void;
/**
* Merkleize n SHA256 blocks, each a 64-byte Uint8Array
* blockLimit is the number of blocks to hash, should be <= blocks.length
* padFor is maxChunkCount; it is used to compute the number of layers to hash
* blocks are mutated by this function
*/
merkleizeBlockArray(
blocks: Uint8Array[],
blockLimit: number,
padFor: number,
output: Uint8Array,
offset: number
): void;
/**
* Hash multiple chunks (1 chunk = 32 bytes) at multiple levels
* With nLevel = 3, hash multiple of 256 bytes, return multiple of 32 bytes.
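Per the doc comments above, padFor is the maxChunkCount of the type being merkleized, and both merkleize methods use it only to decide how many tree layers to hash. A tiny worked computation of that relationship (it mirrors the layerCount formula in util.ts below); the sample values are arbitrary.

// padFor = maxChunkCount -> number of layers hashed above the leaf chunks
for (const padFor of [1, 2, 6, 8, 100]) {
  const layerCount = Math.ceil(Math.log2(padFor)); // 0, 1, 3, 3, 7
  console.log(`padFor=${padFor} -> ${layerCount} layers to hash`);
}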
157 changes: 138 additions & 19 deletions packages/persistent-merkle-tree/src/hasher/util.ts
@@ -13,14 +13,18 @@ export function uint8ArrayToHashObject(byteArr: Uint8Array): HashObject {

type HashIntoFn = (input: Uint8Array, output: Uint8Array) => void;

/** a SHA256 block is 64 bytes */
export const BLOCK_SIZE = 64;

/**
* Input data is unsafe because it's modified
* If its chunk count is not even, it needs to be appended with a zero hash at layer 0 so that we don't need
* a new memory allocation here (even though we don't need it if padFor = 1)
* The Uint8Array(32) will be written to output at offset
* Merkleize multiple SHA256 blocks in a single Uint8Array into ${output} at ${offset}
* - if padFor > 1, blocksBytes needs to be a multiple of 64 bytes
* - if padFor = 1, blocksBytes needs to be at least 32 bytes
* - if padFor = 0, an error is thrown
* blocksBytes is unsafe because it's modified
*/
export function doMerkleizeInto(
data: Uint8Array,
export function doMerkleizeBlocksBytes(
blocksBytes: Uint8Array,
padFor: number,
output: Uint8Array,
offset: number,
@@ -31,33 +35,35 @@ export function doMerkleizeInto(
}

const layerCount = Math.ceil(Math.log2(padFor));
if (data.length === 0) {
if (blocksBytes.length === 0) {
output.set(zeroHash(layerCount), offset);
return;
}

if (data.length % 32 !== 0) {
throw new Error(`Invalid input length, expect to be multiple of 32 bytes, got ${data.length}`);
if (blocksBytes.length % 32 !== 0) {
throw new Error(`Invalid input length, expect to be multiple of 32 bytes, got ${blocksBytes.length}`);
}

// if padFor = 1, only need 32 bytes
if (padFor > 1 && data.length % 64 !== 0) {
throw new Error(`Invalid input length, expect to be multiple of 64 bytes, got ${data.length}, padFor=${padFor}`);
if (padFor > 1 && blocksBytes.length % BLOCK_SIZE !== 0) {
throw new Error(
`Invalid input length, expect to be multiple of 64 bytes, got ${blocksBytes.length}, padFor=${padFor}`
);
}

let inputLength = data.length;
let inputLength = blocksBytes.length;
let outputLength = Math.floor(inputLength / 2);
let bufferIn = data;
// hash into the same buffer
for (let i = 0; i < layerCount; i++) {
const bufferOut = data.subarray(0, outputLength);
let bufferIn = blocksBytes;
// hash into the same buffer to save memory allocation
for (let layer = 0; layer < layerCount; layer++) {
const bufferOut = blocksBytes.subarray(0, outputLength);
hashInto(bufferIn, bufferOut);
const chunkCount = Math.floor(outputLength / 32);
if (chunkCount % 2 === 1 && i < layerCount - 1) {
if (chunkCount % 2 === 1 && layer < layerCount - 1) {
// extend to 1 more chunk
inputLength = outputLength + 32;
bufferIn = data.subarray(0, inputLength);
bufferIn.set(zeroHash(i + 1), outputLength);
bufferIn = blocksBytes.subarray(0, inputLength);
bufferIn.set(zeroHash(layer + 1), outputLength);
} else {
bufferIn = bufferOut;
inputLength = outputLength;
@@ -68,6 +74,119 @@ export function doMerkleizeInto(
output.set(bufferIn.subarray(0, 32), offset);
}

/**
* Merkleize multiple SHA256 blocks into ${output} at ${offset}
* @param blockLimit number of blocks to hash, should be <= blocks.length so that consumers can reuse memory
* @param padFor is maxChunkCount, should be >= 2
* @param blocks is unsafe because it's modified
* @param output the result is stored here
* @param offset the offset to store the result
* @param hashInto the hash function of each hasher
* @param buffer a temporary per-hasher buffer used to batch input for the hashInto() function
*/
export function doMerkleizeBlockArray(
blocks: Uint8Array[],
blockLimit: number,
padFor: number,
output: Uint8Array,
offset: number,
hashInto: HashIntoFn,
buffer: Uint8Array
): void {
if (padFor < 1) {
throw new Error(`Invalid padFor, expect to be at least 1, got ${padFor}`);
}

if (blockLimit > blocks.length) {
throw new Error(
`Invalid blockLimit, expect to be less than or equal to blocks.length ${blocks.length}, got ${blockLimit}`
);
}

const layerCount = Math.ceil(Math.log2(padFor));
if (blockLimit === 0) {
output.set(zeroHash(layerCount), offset);
return;
}

for (const block of blocks) {
if (block.length !== BLOCK_SIZE) {
throw new Error(`Invalid block length, expect to be 64 bytes, got ${block.length}`);
}
}

// as-sha256 has a buffer of 4 * 64 bytes
// hashtree has a buffer of 16 * 64 bytes
if (buffer.length === 0 || buffer.length % (4 * BLOCK_SIZE) !== 0) {
throw new Error(`Invalid buffer length, expect to be a multiple of 256 bytes (4 * BLOCK_SIZE), got ${buffer.length}`);
}

// batchSize is 4 for as-sha256, 16 for hashtree
const batchSize = Math.floor(buffer.length / BLOCK_SIZE);
const halfBatchSize = Math.floor(batchSize / 2);
let bufferIn = buffer;
// hash into the same buffer
let bufferOut = buffer.subarray(0, halfBatchSize * BLOCK_SIZE);
// ignore remaining blocks
let blockCount = blockLimit;
// hash into the same blocks to save memory allocation
for (let layer = 0; layer < layerCount; layer++) {
let outBlockIndex = 0;
const sameLayerLoop = Math.floor(blockCount / batchSize);
for (let i = 0; i < sameLayerLoop; i++) {
// populate bufferIn
for (let j = 0; j < batchSize; j++) {
const blockIndex = i * batchSize + j;
bufferIn.set(blocks[blockIndex], j * BLOCK_SIZE);
}

// hash into bufferOut
hashInto(bufferIn, bufferOut);

// copy bufferOut to blocks, bufferOut.len = halfBatchSize * BLOCK_SIZE
for (let j = 0; j < halfBatchSize; j++) {
blocks[outBlockIndex].set(bufferOut.subarray(j * BLOCK_SIZE, (j + 1) * BLOCK_SIZE));
outBlockIndex++;
}
}

// remaining blocks
const remainingBlocks = blockCount % batchSize;
bufferIn = buffer.subarray(0, remainingBlocks * BLOCK_SIZE);
bufferOut = buffer.subarray(0, Math.floor(bufferIn.length / 2));

// populate bufferIn
for (let blockIndex = Math.floor(blockCount / batchSize) * batchSize; blockIndex < blockCount; blockIndex++) {
bufferIn.set(blocks[blockIndex], (blockIndex % batchSize) * BLOCK_SIZE);
}

// hash into bufferOut
hashInto(bufferIn, bufferOut);

// copy bufferOut to blocks, note that bufferOut.len may not be divisible by BLOCK_SIZE
for (let j = 0; j < Math.floor(bufferOut.length / BLOCK_SIZE); j++) {
blocks[outBlockIndex].set(bufferOut.subarray(j * BLOCK_SIZE, (j + 1) * BLOCK_SIZE));
outBlockIndex++;
}

if (bufferOut.length % BLOCK_SIZE !== 0) {
// set the last 32 bytes of bufferOut
blocks[outBlockIndex].set(bufferOut.subarray(bufferOut.length - 32, bufferOut.length), 0);
// add zeroHash
blocks[outBlockIndex].set(zeroHash(layer + 1), 32);
outBlockIndex++;
}

// end of layer, update blockCount, bufferIn, bufferOut
blockCount = outBlockIndex;
bufferIn = buffer.subarray(0, blockCount * BLOCK_SIZE);
bufferOut = buffer.subarray(0, Math.floor(bufferIn.length / 2));
}

// the end result stays in blocks[0]
output.set(blocks[0].subarray(0, 32), offset);
}

/**
* Input data is unsafe because it's modified
* given nLevel = 3
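A minimal end-to-end sketch of the two helpers in this file, cross-checking doMerkleizeBlockArray() against doMerkleizeBlocksBytes(). The hashInto here is an assumption modeled on the noble hasher earlier in this diff (hash each 64-byte block of input into 32 bytes of output), and the relative "./util" import is also an assumption; both may need adjusting to the real package layout. The same six chunks are merkleized once as an array of 64-byte blocks and once as one contiguous buffer, which should yield the same root.

import {sha256} from "@noble/hashes/sha256";
import {BLOCK_SIZE, doMerkleizeBlockArray, doMerkleizeBlocksBytes} from "./util";

// Assumed hashInto: hashes each 64-byte block of input into 32 bytes of output.
const hashInto = (input: Uint8Array, output: Uint8Array): void => {
  for (let i = 0; i < input.length / BLOCK_SIZE; i++) {
    output.set(sha256(input.subarray(i * BLOCK_SIZE, (i + 1) * BLOCK_SIZE)), i * 32);
  }
};

// 3 blocks = 6 chunks, merkleized as if the type had a limit of padFor = 8 chunks.
const padFor = 8;
const blocks = [1, 2, 3].map((v) => new Uint8Array(BLOCK_SIZE).fill(v));
const blocksBytes = new Uint8Array(3 * BLOCK_SIZE);
blocks.forEach((block, i) => blocksBytes.set(block, i * BLOCK_SIZE)); // copy before blocks get mutated

const buffer = new Uint8Array(4 * BLOCK_SIZE); // same staging size as the as-sha256 hasher
const rootA = new Uint8Array(32);
doMerkleizeBlockArray(blocks, blocks.length, padFor, rootA, 0, hashInto, buffer);

const rootB = new Uint8Array(32);
doMerkleizeBlocksBytes(blocksBytes, padFor, rootB, 0, hashInto);

console.log(rootA.every((byte, i) => byte === rootB[i])); // expected: true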