Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement merkleizeBlockArray #421

Merged
merged 3 commits into from
Nov 9, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/as-sha256.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,23 @@ import {
import type {Hasher} from "./types";
import {Node} from "../node";
import type {HashComputationLevel} from "../hashComputation";
import {doDigestNLevel, doMerkleizeInto} from "./util";
import {BLOCK_SIZE, doDigestNLevel, doMerkleizeBlockArray, doMerkleizeBlocksBytes} from "./util";

/**
* The hashInto() function of as-sha256 loops through its input 256 bytes (4 SHA256 blocks) at a time.
* This buffer mirrors the one used by the hashInto() function of as-sha256: https://github.com/ChainSafe/ssz/blob/cf3e1f038c8bf7cba1bb27c38540e50b0391d0e6/packages/as-sha256/src/index.ts#L270
*/
const buffer = new Uint8Array(4 * BLOCK_SIZE);

export const hasher: Hasher = {
name: "as-sha256",
digest64: digest2Bytes32,
digest64HashObjects: digest64HashObjectsInto,
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeInto(data, padFor, output, offset, hashInto);
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto);
},
merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) {
return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, buffer);
},
digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return doDigestNLevel(data, nLevel, hashInto);
Expand Down
9 changes: 6 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/hashtree.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import {Hasher, HashObject} from "./types";
import {Node} from "../node";
import type {HashComputationLevel} from "../hashComputation";
import {byteArrayIntoHashObject} from "@chainsafe/as-sha256/lib/hashObject";
import {doDigestNLevel, doMerkleizeInto} from "./util";
import {doDigestNLevel, doMerkleizeBlockArray, doMerkleizeBlocksBytes} from "./util";

/**
* Best SIMD implementation is in 512 bits = 64 bytes
Expand Down Expand Up @@ -40,8 +40,11 @@ export const hasher: Hasher = {
hashInto(hash64Input, hash64Output);
byteArrayIntoHashObject(hash64Output, 0, parent);
},
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeInto(data, padFor, output, offset, hashInto);
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto);
},
merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) {
return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, uint8Input);
},
digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return doDigestNLevel(data, nLevel, hashInto);
Expand Down
19 changes: 17 additions & 2 deletions packages/persistent-merkle-tree/src/hasher/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,23 @@ export function digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return hasher.digestNLevel(data, nLevel);
}

export function merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
hasher.merkleizeInto(data, padFor, output, offset);
/**
 * Merkleize SHA256 blocks packed into a single Uint8Array using the currently configured hasher.
 *
 * Thin delegate: every argument is forwarded unchanged to `hasher.merkleizeBlocksBytes`.
 *
 * @param blocksBytes concatenated SHA256 blocks; mutated by the hasher
 * @param padFor maxChunkCount, used to compute the number of layers to hash
 * @param output buffer that receives the result
 * @param offset position in `output` where the result is written
 */
export function merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
  return hasher.merkleizeBlocksBytes(blocksBytes, padFor, output, offset);
}

/**
 * Merkleize an array of SHA256 blocks using the currently configured hasher.
 *
 * Thin delegate: every argument is forwarded unchanged to `hasher.merkleizeBlockArray`.
 *
 * @param blocks SHA256 blocks, one Uint8Array per block; mutated by the hasher
 * @param blockLimit number of blocks to hash, should be <= blocks.length
 * @param padFor maxChunkCount, used to compute the number of layers to hash
 * @param output buffer that receives the result
 * @param offset position in `output` where the result is written
 */
export function merkleizeBlockArray(blocks: Uint8Array[], blockLimit: number, padFor: number, output: Uint8Array, offset: number): void {
  return hasher.merkleizeBlockArray(blocks, blockLimit, padFor, output, offset);
}

export function executeHashComputations(hashComputations: HashComputationLevel[]): void {
Expand Down
18 changes: 15 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/noble.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import {sha256} from "@noble/hashes/sha256";
import {digest64HashObjects, byteArrayIntoHashObject} from "@chainsafe/as-sha256";
import type {Hasher} from "./types";
import {doDigestNLevel, doMerkleizeInto, hashObjectToUint8Array} from "./util";
import {
BLOCK_SIZE,
doDigestNLevel,
doMerkleizeBlockArray,
doMerkleizeBlocksBytes,
hashObjectToUint8Array,
} from "./util";

const digest64 = (a: Uint8Array, b: Uint8Array): Uint8Array => sha256.create().update(a).update(b).digest();
const hashInto = (input: Uint8Array, output: Uint8Array): void => {
Expand All @@ -22,14 +28,20 @@ const hashInto = (input: Uint8Array, output: Uint8Array): void => {
}
};

/** Must be a non-zero multiple of 4 * BLOCK_SIZE bytes (enforced by doMerkleizeBlockArray); sized the same as the as-sha256 buffer */
const buffer = new Uint8Array(4 * BLOCK_SIZE);

export const hasher: Hasher = {
name: "noble",
digest64,
digest64HashObjects: (left, right, parent) => {
byteArrayIntoHashObject(digest64(hashObjectToUint8Array(left), hashObjectToUint8Array(right)), 0, parent);
},
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeInto(data, padFor, output, offset, hashInto);
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void {
return doMerkleizeBlocksBytes(blocksBytes, padFor, output, offset, hashInto);
},
merkleizeBlockArray(blocks, blockLimit, padFor, output, offset) {
return doMerkleizeBlockArray(blocks, blockLimit, padFor, output, offset, hashInto, buffer);
},
digestNLevel(data: Uint8Array, nLevel: number): Uint8Array {
return doDigestNLevel(data, nLevel, hashInto);
Expand Down
19 changes: 16 additions & 3 deletions packages/persistent-merkle-tree/src/hasher/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,24 @@ export type Hasher = {
*/
digest64HashObjects(left: HashObject, right: HashObject, parent: HashObject): void;
/**
* Merkleize n chunk of data, 32 bytes each
* Merkleize n SHA256 blocks in a single Uint8Array, each block is 64 bytes
* padFor is maxChunkCount, use it to compute layers to hash
* data is mutated after the function
* blocksBytes is mutated after the function
*/
merkleizeInto(data: Uint8Array, padFor: number, output: Uint8Array, offset: number): void;
merkleizeBlocksBytes(blocksBytes: Uint8Array, padFor: number, output: Uint8Array, offset: number): void;
/**
* Merkleize n SHA256 blocks, each block being a 64-byte Uint8Array
* blockLimit is the number of blocks to hash, it should be <= blocks.length
* padFor is maxChunkCount, it is used to compute the number of layers to hash
* blocks are mutated by this function
*/
merkleizeBlockArray(
blocks: Uint8Array[],
blockLimit: number,
padFor: number,
output: Uint8Array,
offset: number
): void;
/**
* Hash multiple chunks (1 chunk = 32 bytes) at multiple levels
* With nLevel = 3, hash multiple of 256 bytes, return multiple of 32 bytes.
Expand Down
153 changes: 134 additions & 19 deletions packages/persistent-merkle-tree/src/hasher/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@ export function uint8ArrayToHashObject(byteArr: Uint8Array): HashObject {

type HashIntoFn = (input: Uint8Array, output: Uint8Array) => void;

/** a SHA256 block is 64 bytes */
export const BLOCK_SIZE = 64;

/**
* Input data is unsafe because it's modified
* If its chunk count is not even, need to be appended with zero hash at layer 0 so that we don't need
* a new memory allocation here (even through we don't need it if padFor = 1)
* The Uint8Array(32) will be written to output at offset
* Merkleize multiple SHA256 blocks in a single Uint8Array into ${output} at ${offset}
* - if padFor > 1 blocksBytes need to be multiple of 64 bytes.
* - if padFor = 1, blocksBytes need to be at least 32 bytes
* - if padFor = 0, throw error
* blocksBytes is unsafe because it's modified
*/
export function doMerkleizeInto(
data: Uint8Array,
export function doMerkleizeBlocksBytes(
blocksBytes: Uint8Array,
padFor: number,
output: Uint8Array,
offset: number,
Expand All @@ -31,33 +35,35 @@ export function doMerkleizeInto(
}

const layerCount = Math.ceil(Math.log2(padFor));
if (data.length === 0) {
if (blocksBytes.length === 0) {
output.set(zeroHash(layerCount), offset);
return;
}

if (data.length % 32 !== 0) {
throw new Error(`Invalid input length, expect to be multiple of 32 bytes, got ${data.length}`);
if (blocksBytes.length % 32 !== 0) {
throw new Error(`Invalid input length, expect to be multiple of 32 bytes, got ${blocksBytes.length}`);
}

// if padFor = 1, only need 32 bytes
if (padFor > 1 && data.length % 64 !== 0) {
throw new Error(`Invalid input length, expect to be multiple of 64 bytes, got ${data.length}, padFor=${padFor}`);
if (padFor > 1 && blocksBytes.length % BLOCK_SIZE !== 0) {
throw new Error(
`Invalid input length, expect to be multiple of 64 bytes, got ${blocksBytes.length}, padFor=${padFor}`
);
}

let inputLength = data.length;
let inputLength = blocksBytes.length;
let outputLength = Math.floor(inputLength / 2);
let bufferIn = data;
// hash into the same buffer
for (let i = 0; i < layerCount; i++) {
const bufferOut = data.subarray(0, outputLength);
let bufferIn = blocksBytes;
// hash into the same buffer to save memory allocation
for (let layer = 0; layer < layerCount; layer++) {
const bufferOut = blocksBytes.subarray(0, outputLength);
hashInto(bufferIn, bufferOut);
const chunkCount = Math.floor(outputLength / 32);
if (chunkCount % 2 === 1 && i < layerCount - 1) {
if (chunkCount % 2 === 1 && layer < layerCount - 1) {
// extend to 1 more chunk
inputLength = outputLength + 32;
bufferIn = data.subarray(0, inputLength);
bufferIn.set(zeroHash(i + 1), outputLength);
bufferIn = blocksBytes.subarray(0, inputLength);
bufferIn.set(zeroHash(layer + 1), outputLength);
} else {
bufferIn = bufferOut;
inputLength = outputLength;
Expand All @@ -68,6 +74,115 @@ export function doMerkleizeInto(
output.set(bufferIn.subarray(0, 32), offset);
}

/**
 * Merkleize multiple SHA256 blocks (one 64-byte Uint8Array per block) and write the 32-byte
 * result into `output` at `offset`.
 *
 * Every layer is hashed in batches through `buffer`, and the hashes are written back into the
 * leading entries of `blocks`, so no memory is allocated here but `blocks` is overwritten.
 * When a layer ends with an odd number of 32-byte chunks, the last block is completed with
 * `zeroHash(layer + 1)`.
 *
 * @param blocks SHA256 blocks, each must be exactly BLOCK_SIZE bytes; unsafe because it's modified
 * @param blockLimit number of leading blocks to hash; must be a non-negative integer <= blocks.length
 *   so that consumers can reuse a larger array
 * @param padFor is maxChunkCount, must be >= 1; the number of layers hashed is ceil(log2(padFor)),
 *   so with padFor = 1 nothing is hashed and the first 32 bytes of blocks[0] are returned as is
 * @param output destination buffer
 * @param offset position in `output` where the 32-byte result is written
 * @param hashInto called with an input view of k * 64 bytes and an output view of k * 32 bytes
 *   that aliases the start of the input; expected to hash each 64-byte block into 32 bytes
 * @param buffer scratch space, its length must be a non-zero multiple of 4 * BLOCK_SIZE
 *   (4 blocks for as-sha256, 16 blocks for hashtree)
 * @throws Error if padFor, blockLimit, any block length or the buffer length is invalid
 */
export function doMerkleizeBlockArray(
  blocks: Uint8Array[],
  blockLimit: number,
  padFor: number,
  output: Uint8Array,
  offset: number,
  hashInto: HashIntoFn,
  buffer: Uint8Array
): void {
  if (padFor < 1) {
    throw new Error(`Invalid padFor, expect to be at least 1, got ${padFor}`);
  }

  // a negative or fractional limit would otherwise surface as an obscure TypedArray error below
  if (!Number.isInteger(blockLimit) || blockLimit < 0) {
    throw new Error(`Invalid blockLimit, expect to be a non-negative integer, got ${blockLimit}`);
  }

  if (blockLimit > blocks.length) {
    throw new Error(
      `Invalid blockLimit, expect to be less than or equal blocks.length ${blocks.length}, got ${blockLimit}`
    );
  }

  const layerCount = Math.ceil(Math.log2(padFor));
  if (blockLimit === 0) {
    output.set(zeroHash(layerCount), offset);
    return;
  }

  for (const block of blocks) {
    if (block.length !== BLOCK_SIZE) {
      throw new Error(`Invalid block length, expect to be 64 bytes, got ${block.length}`);
    }
  }

  // as-sha256 has a buffer of 4 * 64 bytes
  // hashtree has a buffer of 16 * 64 bytes
  const bufferUnit = 4 * BLOCK_SIZE;
  if (buffer.length === 0 || buffer.length % bufferUnit !== 0) {
    throw new Error(`Invalid buffer length, expect to be multiple of ${bufferUnit} bytes, got ${buffer.length}`);
  }

  // a chunk is half of a SHA256 block
  const chunkSize = BLOCK_SIZE / 2;
  // batchSize is 4 for as-sha256, 16 for hashtree; both divisions are exact thanks to the check above
  const batchSize = buffer.length / BLOCK_SIZE;
  const halfBatchSize = batchSize / 2;
  // views used for every full batch, the hash is written into the start of the same buffer
  const fullBatchIn = buffer;
  const fullBatchOut = buffer.subarray(0, halfBatchSize * BLOCK_SIZE);

  // ignore blocks beyond blockLimit
  let blockCount = blockLimit;
  // hash into the same blocks to save memory allocation
  for (let layer = 0; layer < layerCount; layer++) {
    // next block to write, always <= the index of the first block not read yet
    let outBlockIndex = 0;

    const fullBatchCount = Math.floor(blockCount / batchSize);
    for (let batch = 0; batch < fullBatchCount; batch++) {
      const firstBlockIndex = batch * batchSize;
      for (let j = 0; j < batchSize; j++) {
        fullBatchIn.set(blocks[firstBlockIndex + j], j * BLOCK_SIZE);
      }

      hashInto(fullBatchIn, fullBatchOut);

      for (let j = 0; j < halfBatchSize; j++) {
        blocks[outBlockIndex].set(fullBatchOut.subarray(j * BLOCK_SIZE, (j + 1) * BLOCK_SIZE));
        outBlockIndex++;
      }
    }

    // blocks that do not fill a whole batch; skipped entirely when there are none
    const remainingBlocks = blockCount % batchSize;
    if (remainingBlocks > 0) {
      const tailIn = buffer.subarray(0, remainingBlocks * BLOCK_SIZE);
      const tailOut = buffer.subarray(0, tailIn.length / 2);
      const tailStart = fullBatchCount * batchSize;
      for (let j = 0; j < remainingBlocks; j++) {
        tailIn.set(blocks[tailStart + j], j * BLOCK_SIZE);
      }

      hashInto(tailIn, tailOut);

      // tailOut.length is remainingBlocks * 32, so it may not be divisible by BLOCK_SIZE
      const wholeBlockCount = Math.floor(tailOut.length / BLOCK_SIZE);
      for (let j = 0; j < wholeBlockCount; j++) {
        blocks[outBlockIndex].set(tailOut.subarray(j * BLOCK_SIZE, (j + 1) * BLOCK_SIZE));
        outBlockIndex++;
      }

      if (tailOut.length % BLOCK_SIZE !== 0) {
        // odd chunk count: the last chunk goes to the first half of the block,
        // the second half is the zero hash of the next layer
        const paddedBlock = blocks[outBlockIndex];
        paddedBlock.set(tailOut.subarray(tailOut.length - chunkSize, tailOut.length), 0);
        paddedBlock.set(zeroHash(layer + 1), chunkSize);
        outBlockIndex++;
      }
    }

    // end of layer, the blocks written are the input of the next layer
    blockCount = outBlockIndex;
  }

  // the end result stays in blocks[0]
  output.set(blocks[0].subarray(0, chunkSize), offset);
}

/**
* Input data is unsafe because it's modified
* given nLevel = 3
Expand Down
Loading
Loading