Skip to content

Commit

Permalink
Align core types with execution spec
Browse files Browse the repository at this point in the history
Since these types were written, we've gained an executable spec:

https://github.com/ethereum/execution-specs

This PR aligns some of the types we use with this spec to simplify
comparisons and cross-referencing.

Using a `distinct` type is a tradeoff between nim ergonomics, type
safety and the ability to work around nim quirks and stdlib weaknesses.

In particular, it allows us to overload common functions such as `hash`
with correct and performant versions as well as maintain control over
string conversions etc at the cost of a little bit of ceremony when
instantiating them.

Apart from distinct byte types, `Hash32` is introduced in lieu of the
existing `Hash256`, again aligning this commonly used type with the spec
which picks bytes rather than bits in the name.
  • Loading branch information
arnetheduck committed Sep 24, 2024
1 parent 3d51887 commit eed6dd1
Show file tree
Hide file tree
Showing 35 changed files with 465 additions and 336 deletions.
4 changes: 2 additions & 2 deletions doc/trie.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ Additional APIs are:
that starts with the same key prefix
* rootNode() -- get root node
* rootNode(node) -- replace the root node
* getRootHash(): `KeccakHash` with `seq[byte]` type
* getRootHash(): `Hash32` with `seq[byte]` type
* getDB(): `DB` -- get flat-db pointer

Constructor API:
* initBinaryTrie(DB, rootHash[optional]) -- rootHash has `seq[byte]` or KeccakHash type
* initBinaryTrie(DB, rootHash[optional]) -- rootHash has `seq[byte]` or Hash32 type
* init(BinaryTrie, DB, rootHash[optional])

Normally you would not set the rootHash when constructing an empty Binary-trie.
Expand Down
18 changes: 9 additions & 9 deletions eth/bloom.nim
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import stint, ./common/eth_hash

type UInt2048 = StUint[2048]

iterator chunksForBloom(h: MDigest[256]): array[2, uint8] =
iterator chunksForBloom(h: Hash32): array[2, uint8] =
yield [h.data[0], h.data[1]]
yield [h.data[2], h.data[3]]
yield [h.data[4], h.data[5]]
Expand All @@ -12,28 +12,28 @@ proc chunkToBloomBits(chunk: array[2, uint8]): UInt2048 =
let l = chunk[1].int
one(UInt2048) shl ((l + (h shl 8)) and 2047)

iterator bloomBits(h: MDigest[256]): UInt2048 =
iterator bloomBits(h: Hash32): UInt2048 =
for chunk in chunksForBloom(h):
yield chunkToBloomBits(chunk)

type BloomFilter* = object
value*: UInt2048

proc incl*(f: var BloomFilter, h: MDigest[256]) =
proc incl*(f: var BloomFilter, h: Hash32) =
for bits in bloomBits(h):
f.value = f.value or bits

proc init*(_: type BloomFilter, h: MDigest[256]): BloomFilter =
proc init*(_: type BloomFilter, h: Hash32): BloomFilter =
result.incl(h)

# TODO: The following 2 procs should be one generic, but it doesn't compile. Nim bug?
proc incl*(f: var BloomFilter, v: string) = f.incl(keccakHash(v))
proc incl*(f: var BloomFilter, v: openArray[byte]) = f.incl(keccakHash(v))
proc incl*(f: var BloomFilter, v: string) = f.incl(keccak256(v))
proc incl*(f: var BloomFilter, v: openArray[byte]) = f.incl(keccak256(v))

proc contains*(f: BloomFilter, h: MDigest[256]): bool =
proc contains*(f: BloomFilter, h: Hash32): bool =
for bits in bloomBits(h):
if (f.value and bits).isZero: return false
return true

template contains*[T](f: BloomFilter, v: openArray[T]): bool =
f.contains(keccakHash(v))
template contains*(f: BloomFilter, v: openArray): bool =
f.contains(keccak256(v))
4 changes: 2 additions & 2 deletions eth/common.nim
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
import ./common/[eth_types_rlp, utils]
export eth_types_rlp, utils
import ./common/eth_types_rlp
export eth_types_rlp
91 changes: 91 additions & 0 deletions eth/common/base_types.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# eth
# Copyright (c) 2024 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [].}

## Base primitive types used in ethereum, as specified in the execution specs:
## https://github.com/ethereum/execution-specs/
##
## For all of `UInt` and `UIntXX`, we use native `uintXX` types and/or `stint`.
##
## In the specification `UInt` is often used to denote an unsigned
## arbitrary-precision integers - in actual code we opt for a bounded type
## instead depending on "reasonable bounds", ie bounds that are unlikely to be
## exceeded in the foreseeable future.

import std/[hashes, macros, typetraits], stint, results, stew/[byteutils, staticfor]

export stint, hashes, results

type
  FixedBytes*[N: static int] = distinct array[N, byte]
    ## Byte sequence of compile-time fixed length `N` holding arbitrary data
    ## https://github.com/ethereum/execution-specs/blob/51fac24740e662844446439ceeb96a460aae0ba0/src/ethereum/base_types.py
    ##
    ## Specialized to `Bytes4`, `Bytes8` etc further down.

# Declaring the type as a distinct array avoids copying on trivial type
# conversions

template to*[N: static int](v: array[N, byte], T: type FixedBytes[N]): T =
  ## Convert the plain `N`-byte array `v` to the matching `FixedBytes[N]` by
  ## way of a type conversion.
  T(v)

template default*[N](T: type FixedBytes[N]): T =
  ## Default (`system.default`) value of `T`, taken from a constant.
  # Going through a constant avoids bad codegen where fixed bytes are zeroed
  # byte-by-byte at the call site
  const zeroed = system.default(T)
  zeroed

template data*(v: FixedBytes): array =
  ## Access `v` as its distinct base, i.e. the underlying byte array.
  distinctBase(v)

func `==`*(a, b: FixedBytes): bool {.inline.} =
  ## Equality of two fixed byte values, evaluated as one memory comparison
  ## over all `N` bytes.
  equalMem(addr a.data[0], addr b.data[0], a.N)

func hash*[N: static int](v: FixedBytes[N]): Hash {.inline.} =
  ## Compute a `Hash` of `v`.
  ##
  ## The first `min(N, sizeof(Hash))` bytes seed the result. When `N` exceeds
  ## `sizeof(Hash)`, every following full `sizeof(Hash)`-sized chunk is mixed
  ## in with `!&`, and finally the trailing `N mod sizeof(Hash)` bytes, if any.
  # `result` starts out zeroed, so values shorter than `Hash` only fill its
  # leading bytes
  copyMem(addr result, addr v.data[0], min(N, sizeof(Hash)))

  when N > sizeof(Hash):
    var tmp: Hash
    staticFor i, 1 ..< N div sizeof(Hash):
      copyMem(addr tmp, addr v.data[i * sizeof(Hash)], sizeof(Hash))
      result = result !& tmp
    const last = N mod sizeof(Hash)
    when last > 0:
      # Only the first `last` bytes of `tmp` are overwritten here; the
      # remaining bytes keep what the previous chunk (if any) left behind
      copyMem(addr tmp, addr v.data[N - last], last)
      # Assign explicitly rather than ending on a bare expression, so every
      # path returns through `result`
      result = result !& tmp

func toHex*(v: FixedBytes): string =
  ## Hex string for `v`, delegating to `toHex` of the underlying byte array.
  v.data.toHex()

func to0xHex*(v: FixedBytes): string =
  ## Hex string for `v`, delegating to `to0xHex` of the underlying byte array.
  v.data.to0xHex()

func `$`*(v: FixedBytes): string =
  ## String form of `v`: the same text as `to0xHex`.
  # There's a strong tradition of including 0x in the execution layer
  v.to0xHex()

func fromHex*(T: type FixedBytes, c: openArray[char]): T {.raises: [ValueError].} =
  ## Parse `c` as hex after optionally stripping "0x", raising `ValueError` if:
  ## * the string is too long or too short
  ## * the string can't be parsed as hex
  let parsed = hexToByteArrayStrict(c, T.N)
  T(parsed)

template makeFixedBytesN(N: static int) =
  # Declare the numbered instantiation for length `N` together with its
  # helpers: the `Bytes<N>` type, the `zeroBytes<N>` constant and the
  # `bytes<N>` template that parses a compile-time hex string

  type `Bytes N`* = FixedBytes[N]

  const `zeroBytes N`* = system.default(`Bytes N`)

  template `bytes N`*(s: static string): `Bytes N` =
    `Bytes N`.fromHex(s)

# Instantiate the numbered type, zero constant and hex-literal helper for each
# of the byte lengths listed here
makeFixedBytesN(4)
makeFixedBytesN(8)
makeFixedBytesN(20)
makeFixedBytesN(32)
makeFixedBytesN(48)
makeFixedBytesN(64)
makeFixedBytesN(96)
makeFixedBytesN(256)
89 changes: 89 additions & 0 deletions eth/common/eth_address.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# eth
# Copyright (c) 2024 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [].}

## 20-byte ethereum account address, as derived from the keypair controlling it
## https://ethereum.org/en/developers/docs/accounts/#account-creation

import std/[typetraits, hashes], "."/[base_types, eth_hash]

export hashes

type
  Address* = distinct Bytes20
    ## Account address, kept as a distinct 20-byte value
    ## https://github.com/ethereum/execution-specs/blob/51fac24740e662844446439ceeb96a460aae0ba0/src/ethereum/paris/fork_types.py#L28

const zeroAddress* = system.default(Address)
  ## The address whose bytes are all zero.
  ## A transaction sent to `zeroAddress` is a legitimate transfer to that
  ## account, not a contract creation. Such transfers are used to "burn" Eth;
  ## people also send Eth to address zero by accident, unrecoverably, because
  ## of poor user interfaces.

template to*(v: array[20, byte], _: type Address): Address =
  ## Convert the plain 20-byte array `v` to an `Address` by way of a type
  ## conversion.
  Address(v)

template data*(v: Address): array[20, byte] =
  ## Access `v` as its distinct base, typed here as the raw 20-byte array.
  distinctBase(v)

template default*(_: type Address): Address =
  ## Default `Address`, served from the `zeroAddress` constant.
  # Using the constant avoids bad codegen where fixed bytes are zeroed
  # byte-by-byte at the call site
  zeroAddress

# Equality is borrowed from the type `Address` is a distinct of
func `==`*(a, b: Address): bool {.borrow.}

func hash*(a: Address): Hash {.inline.} =
  ## Compute a `Hash` of `a`: the 20 bytes are read as two 64-bit words plus
  ## one 32-bit word, which are summed.
  # Addresses are more or less random so we should not need a fancy mixing
  # function
  var
    head {.noinit.}, middle {.noinit.}: uint64
    tail {.noinit.}: uint32

  copyMem(addr head, unsafeAddr a.data[0], sizeof(head))
  copyMem(addr middle, unsafeAddr a.data[8], sizeof(middle))
  copyMem(addr tail, unsafeAddr a.data[16], sizeof(tail))

  cast[Hash](head + middle + uint64(tail))

# String conversions are borrowed from the type `Address` is a distinct of
func toHex*(a: Address): string {.borrow.}
func to0xHex*(a: Address): string {.borrow.}
func `$`*(a: Address): string {.borrow.}

func fromHex*(_: type Address, s: openArray[char]): Address {.raises: [ValueError].} =
  ## Parse `s` with `Bytes20.fromHex` and wrap the outcome as an `Address`;
  ## a `ValueError` from that parse propagates to the caller.
  let raw = Bytes20.fromHex(s)
  Address(raw)

template to*(s: static string, _: type Address): Address =
  ## Turn the compile-time string `s` into an `Address`; the parse result is
  ## bound to a constant.
  const parsed = Address.fromHex(s)
  parsed

template address*(s: static string): Address =
  ## Shorthand for `s.to(Address)`.
  to(s, Address)

func toChecksum0xHex*(a: Address): string =
  ## Convert the address to 0x-prefixed mixed-case EIP-55 format
  let
    # TODO avoid memory allocations here
    hexAddr = a.toHex()
    hexHash = keccak256(hexAddr).toHex()

  # The output holds one character per address hex digit plus the "0x" prefix,
  # so the capacity follows the address string, not the longer hash string
  result = newStringOfCap(hexAddr.len + 2)
  result.add "0x"

  for i, c in hexAddr:
    # The address digit is kept as is when the matching hash digit is in
    # '0'..'7' or when it is a decimal digit; otherwise it is a letter and
    # gets upper-cased
    if hexHash[i] in {'0' .. '7'} or c in {'0' .. '9'}:
      result.add c
    else:
      result.add chr(ord(c) - ord('a') + ord('A'))

func hasValidChecksum*(_: type Address, a: string): bool =
  ## Validate a mixed-case checksummed address (EIP-55): `a` must parse as an
  ## `Address` and be identical to the `toChecksum0xHex` encoding of it.
  try:
    a == Address.fromHex(a).toChecksum0xHex()
  except ValueError:
    # Not parseable as an address at all
    false
98 changes: 59 additions & 39 deletions eth/common/eth_hash.nim
Original file line number Diff line number Diff line change
@@ -1,46 +1,66 @@
# Copyright (c) 2022-2023 Status Research & Development GmbH
# nimbus
# Copyright (c) 2024 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [].}

## keccak256 is used across ethereum as the "default" hash function and this
## module provides a type and some helpers to produce such hashes

import
nimcrypto/[keccak, hash]

export
keccak.update, keccak.finish, hash

type KeccakHash* = MDigest[256]
  ## Hash value computed using keccak256
  ## note: `Eth2Digest` is aliased by this as well, even though it uses a
  ## different hash!

template withKeccakHash*(body: untyped): KeccakHash =
  ## Run `body` with a fresh keccak256 context injected as `h`, then finish
  ## the context and return the digest:
  ##   let hashOfData = withKeccakHash: h.update(data)
  block:
    var h {.inject.}: keccak256
    # init(h) # not needed for new instance
    body
    finish(h)

func keccakHash*(input: openArray[byte]): KeccakHash {.noinit.} =
  ## keccak256 digest of `input`.
  # The init-update-finish interface is used to avoid the expensive
  # burning/clearing of memory (20~30% perf)
  var hasher: keccak256
  hasher.update(input)
  hasher.finish()

func keccakHash*(input: openArray[char]): KeccakHash {.noinit.} =
  ## keccak256 digest of the characters in `input`, viewed as bytes.
  keccakHash(toOpenArrayByte(input, 0, high(input)))

func keccakHash*(a, b: openArray[byte]): KeccakHash =
  ## keccak256 digest over `a` followed by `b`, both fed to one hash context.
  withKeccakHash:
    h.update(a)
    h.update(b)
## Keccak256 hash function used throughout the ethereum execution specification
## https://github.com/ethereum/execution-specs/blob/51fac24740e662844446439ceeb96a460aae0ba0/src/ethereum/crypto/hash.py
import std/[typetraits, hashes], nimcrypto/keccak, ./base_types

export hashes, keccak.update, keccak.finish

type
  Hash32* = distinct Bytes32
    ## Hash value kept as a distinct 32-byte value
    ## https://github.com/ethereum/execution-specs/blob/51fac24740e662844446439ceeb96a460aae0ba0/src/ethereum/crypto/hash.py#L19

const zeroHash32* = system.default(Hash32)
  ## The default-initialized `Hash32`, kept as a constant

template to*(v: array[32, byte], _: type Hash32): Hash32 =
  ## Convert the plain 32-byte array `v` to a `Hash32` by way of a type
  ## conversion.
  # The conversion target has to be `Hash32`, matching both the 32-byte input
  # and the declared return type
  Hash32(v)

template data*(v: Hash32): array[32, byte] =
  ## Access `v` as its distinct base, typed here as the raw 32-byte array.
  distinctBase(v)

template default*(_: type Hash32): Hash32 =
  ## Default `Hash32`, served from the `zeroHash32` constant.
  # Using the constant avoids bad codegen where fixed bytes are zeroed
  # byte-by-byte at the call site
  zeroHash32

# Equality is borrowed from the type `Hash32` is a distinct of
func `==`*(a, b: Hash32): bool {.borrow.}

func hash*(a: Hash32): Hash {.inline.} =
  ## Compute a `Hash` of `a`: the 32 bytes are read as four 64-bit words,
  ## which are summed.
  var words {.noinit.}: array[4, uint64]
  copyMem(addr words[0], addr a.data[0], sizeof(a))

  let folded = words[0] + words[1] + words[2] + words[3]
  cast[Hash](folded)

# String conversions are borrowed from the type `Hash32` is a distinct of
func toHex*(a: Hash32): string {.borrow.}
func to0xHex*(a: Hash32): string {.borrow.}
func `$`*(a: Hash32): string {.borrow.}

func fromHex*(_: type Hash32, s: openArray[char]): Hash32 {.raises: [ValueError].} =
  ## Parse `s` with `Bytes32.fromHex` and wrap the outcome as a `Hash32`;
  ## a `ValueError` from that parse propagates to the caller.
  let raw = Bytes32.fromHex(s)
  Hash32(raw)

template to*(s: static string, _: type Hash32): Hash32 =
  ## Turn the compile-time string `s` into a `Hash32`; the parse result is
  ## bound to a constant.
  const parsed = Hash32.fromHex(s)
  parsed

template hash32*(s: static string): Hash32 =
  ## Shorthand for `s.to(Hash32)`.
  to(s, Hash32)

template to*(v: MDigest[256], _: type Hash32): Hash32 =
  ## Wrap the `data` bytes of the 256-bit digest `v` as a `Hash32`.
  Hash32(v.data)

func keccak256*(input: openArray[byte]): Hash32 {.noinit.} =
  ## keccak256 digest of `input`, returned as `Hash32`.
  var hasher: keccak.keccak256
  hasher.update(input)

  let digest = hasher.finish()
  digest.to(Hash32)

func keccak256*(input: openArray[char]): Hash32 {.noinit.} =
  ## keccak256 digest of the characters in `input`, viewed as bytes.
  keccak256(toOpenArrayByte(input, 0, high(input)))

template withKeccak256*(body: untyped): Hash32 =
  ## Run `body` with a fresh keccak256 context injected as `h`, then finish
  ## the context and return the digest as `Hash32`:
  ##   let hashOfData = withKeccak256: h.update(data)
  var h {.inject.}: keccak.keccak256
  body
  to(h.finish(), Hash32)
6 changes: 3 additions & 3 deletions eth/common/eth_hash_rlp.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import

export eth_hash, rlp

proc read*(rlp: var Rlp, T: typedesc[MDigest]): T =
result.data = rlp.read(type(result.data))
proc read*(rlp: var Rlp, T: type Hash32): Hash32 =
result = Hash32(rlp.read(type(result.data)))

proc append*(rlpWriter: var RlpWriter, a: MDigest) =
proc append*(rlpWriter: var RlpWriter, a: Hash32) =
rlpWriter.append(a.data)
Loading

0 comments on commit eed6dd1

Please sign in to comment.