Skip to content

Commit

Permalink
ssz: finish implementation (#42)
Browse files Browse the repository at this point in the history
* ssz: finish implementation

* add object support, simplify implementation
* fix extra round of hashing in tree_hash_root

* ssz: cleanups

* work around Nim range bug for Uint24, cleanups
  • Loading branch information
arnetheduck authored and mratsim committed Dec 17, 2018
1 parent 5dc06f4 commit 142aa8c
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 85 deletions.
2 changes: 1 addition & 1 deletion beacon_chain/spec/datatypes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ type

latest_crosslinks*: array[SHARD_COUNT, CrosslinkRecord]
latest_state_recalculation_slot*: uint64
latest_block_roots*: array[LATEST_BLOCK_ROOTS_COUNT, Eth2Digest] ##\
latest_block_roots*: array[LATEST_BLOCK_ROOTS_COUNT.int, Eth2Digest] ##\
## Needed to process attestations, older to newer
latest_penalized_exit_balances*: seq[uint64] ##\
## Balances penalized in the current withdrawal period
Expand Down
239 changes: 163 additions & 76 deletions beacon_chain/ssz.nim
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,18 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.

# SSZ Serialization (simple serialize)
# See https://github.com/ethereum/beacon_chain/issues/100
# and https://github.com/ethereum/beacon_chain/tree/master/ssz
# See https://github.com/ethereum/eth2.0-specs/blob/master/specs/simple-serialize.md

import
endians, typetraits, options, algorithm,
eth_common, nimcrypto/blake2,
./spec/[crypto, datatypes, digest]

from milagro_crypto import getRaw
from milagro_crypto import getRaw, fromRaw

# ################### Helper functions ###################################

# Length of a Uint24 is the constant 3, independent of the value of `x`.
func len(x: Uint24): int = 3

# toBytesSSZ convert simple fixed-length types to their SSZ wire representation
func toBytesSSZ(x: SomeInteger): array[sizeof(x), byte] =
## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``)
## All integers are serialized as **big endian**.
Expand All @@ -40,7 +38,36 @@ func toBytesSSZ(x: Uint24): array[3, byte] =
# An EthAddress is returned unchanged as its own wire representation
# (an array of sizeof(x) bytes).
func toBytesSSZ(x: EthAddress): array[sizeof(x), byte] = x
# The wire representation of a digest is its 32-byte `data` field.
func toBytesSSZ(x: Eth2Digest): array[32, byte] = x.data

func fromBytesSSZUnsafe(T: typedesc[SomeInteger], data: ptr byte): T =
# TODO these two are still being debated:
# https://github.com/ethereum/eth2.0-specs/issues/308#issuecomment-447026815
# Keys and signatures are encoded as whatever getRaw() yields for them; the
# return type is inferred (`auto`) from that call.
func toBytesSSZ(x: ValidatorPubKey|ValidatorSig): auto = x.getRaw()

type TrivialTypes =
  # Typeclass of the types that serialize down to a fixed-length array - most
  # importantly, these values don't carry a length prefix in the final
  # encoding. toBytesSSZ provides the actual Nim-type-to-bytes conversion.
  # TODO think about this for a bit - depends where the serialization of
  #      validator keys ends up going..
  # TODO can't put ranges like Uint24 in here, which is why Uint24 needs its
  #      own overloads next to every TrivialTypes one:
  #      https://github.com/nim-lang/Nim/issues/10027
  SomeInteger | EthAddress | Eth2Digest | ValidatorPubKey | ValidatorSig

func sszLen(v: TrivialTypes): int =
  ## Serialized size of a fixed-length value: the length of its toBytesSSZ
  ## encoding.
  len(toBytesSSZ(v))
func sszLen(v: Uint24): int =
  ## Serialized size of a Uint24: the length of its toBytesSSZ encoding.
  len(toBytesSSZ(v))

func sszLen(v: object | tuple): int =
  ## Serialized size of `v` in bytes: 4 bytes for the length prefix plus the
  ## serialized size of each of its fields.
  result = 4 # Length
  for field in v.fields:
    # Measure the field's value: sszLen is defined over values, so handing it
    # `type field` (a typedesc) cannot resolve to a matching overload.
    result += sszLen(field)

func sszLen(v: seq | array): int =
  ## Serialized size of a list in bytes: 4 bytes for the length prefix plus
  ## the serialized size of every element.
  var total = 4 # Length
  for item in v.items:
    inc total, sszLen(item)
  total

# fromBytesSSZUnsafe copy wire representation to a Nim variable, assuming
# there's enough data in the buffer
func fromBytesSSZUnsafe(T: typedesc[SomeInteger], data: pointer): T =
## Convert directly to bytes the size of the int. (e.g. ``uint16 = 2 bytes``)
## All integers are serialized as **big endian**.
## TODO: Assumes data points to a sufficiently large buffer
Expand All @@ -57,76 +84,141 @@ func fromBytesSSZUnsafe(T: typedesc[SomeInteger], data: ptr byte): T =
elif result.sizeof == 1: copyMem(result.addr, alignedBuf, sizeof(result))
else: {.fatal: "Unsupported type deserialization: " & $(type(result)).name.}

func `+`[T](p: ptr T, offset: int): ptr T =
  ## Pointer arithmetic: returns `p` advanced by `offset` elements of `T`
  ## (that is, by `offset * sizeof(T)` bytes).
  let byteOffset = offset * sizeof(T)
  cast[ptr T](cast[ByteAddress](p) +% byteOffset)

func eat(x: var auto, data: ptr byte, pos: var int, len: int): bool =
  ## Copies `sizeof(x)` raw bytes from `data` at `pos` into `x` and advances
  ## `pos` past them. Returns false, touching nothing, when fewer than
  ## `sizeof(x)` bytes remain before `len`.
  let size = x.sizeof
  if pos + size <= len:
    copyMem(x.addr, data + pos, size)
    inc pos, size
    result = true

func eatInt[T: SomeInteger](x: var T, data: ptr byte, pos: var int, len: int):
    bool =
  ## Decodes one integer of type `T` from `data` at `pos` via
  ## fromBytesSSZUnsafe and advances `pos` past it. Returns false, touching
  ## nothing, when fewer than `sizeof(x)` bytes remain before `len`.
  let width = x.sizeof
  if pos + width <= len:
    x = T.fromBytesSSZUnsafe(data + pos)
    inc pos, width
    result = true

func eatSeq[T: SomeInteger](x: var seq[T], data: ptr byte, pos: var int,
    len: int): bool =
  ## Decodes a sequence of integers: an item count (read as an int32)
  ## followed by that many values of type `T`. Returns false when the count
  ## cannot be read, is negative, or announces more items than fit before
  ## `len`; in the latter two cases `pos` has already moved past the count.
  var items: int32
  if not eatInt(items, data, pos, len): return
  # The count is signed: a negative value would make the size computation
  # below negative, slip through the bounds check and reach the allocation.
  if items < 0: return
  if pos + T.sizeof * items > len: return

  x = newSeqUninitialized[T](items)
  for val in x.mitems:
    discard eatInt(val, data, pos, len) # Bounds-checked above
  return true

func serInt(dest: var seq[byte], x: SomeInteger) =
  ## Appends the toBytesSSZ encoding of `x` to `dest`.
  let encoded = toBytesSSZ(x)
  dest.add encoded
func fromBytesSSZUnsafe(T: typedesc[Uint24], data: pointer): T =
  ## Reads a 3-byte big-endian integer from `data`; integers are not padded.
  ## The caller must guarantee that three bytes are readable at `data`.
  let bytes = cast[ptr UncheckedArray[byte]](data)
  let value =
    (uint32(bytes[0]) shl 16) or
    (uint32(bytes[1]) shl 8) or
    uint32(bytes[2])
  value.Uint24

func fromBytesSSZUnsafe(T: typedesc[EthAddress], data: pointer): T =
  ## Fills the returned address with `sizeof(T)` raw bytes copied from `data`.
  ## The caller must guarantee that many bytes are readable.
  let size = sizeof(result)
  copyMem(addr result, data, size)

func fromBytesSSZUnsafe(T: typedesc[Eth2Digest], data: pointer): T =
  ## Fills the `data` field of the returned digest with raw bytes copied from
  ## the `data` pointer. The caller must guarantee that many bytes are readable.
  let size = sizeof(result.data)
  copyMem(addr result.data, data, size)

proc deserialize[T: TrivialTypes](
    dest: var T, offset: var int, data: openArray[byte]): bool =
  ## Decodes one fixed-length value from `data` at `offset` into `dest` and
  ## advances `offset` past it. Returns false, leaving `offset` untouched,
  ## when too few bytes remain or when a key/signature is rejected by fromRaw.
  # TODO proc because milagro is problematic
  if offset + sszLen(dest) > data.len():
    return false

  when T is (ValidatorPubKey|ValidatorSig):
    # Keys and signatures are parsed by fromRaw from the rest of the buffer
    result = T.fromRaw(data[offset..data.len-1], dest)
    if result:
      offset += sszLen(dest)
  else:
    dest = fromBytesSSZUnsafe(T, data[offset].unsafeAddr)
    offset += sszLen(dest)
    result = true

func deserialize(
    dest: var Uint24, offset: var int, data: openArray[byte]): bool =
  ## Decodes a Uint24 from `data` at `offset` into `dest` and advances
  ## `offset` past it. Returns false, touching nothing, when too few bytes
  ## remain.
  result = offset + sszLen(dest) <= data.len()
  if result:
    dest = fromBytesSSZUnsafe(Uint24, data[offset].unsafeAddr)
    offset += sszLen(dest)

func serSeq(dest: var seq[byte], src: seq[SomeInteger]) =
  ## Appends `src` to `dest`: the item count as a uint32, then the
  ## toBytesSSZ encoding of every item in order.
  serInt(dest, uint32(src.len))
  for item in src.items:
    dest.add toBytesSSZ(item)
func deserialize[T: enum](dest: var T, offset: var int, data: openArray[byte]): bool =
  ## Decodes an enum that was written as a uint64 and casts it to `T`.
  ## Returns false, leaving `dest` untouched, when the integer cannot be read.
  # TODO er, verify the size here, probably an uint64 but...
  var raw: uint64
  result = deserialize(raw, offset, data)
  if result:
    # TODO what to do with out-of-range values?? rejecting means breaking
    #      forwards compatibility..
    dest = cast[T](raw)

proc deserialize[T: not (enum|TrivialTypes|Uint24)](
    dest: var T, offset: var int, data: openArray[byte]): bool =
  ## Decodes a length-prefixed value (seq, array, or anything exposing
  ## `fields`) from `data` at `offset` into `dest`.
  ##
  ## The encoding is a uint32 byte count followed by the items or fields,
  ## each decoded recursively. Returns false when the buffer is too short,
  ## when an item fails to decode, when an array would receive more items
  ## than it can hold, or when the items do not consume exactly the declared
  ## number of bytes. On failure `offset` and `dest` may be partially updated.
  # Length in bytes, followed by each item
  var totalLen: uint32
  if not deserialize(totalLen, offset, data): return false

  if offset + totalLen.int > data.len(): return false

  let itemEnd = offset + totalLen.int
  when T is seq:
    # Items are of homogenous type, but not necessarily homogenous length,
    # cannot pre-allocate item list generically
    while offset < itemEnd:
      dest.setLen dest.len + 1
      if not deserialize(dest[^1], offset, data): return false
  elif T is array:
    var i = 0
    while offset < itemEnd:
      # Check the index before using it: testing `i > dest.len` only after
      # the increment lets `dest[dest.len]` be indexed once the array is full.
      if i >= dest.len: return false
      if not deserialize(dest[i], offset, data): return false
      i += 1
  else:
    for field in dest.fields:
      if not deserialize(field, offset, data): return false

  # Every branch must stop exactly at the declared end; an item that read
  # past it means the length prefix and the payload disagree.
  if offset != itemEnd: return false

  true

func serialize(dest: var seq[byte], src: TrivialTypes) =
  ## Appends the toBytesSSZ encoding of `src` to `dest`; no length prefix
  ## is written.
  let encoded = toBytesSSZ(src)
  dest.add encoded
func serialize(dest: var seq[byte], src: Uint24) =
  ## Appends the toBytesSSZ encoding of `src` to `dest`; no length prefix
  ## is written.
  let encoded = toBytesSSZ(src)
  dest.add encoded

func serialize(dest: var seq[byte], x: enum) =
  ## Appends an enum to `dest`, encoded as the uint64 conversion of `x`.
  # TODO er, verify the size here, probably an uint64 but...
  let asInt = uint64(x)
  serialize(dest, asInt)

func serialize[T: not enum](dest: var seq[byte], src: T) =
  ## Appends the length-prefixed encoding of `src` to `dest`: a 4-byte
  ## length, written with bigEndian32, followed by every item (seq/array) or
  ## every field (anything else), each serialized recursively. The length
  ## counts the bytes written after the prefix itself.
  let lenPos = dest.len()

  # Length is a prefix, so we'll put a dummy 0 here and fill it after
  # serializing
  dest.add toBytesSSZ(0'u32)

  when T is seq|array:
    # If you get an error here that looks like:
    # type mismatch: got <type range 0..8191(uint64)>
    # you just used an unsigned int for an array index thinking you'd get
    # away with it (surprise, surprise: you can't, uints are crippled!)
    # https://github.com/nim-lang/Nim/issues/9984
    for val in src:
      serialize dest, val
  else:
    # TODO to sort, or not to sort, that is the question:
    # TODO or.. https://github.com/ethereum/eth2.0-specs/issues/275
    when defined(debugFieldSizes) and T is (BeaconState | BeaconBlock):
      # for research/serialized_sizes, remove when appropriate
      for name, field in src.fieldPairs:
        let start = dest.len()
        serialize dest, field
        let sz = dest.len() - start
        debugEcho(name, ": ", sz)
    else:
      for field in src.fields:
        serialize dest, field

  # Write size (we only know it once we've serialized the object!)
  # bigEndian32 reads exactly four bytes from its source pointer, so the
  # value is held in a uint32: pointing it at a platform-sized `int` only
  # picks up the right bytes on little-endian hosts.
  var objLen = uint32(dest.len() - lenPos - 4)
  bigEndian32(dest[lenPos].addr, objLen.addr)

# ################### Core functions ###################################
func deserialize(data: ptr byte, pos: var int, len: int, typ: typedesc[object]):
    auto =
  ## Reads the fields of a `typ`, in declaration order, from `data` starting
  ## at `pos`, with `len` as the bound passed to every read. Returns
  ## `some(t)` once every field was read. A failed read exits through a bare
  ## `return`, i.e. with the default value of the inferred return type
  ## (presumably an empty Option - confirm against callers).
  var t: typ

  for field in t.fields:
    # The read helper is selected at compile time from the field's type
    when field is EthAddress | Eth2Digest:
      # Copied as raw bytes
      if not eat(field, data, pos, len): return
    elif field is (SomeInteger or byte):
      # Decoded through fromBytesSSZUnsafe
      if not eatInt(field, data, pos, len): return
    elif field is seq[SomeInteger or byte]:
      # Item count followed by the items
      if not eatSeq(field, data, pos, len): return
    else: # TODO: deserializing subtypes (?, depends on final spec)
      # Any other field type is rejected at compile time
      {.fatal: "Unsupported type deserialization: " & $typ.name.}
  return some(t)

func deserialize*(
data: seq[byte or uint8] or openarray[byte or uint8] or string,
typ: typedesc[object]): auto {.inline.} =

proc deserialize*(data: openArray[byte],
typ: typedesc): auto {.inline.} =
# TODO: returns Option[typ]: https://github.com/nim-lang/Nim/issues/9195
var pos = 0
return deserialize((ptr byte)(data[0].unsafeAddr), pos, data.len, typ)
var ret: typ
var offset: int
if not deserialize(ret, offset, data): none(typ)
else: some(ret)

func serialize*[T](value: T): seq[byte] =
for field in value.fields:
when field is (EthAddress | Eth2Digest | SomeInteger):
result.add field.toBytesSSZ()
elif field is seq[SomeInteger or byte]:
result.serSeq field
else: # TODO: Serializing subtypes (?, depends on final spec)
{.fatal: "Unsupported type serialization: " & $typ.name.}
# TODO Fields should be sorted, but...
serialize(result, value)

# ################### Hashing ###################################

Expand Down Expand Up @@ -199,7 +291,7 @@ func hash_tree_root*[T: not enum](x: T): array[32, byte] =

withHash:
for name, value in fields.sortedByIt(it.name):
h.update hash_tree_root(value.value)
h.update value.value

# #################################
# hash_tree_root not part of official spec
Expand Down Expand Up @@ -271,9 +363,4 @@ func merkleHash[T](lst: openArray[T]): array[32, byte] =

chunkz.setLen(chunkz.len div 2)

if chunkz.len == 0:
const empty32 = empty(array[32, byte])
result = hash(empty32, dataLen)
return

result = hash(chunkz[0], dataLen)
11 changes: 11 additions & 0 deletions research/serialized_sizes.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import
../beacon_chain/[ssz],
../beacon_chain/spec/[beaconstate, digest],
../tests/testutil

proc stateSize(deposits: int) =
  ## Builds a start-up state from `deposits` initial deposits and prints the
  ## length of its serialized form.
  let
    initialDeposits = makeInitialDeposits(deposits)
    state = on_startup(initialDeposits, 0, Eth2Digest())

  echo "Validators: ", deposits, ", total: ", serialize(state).len

stateSize(1000)
1 change: 1 addition & 0 deletions research/serialized_sizes.nim.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-d:debugFieldSizes
Loading

0 comments on commit 142aa8c

Please sign in to comment.