Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Copy ssz code from nimbus-eth2 and adjust to stand on its own #2

Merged
merged 5 commits into from
Oct 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 7 additions & 37 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,18 @@ jobs:
fail-fast: false
max-parallel: 20
matrix:
branch: [master]
test_lang: [c, cpp]
target:
- os: linux
cpu: amd64
TEST_LANG: c
- os: linux
cpu: amd64
TEST_LANG: cpp
- os: linux
cpu: i386
TEST_LANG: c
- os: linux
cpu: i386
TEST_LANG: cpp
- os: macos
cpu: amd64
TEST_LANG: c
- os: macos
cpu: amd64
TEST_LANG: cpp
- os: windows
cpu: amd64
TEST_LANG: c
- os: windows
cpu: amd64
TEST_LANG: cpp
- os: windows
cpu: i386
TEST_LANG: c
- os: windows
cpu: i386
TEST_LANG: cpp
include:
- target:
os: linux
Expand All @@ -50,7 +30,7 @@ jobs:
os: windows
builder: windows-2019

name: '${{ matrix.target.os }}-${{ matrix.target.cpu }}-${{ matrix.target.TEST_LANG }} (${{ matrix.branch }})'
name: '${{ matrix.target.os }}-${{ matrix.target.cpu }}-${{ matrix.test_lang }}'
runs-on: ${{ matrix.builder }}
steps:
- name: Checkout nim-ssz-serialization
Expand Down Expand Up @@ -103,10 +83,10 @@ jobs:
run: |
mkdir -p external
if [[ '${{ matrix.target.cpu }}' == 'amd64' ]]; then
MINGW_URL="https://sourceforge.net/projects/mingw-w64/files/Toolchains targetting Win64/Personal Builds/mingw-builds/8.1.0/threads-posix/seh/x86_64-8.1.0-release-posix-seh-rt_v6-rev0.7z"
MINGW_URL="https://github.com/brechtsanders/winlibs_mingw/releases/download/11.2.0-12.0.1-9.0.0-r1/winlibs-x86_64-posix-seh-gcc-11.2.0-mingw-w64-9.0.0-r1.7z"
ARCH=64
else
MINGW_URL="https://sourceforge.net/projects/mingw-w64/files/Toolchains targetting Win32/Personal Builds/mingw-builds/8.1.0/threads-posix/dwarf/i686-8.1.0-release-posix-dwarf-rt_v6-rev0.7z"
MINGW_URL="https://github.com/brechtsanders/winlibs_mingw/releases/download/11.2.0-12.0.1-9.0.0-r1/winlibs-i686-posix-dwarf-gcc-11.2.0-mingw-w64-9.0.0-r1.7z"
ARCH=32
fi
curl -L "$MINGW_URL" -o "external/mingw-${{ matrix.target.cpu }}.7z"
Expand Down Expand Up @@ -145,11 +125,10 @@ jobs:
id: nim-cache
uses: actions/cache@v2
with:
path: nim
key: 'nim-${{ matrix.target.os }}-${{ matrix.target.cpu }}-${{ steps.versions.outputs.nimbus_build_system }}'
path: NimBinaries
key: 'NimBinaries-${{ matrix.target.os }}-${{ matrix.target.cpu }}-${{ steps.versions.outputs.nimbus_build_system }}'

- name: Build Nim and associated tools
if: steps.nim-cache.outputs.cache-hit != 'true'
shell: bash
run: |
curl -O -L -s -S https://raw.githubusercontent.com/status-im/nimbus-build-system/master/scripts/build_nim.sh
Expand All @@ -165,15 +144,6 @@ jobs:
fi
env MAKE="$MAKE_CMD -j2" ARCH_OVERRIDE=$PLATFORM CC=gcc bash build_nim.sh nim csources dist/nimble NimBinaries

# clean up to save cache space
cd nim
rm koch
rm -rf nimcache
rm -rf csources
rm -rf tests
rm -rf dist
rm -rf .git

- name: Setup environment
shell: bash
run: echo '${{ github.workspace }}/nim/bin' >> $GITHUB_PATH
Expand All @@ -183,4 +153,4 @@ jobs:
working-directory: nim-ssz-serialization
run: |
nimble install -y --depsOnly
env TEST_LANG="${{ matrix.target.TEST_LANG }}" nimble test
env TEST_LANG="${{ matrix.test_lang }}" nimble test
250 changes: 250 additions & 0 deletions ssz_serialization.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
# ssz_serialization
# Copyright (c) 2018-2021 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

{.push raises: [Defect].}
{.pragma: raisesssz, raises: [Defect, MalformedSszError, SszSizeMismatchError].}

## SSZ serialization for core SSZ types, as specified in:
# https://github.com/ethereum/consensus-specs/blob/v1.0.1/ssz/simple-serialize.md#serialization

import
std/typetraits,
stew/[endians2, leb128, objects],
serialization, serialization/testing/tracing,
./ssz_serialization/[codec, bitseqs, types]

export
serialization, codec, types, bitseqs

type
SszReader* = object
stream: InputStream

SszWriter* = object
stream: OutputStream

SizePrefixed*[T] = distinct T
SszMaxSizeExceeded* = object of SerializationError

VarSizedWriterCtx = object
fixedParts: WriteCursor
offset: int

FixedSizedWriterCtx = object

serializationFormat SSZ

SSZ.setReader SszReader
SSZ.setWriter SszWriter, PreferredOutput = seq[byte]

template sizePrefixed*[TT](x: TT): untyped =
type T = TT
SizePrefixed[T](x)

proc init*(T: type SszReader,
stream: InputStream): T =
T(stream: stream)

proc writeFixedSized(s: var (OutputStream|WriteCursor), x: auto) {.raises: [Defect, IOError].} =
mixin toSszType

when x is byte:
s.write x
elif x is bool:
s.write byte(ord(x))
elif x is UintN:
when cpuEndian == bigEndian:
s.write toBytesLE(x)
else:
s.writeMemCopy x
elif x is array:
when x[0] is byte:
trs "APPENDING FIXED SIZE BYTES", x
s.write x
else:
for elem in x:
trs "WRITING FIXED SIZE ARRAY ELEMENT"
s.writeFixedSized toSszType(elem)
elif x is tuple|object:
enumInstanceSerializedFields(x, fieldName, field):
trs "WRITING FIXED SIZE FIELD", fieldName
s.writeFixedSized toSszType(field)
else:
unsupported x.type

template writeOffset(cursor: var WriteCursor, offset: int) =
write cursor, toBytesLE(uint32 offset)

template supports*(_: type SSZ, T: type): bool =
mixin toSszType
anonConst compiles(fixedPortionSize toSszType(declval T))

func init*(T: type SszWriter, stream: OutputStream): T =
result.stream = stream

proc writeVarSizeType(w: var SszWriter, value: auto) {.gcsafe, raises: [Defect, IOError].}

proc beginRecord*(w: var SszWriter, TT: type): auto =
type T = TT
when isFixedSize(T):
FixedSizedWriterCtx()
else:
const offset = when T is array|HashArray: len(T) * offsetSize
else: fixedPortionSize(T)
VarSizedWriterCtx(offset: offset,
fixedParts: w.stream.delayFixedSizeWrite(offset))

template writeField*(w: var SszWriter,
ctx: var auto,
fieldName: string,
field: auto) =
mixin toSszType
when ctx is FixedSizedWriterCtx:
writeFixedSized(w.stream, toSszType(field))
else:
type FieldType = type toSszType(field)

when isFixedSize(FieldType):
writeFixedSized(ctx.fixedParts, toSszType(field))
else:
trs "WRITING OFFSET ", ctx.offset, " FOR ", fieldName
writeOffset(ctx.fixedParts, ctx.offset)
let initPos = w.stream.pos
trs "WRITING VAR SIZE VALUE OF TYPE ", name(FieldType)
when FieldType is BitList:
trs "BIT SEQ ", bytes(field)
writeVarSizeType(w, toSszType(field))
ctx.offset += w.stream.pos - initPos

template endRecord*(w: var SszWriter, ctx: var auto) =
when ctx is VarSizedWriterCtx:
finalize ctx.fixedParts

proc writeSeq[T](w: var SszWriter, value: seq[T])
{.raises: [Defect, IOError].} =
# Please note that `writeSeq` exists in order to reduce the code bloat
# produced from generic instantiations of the unique `List[N, T]` types.
when isFixedSize(T):
trs "WRITING LIST WITH FIXED SIZE ELEMENTS"
for elem in value:
w.stream.writeFixedSized toSszType(elem)
trs "DONE"
else:
trs "WRITING LIST WITH VAR SIZE ELEMENTS"
var offset = value.len * offsetSize
var cursor = w.stream.delayFixedSizeWrite offset
for elem in value:
cursor.writeFixedSized uint32(offset)
let initPos = w.stream.pos
w.writeVarSizeType toSszType(elem)
offset += w.stream.pos - initPos
finalize cursor
trs "DONE"

proc writeVarSizeType(w: var SszWriter, value: auto) {.raises: [Defect, IOError].} =
trs "STARTING VAR SIZE TYPE"

when value is HashArray|HashList:
writeVarSizeType(w, value.data)
elif value is SingleMemberUnion:
doAssert value.selector == 0'u8
w.writeValue 0'u8
w.writeValue value.value
elif value is List:
# We reduce code bloat by forwarding all `List` types to a general `seq[T]` proc.
writeSeq(w, asSeq value)
elif value is BitList:
# ATTENTION! We can reuse `writeSeq` only as long as our BitList type is implemented
# to internally match the binary representation of SSZ BitLists in memory.
writeSeq(w, bytes value)
elif value is object|tuple|array:
trs "WRITING OBJECT OR ARRAY"
var ctx = beginRecord(w, type value)
enumerateSubFields(value, field):
writeField w, ctx, astToStr(field), field
endRecord w, ctx
else:
unsupported type(value)

proc writeValue*(w: var SszWriter, x: auto) {.gcsafe, raises: [Defect, IOError].} =
mixin toSszType
type T = type toSszType(x)

when isFixedSize(T):
w.stream.writeFixedSized toSszType(x)
else:
w.writeVarSizeType toSszType(x)

func sszSize*(value: auto): int {.gcsafe, raises: [Defect].}

func sszSizeForVarSizeList[T](value: openArray[T]): int =
result = len(value) * offsetSize
for elem in value:
result += sszSize(toSszType elem)

func sszSize*(value: auto): int {.gcsafe, raises: [Defect].} =
mixin toSszType
type T = type toSszType(value)

when isFixedSize(T):
anonConst fixedPortionSize(T)

elif T is array|List|HashList|HashArray:
type E = ElemType(T)
when isFixedSize(E):
len(value) * anonConst(fixedPortionSize(E))
elif T is HashArray:
sszSizeForVarSizeList(value.data)
elif T is array:
sszSizeForVarSizeList(value)
else:
sszSizeForVarSizeList(asSeq value)

elif T is BitList:
return len(bytes(value))

elif T is SingleMemberUnion:
sszSize(toSszType value.value) + 1

elif T is object|tuple:
result = anonConst fixedPortionSize(T)
enumInstanceSerializedFields(value, _{.used.}, field):
type FieldType = type toSszType(field)
when not isFixedSize(FieldType):
result += sszSize(toSszType field)

else:
unsupported T

proc writeValue*[T](w: var SszWriter, x: SizePrefixed[T]) {.raises: [Defect, IOError].} =
var cursor = w.stream.delayVarSizeWrite(Leb128.maxLen(uint64))
let initPos = w.stream.pos
w.writeValue T(x)
let length = toBytes(uint64(w.stream.pos - initPos), Leb128)
cursor.finalWrite length.toOpenArray()

proc readValue*(r: var SszReader, val: var auto) {.
raises: [Defect, MalformedSszError, SszSizeMismatchError, IOError].} =
mixin readSszBytes
type T = type val
when isFixedSize(T):
const minimalSize = fixedPortionSize(T)
if r.stream.readable(minimalSize):
readSszBytes(r.stream.read(minimalSize), val)
else:
raise newException(MalformedSszError, "SSZ input of insufficient size")
else:
# TODO(zah) Read the fixed portion first and precisely measure the
# size of the dynamic portion to consume the right number of bytes.
readSszBytes(r.stream.read(r.stream.len.get), val)

proc readSszBytes*[T](data: openArray[byte], val: var T) {.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was really intended as an eth2 specific thing - but one thought I've had is to remove nim-serialization completely from the process so as to have a stand-alone SSZ reading library that operates on openArray - then nim-ser can be glued on top for any .. uh .. advanced use cases should they materialize.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The codec module already provides this (reading that only depends on the compile-time type metadata aspects of nim-serialization such as the dontSerialize pragma). I don't see any good reason for aiming to drop this dependency.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed, codec was written with this in mind and readSszBytes as well, to a certain extent, and it should perhaps be moved there so that when using only readSszBytes, the symbol table isn't infected with the rest of nim-ser - the aim is to get rid of some of the generic instantiation code and other reader/writer magic that isn't really relevant when reading from openArray - this is also "usually" a good way to factor this kind of code into orthogonal responsibilities anyway: one layer that deals with concrete raw byte stuff and another layer that deals with magic object transformations and high-level representations and opinions about what should be magic and implicit and what shouldn't be.

raises: [Defect, MalformedSszError, SszSizeMismatchError].} =
# Overload `readSszBytes` to perform custom operations on T after
# deserialization
mixin readSszValue
readSszValue(data, val)
13 changes: 9 additions & 4 deletions ssz_serialization.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ skipDirs = @["tests"]

requires "nim >= 1.2.0",
"serialization",
"stew"
"json_serialization",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is an .. unfortunate compromise - ie an ssz library shouldn't depend on json, yet nim:s / nim-ser:s import model with open generics and different behavior, even within an application, depending on imports really leaves no other practical option than this - cc @zah

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for example, importing jsonser/std/option instead of std/option to get sane logging vs a memory dump is a trivial example of this, where the consequences are contained to ugly logs - it's however not humanly possible to keep track of whether something is imported or not in every place that matters specially when sandwitches come into play - it gets a lot worse when trying to implement a standard where a particular format is mandated.

Copy link
Contributor

@zah zah Oct 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Err, why is this dependency necessary? The correct action here is just to remove it IMO.

The JSON serialization definitions of HashList, etc can sit in modules such as eth2_rest_serialization, eth2_json_serialization, etc.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you are trying to provide an excuse for why you structured the code this way.

I think some problems just have to be addressed at the language level, because these inappropriate compromises can get you only thus far - you cannot fix the options module for example (or any other module developed by a third party).

I've suggested one possible language solution multiple times, the last one has been here:
nim-lang/RFCs#380 (comment)

Copy link
Member

@arnetheduck arnetheduck Oct 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, in fantasy-land, anything is possible.

In Nim, the best thing that could happen with the current, arguably broken nim-ser design, would actually be that json-ser includes serializers for all std types it supports out-of-the box without magic imports - or at least the "common" ones, and libraries that are used with nim-json-ser or some other ser always declare support for those serialization formats in the same module as the type itself is declared - anything else doesn't work, in actual real code - for this reason, the best thing this library can do is to depend on json-ser because json-ser is more "commonly" used than ssz.

The other, better option is that nim-ser and json-ser no longer contain generic catch-alls for objects at all: every type you want to serialize, you are forced to declare support for it up-front - this doesn't require any language changes: it simply requires that under no circumstances is there something that matches object (and tuple of course).

This way, when you "forget" to import the right serializer, you get a compile error instead of a runtime explosion. It's 100% wrong that objects are automatically serialized this way - the only thing that does is make the simple trivial action of adding an import easier, while making the already difficult task of remembering what needs to be imported where impossible. From a design perspective of any library, that's simply wrong - hard things should never be made be made harder, and it's really no loss that if you want to add json-ser support for your type, you need to tell the library "hey, please support this type for me". It can be as trivial as a {.xxx.} annotation or a forwarded call to a "default" serializer for objects.

"stew",
"stint",
"nimcrypto",
"blscurve",
"unittest2"

proc test(env, path: string) =
# Compilation language is controlled by TEST_LANG
Expand All @@ -20,8 +25,8 @@ proc test(env, path: string) =
if not dirExists "build":
mkDir "build"
exec "nim " & lang & " " & env &
" -r --hints:off --warnings:off " & path
" -r --hints:off --warnings:on " & path

task test, "Run all tests":
test "--threads:off", "tests/test_all"
test "--threads:on", "tests/test_all"
test "--threads:off -d:PREFER_BLST_SHA256=false", "tests/test_all"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Building it currently only using the sha256 implementation of nimcrypto as I'm having issues with BLST outside of the nimbus-eth2 env.

test "--threads:on -d:PREFER_BLST_SHA256=false", "tests/test_all"
Loading