diff --git a/.bazel/patches/com_github_ava_labs_firewood_go_ethhash_ffi.patch b/.bazel/patches/com_github_ava_labs_firewood_go_ethhash_ffi.patch
index 69cd2974b47f..d7986e5361c0 100644
--- a/.bazel/patches/com_github_ava_labs_firewood_go_ethhash_ffi.patch
+++ b/.bazel/patches/com_github_ava_labs_firewood_go_ethhash_ffi.patch
@@ -2,7 +2,7 @@ diff --git a/BUILD.bazel b/BUILD.bazel
 new file mode 100644
 --- /dev/null
 +++ b/BUILD.bazel
-@@ -0,0 +1,59 @@
+@@ -0,0 +1,59 @@
 +# gazelle:ignore
 +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 +
diff --git a/.bazel/patches/com_github_ava_labs_libevm.patch b/.bazel/patches/com_github_ava_labs_libevm.patch
index aa6372bcec01..951786252e02 100644
--- a/.bazel/patches/com_github_ava_labs_libevm.patch
+++ b/.bazel/patches/com_github_ava_labs_libevm.patch
@@ -2,7 +2,7 @@ diff --git a/crypto/secp256k1/BUILD.bazel b/crypto/secp256k1/BUILD.bazel
 new file mode 100644
 --- /dev/null
 +++ b/crypto/secp256k1/BUILD.bazel
-@@ -0,0 +1,27 @@
+@@ -0,0 +1,27 @@
 +# gazelle:ignore
 +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 +
@@ -34,7 +34,7 @@ diff --git a/crypto/secp256k1/libsecp256k1/BUILD.bazel b/crypto/secp256k1/libsec
 new file mode 100644
 --- /dev/null
 +++ b/crypto/secp256k1/libsecp256k1/BUILD.bazel
-@@ -0,0 +1,9 @@
+@@ -0,0 +1,9 @@
 +# gazelle:ignore
 +cc_library(
 +    name = "libsecp256k1",
diff --git a/.bazel/patches/com_github_supranational_blst.patch b/.bazel/patches/com_github_supranational_blst.patch
index ee7528310675..cbf5aa1493bf 100644
--- a/.bazel/patches/com_github_supranational_blst.patch
+++ b/.bazel/patches/com_github_supranational_blst.patch
@@ -2,7 +2,7 @@ diff --git a/bindings/BUILD.bazel b/bindings/BUILD.bazel
 new file mode 100644
 --- /dev/null
 +++ b/bindings/BUILD.bazel
-@@ -0,0 +1,9 @@
+@@ -0,0 +1,9 @@
 +# gazelle:ignore
 +# Header library for blst C bindings
 +
@@ -16,7 +16,7 @@ diff --git a/bindings/go/BUILD.bazel b/bindings/go/BUILD.bazel
 new file mode 100644
 --- /dev/null
 +++ b/bindings/go/BUILD.bazel
-@@ -0,0 +1,60 @@
+@@ -0,0 +1,60 @@
 +# gazelle:ignore
 +# Go bindings for blst library
 +#
@@ -81,7 +81,7 @@ diff --git a/bindings/vectors/BUILD.bazel b/bindings/vectors/BUILD.bazel
 new file mode 100644
 --- /dev/null
 +++ b/bindings/vectors/BUILD.bazel
-@@ -0,0 +1,8 @@
+@@ -0,0 +1,8 @@
 +# gazelle:ignore
 +# Test vectors for blst
 +
@@ -94,7 +94,7 @@ diff --git a/build/BUILD.bazel b/build/BUILD.bazel
 new file mode 100644
 --- /dev/null
 +++ b/build/BUILD.bazel
-@@ -0,0 +1,20 @@
+@@ -0,0 +1,20 @@
 +# gazelle:ignore
 +# Assembly and build file library for blst
 +#
@@ -119,7 +119,7 @@ diff --git a/src/BUILD.bazel b/src/BUILD.bazel
 new file mode 100644
 --- /dev/null
 +++ b/src/BUILD.bazel
-@@ -0,0 +1,10 @@
+@@ -0,0 +1,10 @@
 +# gazelle:ignore
 +# Source library for blst - headers and C files for unity build
 +
diff --git a/go.work.sum b/go.work.sum
index 6d6d0f6c885c..832e6e5684a9 100644
--- a/go.work.sum
+++ b/go.work.sum
@@ -335,6 +335,7 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310 h1:BUAU3CGlLvorLI26
 github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA=
 github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
 github.com/ava-labs/firewood-go-ethhash/ffi v0.3.0/go.mod h1:71C76bo47zlDX9gWnn3p/0QZQXkTQ/GNqUNI6fchvjs=
+github.com/ava-labs/libevm v1.13.14-0.4.0.rc.2/go.mod h1:oyJdZfpQTc9fVzAbDry+QRYeiCbw8s/kGaDUsEMpb4I=
 github.com/aws/aws-sdk-go-v2 v1.21.2 
h1:+LXZ0sgo8quN9UOKXXzAWRT3FWd4NxeXWOZom9pE7GA= github.com/aws/aws-sdk-go-v2 v1.21.2/go.mod h1:ErQhvNuEMhJjweavOYhxVkn2RUx7kQXVATHrjKtxIpM= github.com/aws/aws-sdk-go-v2/config v1.18.45 h1:Aka9bI7n8ysuwPeFdm77nfbyHCAKQ3z9ghB3S/38zes= @@ -381,6 +382,8 @@ github.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792 h1:R8vQdOQdZ9Y3 github.com/btcsuite/winsvc v1.0.0 h1:J9B4L7e3oqhXOcm+2IuNApwzQec85lE+QaikUcCs+dk= github.com/campoy/embedmd v1.0.0 h1:V4kI2qTJJLf4J29RzI/MAt2c3Bl4dQSYPuflzwFH2hY= github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -575,6 +578,7 @@ github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:Fecb github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1/go.mod h1:Zanoh4+gvIgluNqcfMVTJueD4wSS5hT7zTt4Mrutd90= github.com/guptarohit/asciigraph v0.5.5 h1:ccFnUF8xYIOUPPY3tmdvRyHqmn1MYI9iv1pLKX+/ZkQ= github.com/guptarohit/asciigraph v0.5.5/go.mod h1:dYl5wwK4gNsnFf9Zp+l06rFiDZ5YtXM6x7SRWZ3KGag= github.com/hashicorp/consul/api v1.1.0 h1:BNQPM9ytxj6jbjjdRPioQ94T6YXriSopn0i8COv6SRA= @@ -695,6 +699,7 @@ github.com/mattn/go-sqlite3 v1.14.5/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGw github.com/mattn/go-sqlite3 v1.14.14 h1:qZgc/Rwetq+MtyE18WhzjokPD93dNqLGNT3QJuLvBGw= github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mattn/goveralls v0.0.2 h1:7eJB6EqsPhRVxvwEXGnqdO2sJI0PTsrWoTMXEk9/OQc= +github.com/mattn/goveralls v0.0.5/go.mod h1:Xg2LHi51faXLyKXwsndxiW6uxEEQT9+3sjGzzwU4xy0= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= @@ -891,17 +896,24 @@ go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7 go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.37.0/go.mod h1:MJTqhM0im3mRLw1i8uGHnCvUEeS7VwRyxlLC78PA18M= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0/go.mod h1:hYwym2nDEeZfG/motx0p7L7J1N1vyzIThemQsb4g2qY= go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= 
go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= +go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= +go.opentelemetry.io/proto/otlp v1.7.0/go.mod h1:fSKjH6YJ7HDlwzltzyMj036AJ3ejJLCgCSHGj4efDDo= go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -922,6 +934,8 @@ golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZv golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE= @@ -931,6 +945,7 @@ golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhp golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028 h1:4+4C/Iv2U4fMZBiMCc98MG1In4gJY5YRhtpDNeDeHWs= golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -949,11 +964,14 @@ golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= +golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y= 
golang.org/x/oauth2 v0.0.0-20170207211851-4464e7848382/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/perf v0.0.0-20230113213139-801c7ef9e5c5 h1:ObuXPmIgI4ZMyQLIz48cJYgSyWdjUXc2SZAdyJMwEAU= golang.org/x/perf v0.0.0-20230113213139-801c7ef9e5c5/go.mod h1:UBKtEnL8aqnd+0JHqZ+2qoMDwtuy6cYhhKNoHLBiTQc= golang.org/x/sync v0.0.0-20220819030929-7fc1605a5dde/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -963,6 +981,8 @@ golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -988,10 +1008,13 @@ golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8 h1:LvzTn0GQhWuvKH/kVRS3R3bVAsdQWI7hvfLHGgh9+lU= golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8/go.mod h1:Pi4ztBfryZoJEkyFTI5/Ocsu2jXyDr6iSdgJiYE/uwE= golang.org/x/telemetry v0.0.0-20260209163413-e7419c687ee4 h1:bTLqdHv7xrGlFbvf5/TXNxy/iUwwdkjhqQTJDjW7aj0= golang.org/x/telemetry v0.0.0-20260209163413-e7419c687ee4/go.mod h1:g5NllXBEermZrmR51cJDQxmJUHUOfRAaNyWBM+R+548= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= +golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM= golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= @@ -1001,10 +1024,14 @@ golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/time v0.1.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time 
v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20200113040837-eac381796e91/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200317205521-2944c61d58b4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= @@ -1015,6 +1042,7 @@ golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc= gonum.org/v1/plot v0.10.0/go.mod h1:JWIHJ7U20drSQb/aDpTetJzfC1KlAPldJLpkSy88dvQ= gonum.org/v1/plot v0.15.2 h1:Tlfh/jBk2tqjLZ4/P8ZIwGrLEWQSPDLRm/SNWKNXiGI= @@ -1037,6 +1065,9 @@ google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go. google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08= google.golang.org/genproto/googleapis/api v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:kXqgZtrWaf6qS3jZOCnCH7WYfrvFjkC51bM8fz3RsCA= google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0/go.mod h1:8ytArBbtOy2xfht+y2fqKd5DRDJRUQhqbyEnQ4bDChs= +google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= +google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:p3MLuOwURrGBRoEyFHBT3GjUwaCQVKeNqqWxlcISGdw= +google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY= google.golang.org/genproto/googleapis/bytestream v0.0.0-20250313205543-e70fdf4c4cb4/go.mod h1:WkJpQl6Ujj3ElX4qZaNm5t6cT95ffI4K+HKQ0+1NyMw= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU= @@ -1048,6 +1079,10 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7/go. 
google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/genproto/googleapis/rpc v0.0.0-20250818200422-3122310a409c/go.mod h1:gw1tLEfykwDz2ET4a12jcXt4couGAm7IwsVaTy0Sflo= google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401001100-f93e5f3e9f0f/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= google.golang.org/grpc v0.0.0-20170208002647-2a6bf6142e96/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.66.2/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y= @@ -1057,6 +1092,9 @@ google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40Rmc google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec= google.golang.org/grpc v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM= google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/grpc v1.79.1/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/grpc v1.79.2/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0 h1:M1YKkFIboKNieVO5DLUEVzQfGwJD30Nv2jfUgzb5UcE= google.golang.org/grpc/examples v0.0.0-20230224211313-3775f633ce20/go.mod h1:Nr5H8+MlGWr5+xX/STzdoEqJrO+YteqFbMyCsrb6mH0= google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= @@ -1066,6 +1104,7 @@ google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojt google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= diff --git a/graft/coreth/core/blockchain.go b/graft/coreth/core/blockchain.go index cb3d620d02ea..bb7a72588c97 100644 --- a/graft/coreth/core/blockchain.go +++ b/graft/coreth/core/blockchain.go @@ -2163,6 +2163,12 @@ func (bc *BlockChain) ResetToStateSyncedBlock(block *types.Block) error { bc.chainmu.Lock() defer bc.chainmu.Unlock() + // Persist the block to the raw DB. During dynamic sync the commit-target + // block may not have been fetched by the block syncer if the target + // advanced beyond its initial fetch window. 
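+	// Both writes are idempotent key-value puts, so re-writing a block the
+	// syncer already fetched is harmless.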
+ rawdb.WriteBlock(bc.db, block) + rawdb.WriteCanonicalHash(bc.db, block.Hash(), block.NumberU64()) + // Update head block and snapshot pointers on disk batch := bc.db.NewBatch() if err := bc.batchBlockAcceptedIndices(batch, block); err != nil { diff --git a/graft/coreth/plugin/evm/atomic/state/atomic_trie.go b/graft/coreth/plugin/evm/atomic/state/atomic_trie.go index 250b6cb1adb9..3af734c445ef 100644 --- a/graft/coreth/plugin/evm/atomic/state/atomic_trie.go +++ b/graft/coreth/plugin/evm/atomic/state/atomic_trie.go @@ -238,6 +238,13 @@ func (a *AtomicTrie) LastAcceptedRoot() common.Hash { return a.lastAcceptedRoot } +// ResetToLastCommitted resets lastAcceptedRoot to the last committed root. +// Called between dynamic syncer sessions to clear stale references from a +// cancelled session. +func (a *AtomicTrie) ResetToLastCommitted() { + a.lastAcceptedRoot = a.lastCommittedRoot +} + func (a *AtomicTrie) InsertTrie(nodes *trienode.NodeSet, root common.Hash) error { if nodes != nil { if err := a.trieDB.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil); err != nil { diff --git a/graft/coreth/plugin/evm/atomic/sync/BUILD.bazel b/graft/coreth/plugin/evm/atomic/sync/BUILD.bazel index aaaf2d8e7f01..4c2707834f3c 100644 --- a/graft/coreth/plugin/evm/atomic/sync/BUILD.bazel +++ b/graft/coreth/plugin/evm/atomic/sync/BUILD.bazel @@ -32,6 +32,7 @@ go_library( "@com_github_ava_labs_libevm//core/types", "@com_github_ava_labs_libevm//crypto", "@com_github_ava_labs_libevm//libevm/options", + "@com_github_ava_labs_libevm//log", "@com_github_ava_labs_libevm//metrics", "@com_github_ava_labs_libevm//trie", "@com_github_ava_labs_libevm//triedb", diff --git a/graft/coreth/plugin/evm/atomic/sync/dynamic_syncer.go b/graft/coreth/plugin/evm/atomic/sync/dynamic_syncer.go new file mode 100644 index 000000000000..b7e45864e497 --- /dev/null +++ b/graft/coreth/plugin/evm/atomic/sync/dynamic_syncer.go @@ -0,0 +1,100 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package sync + +import ( + "context" + + "github.com/ava-labs/libevm/common" + "github.com/ava-labs/libevm/log" + + "github.com/ava-labs/avalanchego/database/versiondb" + "github.com/ava-labs/avalanchego/graft/evm/sync/types" + + atomicstate "github.com/ava-labs/avalanchego/graft/coreth/plugin/evm/atomic/state" +) + +const ( + atomicDynamicSyncerName = "Atomic State Syncer (dynamic)" + atomicDynamicSyncerID = "state_atomic_sync" +) + +var _ types.PivotSession = (*atomicPivotSession)(nil) + +// atomicPivotSession implements types.PivotSession for atomic trie sync. +type atomicPivotSession struct { + inner *Syncer + client types.LeafClient + db *versiondb.Database + atomicTrie *atomicstate.AtomicTrie + opts []SyncerOption +} + +func (s *atomicPivotSession) Run(ctx context.Context) error { + return s.inner.Sync(ctx) +} + +func (*atomicPivotSession) ShouldPivot(common.Hash) bool { + // Always returns true because the incoming root is a block root + // (proxy), not the actual atomic root, so comparing it is meaningless. + return true +} + +func (s *atomicPivotSession) Rebuild(newRoot common.Hash, newHeight uint64) (types.PivotSession, error) { + log.Info("atomic syncer pivoting", "newHeight", newHeight, "newRoot", newRoot) + + if err := s.inner.Finalize(); err != nil { + log.Error("failed to flush atomic syncer during pivot", "err", err) + } + s.atomicTrie.ResetToLastCommitted() + + newInner, err := NewSyncer(s.client, s.db, s.atomicTrie, newRoot, newHeight, s.opts...) 
+ if err != nil { + return nil, err + } + return &atomicPivotSession{ + inner: newInner, + client: s.client, + db: s.db, + atomicTrie: s.atomicTrie, + opts: s.opts, + }, nil +} + +func (*atomicPivotSession) OnSessionComplete() error { + return nil +} + +// Finalize flushes the inner syncer's in-progress work. +func (s *atomicPivotSession) Finalize() error { + return s.inner.Finalize() +} + +// NewAtomicDynamicSyncer creates a DynamicSyncer backed by an +// atomicPivotSession. The returned syncer supports pivoting to new +// targets during sync. +func NewAtomicDynamicSyncer( + inner *Syncer, + client types.LeafClient, + db *versiondb.Database, + atomicTrie *atomicstate.AtomicTrie, + initialRoot common.Hash, + initialHeight uint64, + opts ...SyncerOption, +) *types.DynamicSyncer { + session := &atomicPivotSession{ + inner: inner, + client: client, + db: db, + atomicTrie: atomicTrie, + opts: opts, + } + return types.NewDynamicSyncer( + atomicDynamicSyncerName, + atomicDynamicSyncerID, + session, + initialRoot, + initialHeight, + ) +} diff --git a/graft/coreth/plugin/evm/atomic/sync/dynamic_syncer_test.go b/graft/coreth/plugin/evm/atomic/sync/dynamic_syncer_test.go new file mode 100644 index 000000000000..3ccd95edca33 --- /dev/null +++ b/graft/coreth/plugin/evm/atomic/sync/dynamic_syncer_test.go @@ -0,0 +1,51 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package sync + +import ( + "math/rand" + "testing" + + "github.com/ava-labs/libevm/common" + "github.com/ava-labs/libevm/core/rawdb" + "github.com/ava-labs/libevm/triedb" + "github.com/stretchr/testify/require" + + "github.com/ava-labs/avalanchego/graft/coreth/plugin/evm/atomic/state" + "github.com/ava-labs/avalanchego/graft/evm/sync/synctest" + "github.com/ava-labs/avalanchego/graft/evm/sync/types" +) + +// TestAtomicDynamicSyncer_CompletesWithoutPivot verifies that the dynamic +// wrapper completes a full sync when no pivot is triggered. +func TestAtomicDynamicSyncer_CompletesWithoutPivot(t *testing.T) { + targetHeight := 2 * uint64(testCommitInterval) + r := rand.New(rand.NewSource(1)) + serverTrieDB := triedb.NewDatabase(rawdb.NewMemoryDatabase(), nil) + root, _, _ := synctest.GenerateIndependentTrie(t, r, serverTrieDB, int(targetHeight), state.TrieKeyLength) + + ctx, mockClient, atomicBackend, clientDB := setupTestInfrastructure(t, serverTrieDB) + atomicTrie := atomicBackend.AtomicTrie() + + inner, err := NewSyncer(mockClient, clientDB, atomicTrie, root, targetHeight) + require.NoError(t, err) + + ds := NewAtomicDynamicSyncer(inner, mockClient, clientDB, atomicTrie, root, targetHeight) + + require.NoError(t, ds.Sync(ctx)) + require.Equal(t, targetHeight, ds.TargetHeight()) +} + +// TestAtomicDynamicSyncer_UpdateTarget_StaleIgnored verifies that UpdateTarget +// with a height at or below the current desired height is a no-op. 
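+// Targets are monotonic, so a late or out-of-order pivot can never move the
+// syncer backwards.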
+func TestAtomicDynamicSyncer_UpdateTarget_StaleIgnored(t *testing.T) { + ds := types.NewDynamicSyncer("test", "test", &synctest.PivotSession{}, common.Hash{1}, 100) + + require.NoError(t, ds.UpdateTarget(&synctest.SyncTarget{BlockRoot: common.Hash{2}, BlockHeight: 100})) + require.Equal(t, common.Hash{1}, ds.DesiredRoot()) + + require.NoError(t, ds.UpdateTarget(&synctest.SyncTarget{BlockRoot: common.Hash{3}, BlockHeight: 50})) + require.Equal(t, common.Hash{1}, ds.DesiredRoot()) + require.Equal(t, uint64(100), ds.TargetHeight()) +} diff --git a/graft/coreth/plugin/evm/atomic/sync/extender.go b/graft/coreth/plugin/evm/atomic/sync/extender.go index 7eea3800f635..93032ad84e72 100644 --- a/graft/coreth/plugin/evm/atomic/sync/extender.go +++ b/graft/coreth/plugin/evm/atomic/sync/extender.go @@ -6,6 +6,8 @@ package sync import ( "fmt" + "github.com/ava-labs/libevm/log" + "github.com/ava-labs/avalanchego/database/versiondb" "github.com/ava-labs/avalanchego/graft/coreth/plugin/evm/atomic/state" "github.com/ava-labs/avalanchego/graft/evm/message" @@ -14,37 +16,49 @@ import ( // Extender is the sync extender for the atomic VM. type Extender struct { - backend *state.AtomicBackend - trie *state.AtomicTrie - requestSize uint16 // maximum number of leaves to sync in a single request + backend *state.AtomicBackend + trie *state.AtomicTrie + requestSize uint16 // maximum number of leaves to sync in a single request + dynamicEnabled bool // whether to create a dynamic syncer that supports pivoting } // Initialize initializes the sync extender with the backend and trie and request size. -func (e *Extender) Initialize(backend *state.AtomicBackend, trie *state.AtomicTrie, requestSize uint16) { +func (e *Extender) Initialize(backend *state.AtomicBackend, trie *state.AtomicTrie, requestSize uint16, dynamicEnabled bool) { e.backend = backend e.trie = trie e.requestSize = requestSize + e.dynamicEnabled = dynamicEnabled } // CreateSyncer creates the atomic syncer with the given client and verDB. +// When dynamic mode is enabled, wraps the static syncer in an +// AtomicDynamicSyncer that supports pivoting to new targets. func (e *Extender) CreateSyncer(client types.LeafClient, verDB *versiondb.Database, summary message.Syncable) (types.Syncer, error) { atomicSummary, ok := summary.(*Summary) if !ok { return nil, fmt.Errorf("atomic sync extender: expected *Summary, got %T", summary) } + opts := []SyncerOption{WithRequestSize(e.requestSize)} syncer, err := NewSyncer( client, verDB, e.trie, atomicSummary.AtomicRoot, atomicSummary.BlockNumber, - WithRequestSize(e.requestSize), + opts..., ) if err != nil { return nil, fmt.Errorf("atomic.NewSyncer failed: %w", err) } - return syncer, nil + if !e.dynamicEnabled { + return syncer, nil + } + return NewAtomicDynamicSyncer( + syncer, client, verDB, e.trie, + atomicSummary.AtomicRoot, atomicSummary.BlockNumber, + opts..., + ), nil } // OnFinishBeforeCommit implements the sync.Extender interface by marking the previously last accepted block for the shared memory cursor. @@ -61,12 +75,28 @@ func (e *Extender) OnFinishBeforeCommit(lastAcceptedHeight uint64, summary messa // OnFinishAfterCommit implements the sync.Extender interface by applying the atomic trie to the shared memory. func (e *Extender) OnFinishAfterCommit(summaryHeight uint64) error { - // the chain state is already restored, and, from this point on, - // the block synced to is the accepted block. The last operation - // is updating shared memory with the atomic trie. 
- // ApplyToSharedMemory does this, and, even if the VM is stopped - // (gracefully or ungracefully), since MarkApplyToSharedMemoryCursor - // is called, VM will resume ApplyToSharedMemory on Initialize. + // Check if the atomic trie has complete data up to summaryHeight. + // During dynamic sync the coordinator's commitTarget may be ahead of the + // atomic syncer's fixed target. In that case the trie only covers up to + // the atomic syncer's target, so we must skip ApplyToSharedMemory and + // keep the cursor alive. The gap is filled during batch replay (each + // replayed block's Accept applies atomic ops to shared memory inline) + // and the cursor is cleaned up on the next VM restart via + // NewAtomicBackend -> ApplyToSharedMemory(lastAcceptedHeight). + _, lastCommittedHeight := e.trie.LastCommitted() + if lastCommittedHeight < summaryHeight { + log.Info( + "skipping ApplyToSharedMemory: atomic trie has partial data", + "lastCommittedHeight", lastCommittedHeight, + "summaryHeight", summaryHeight, + ) + return nil + } + + // The atomic trie covers the full range. Apply to shared memory now. + // Even if the VM is stopped (gracefully or ungracefully), since + // MarkApplyToSharedMemoryCursor was called in OnFinishBeforeCommit, + // the VM will resume ApplyToSharedMemory on Initialize. if err := e.backend.ApplyToSharedMemory(summaryHeight); err != nil { return fmt.Errorf("failed to apply atomic trie to shared memory after commit: %w", err) } diff --git a/graft/coreth/plugin/evm/atomic/sync/summary_test.go b/graft/coreth/plugin/evm/atomic/sync/summary_test.go index 42db2338dc2d..ee7391dbb7f3 100644 --- a/graft/coreth/plugin/evm/atomic/sync/summary_test.go +++ b/graft/coreth/plugin/evm/atomic/sync/summary_test.go @@ -23,10 +23,10 @@ func TestMarshalSummary(t *testing.T) { require.Equal(t, uint64(2), atomicSummary.Height()) require.Equal(t, common.Hash{3}, atomicSummary.GetBlockRoot()) - expectedBase64Bytes := "AAAAAAAAAAAAAgEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" - require.Equal(t, expectedBase64Bytes, base64.StdEncoding.EncodeToString(atomicSummary.Bytes())) - expectedID := ids.FromStringOrPanic("256pj4a3SBG5kervhxKfeKpNRcVR1xk5BzTpkTkybkM8uMPu6Q") - require.Equal(t, expectedID, atomicSummary.ID()) + wantBase64Bytes := "AAAAAAAAAAAAAgEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==" + require.Equal(t, wantBase64Bytes, base64.StdEncoding.EncodeToString(atomicSummary.Bytes())) + wantID := ids.FromStringOrPanic("256pj4a3SBG5kervhxKfeKpNRcVR1xk5BzTpkTkybkM8uMPu6Q") + require.Equal(t, wantID, atomicSummary.ID()) provider := &SummaryProvider{} called := false diff --git a/graft/coreth/plugin/evm/atomic/sync/syncer.go b/graft/coreth/plugin/evm/atomic/sync/syncer.go index ec07f3a41a86..32cff437de12 100644 --- a/graft/coreth/plugin/evm/atomic/sync/syncer.go +++ b/graft/coreth/plugin/evm/atomic/sync/syncer.go @@ -147,6 +147,13 @@ func (s *Syncer) Sync(ctx context.Context) error { return s.syncer.Sync(ctx) } +func (*Syncer) UpdateTarget(message.Syncable) error { + // The atomic syncer does not pivot. It syncs to the initial target from + // the peer summary and relies on batch replay to fill the gap between + // its target and the coordinator's commitTarget. + return nil +} + // Finalize commits any pending database changes to disk. 
// This ensures that even if the sync is cancelled or fails, we preserve // the progress up to the last fully synced height. diff --git a/graft/coreth/plugin/evm/atomic/sync/syncer_test.go b/graft/coreth/plugin/evm/atomic/sync/syncer_test.go index 2f111734c982..4e41cb25895e 100644 --- a/graft/coreth/plugin/evm/atomic/sync/syncer_test.go +++ b/graft/coreth/plugin/evm/atomic/sync/syncer_test.go @@ -37,10 +37,10 @@ const ( ) type atomicSyncTestCheckpoint struct { - expectedNumLeavesSynced int64 // expected number of leaves to have synced at this checkpoint - leafCutoff int // Number of leafs to sync before cutting off responses - targetRoot common.Hash // Root of trie to resume syncing from after stopping - targetHeight uint64 // Height to sync to after stopping + wantNumLeavesSynced int64 // expected number of leaves to have synced at this checkpoint + leafCutoff int // Number of leafs to sync before cutting off responses + targetRoot common.Hash // Root of trie to resume syncing from after stopping + targetHeight uint64 // Height to sync to after stopping } // TestSyncerScenarios is a parameterized test that covers basic syncing scenarios with different worker configurations. @@ -113,10 +113,10 @@ func TestSyncerResumeScenarios(t *testing.T) { testSyncer(t, serverTrieDB, targetHeight, root, []atomicSyncTestCheckpoint{ { - targetRoot: root, - targetHeight: targetHeight, - leafCutoff: testCommitInterval*5 - 1, - expectedNumLeavesSynced: testCommitInterval * 4, + targetRoot: root, + targetHeight: targetHeight, + leafCutoff: testCommitInterval*5 - 1, + wantNumLeavesSynced: testCommitInterval * 4, }, }, int64(targetHeight)+testCommitInterval-1, tt.numWorkers) // we will resync the last commitInterval - 1 leafs }) @@ -163,10 +163,10 @@ func TestSyncerResumeNewRootCheckpointScenarios(t *testing.T) { testSyncer(t, serverTrieDB, targetHeight1, root1, []atomicSyncTestCheckpoint{ { - targetRoot: root2, - targetHeight: targetHeight2, - leafCutoff: testCommitInterval*5 - 1, - expectedNumLeavesSynced: testCommitInterval * 4, + targetRoot: root2, + targetHeight: targetHeight2, + leafCutoff: testCommitInterval*5 - 1, + wantNumLeavesSynced: testCommitInterval * 4, }, }, int64(targetHeight2)+testCommitInterval-1, tt.numWorkers) // we will resync the last commitInterval - 1 leafs }) @@ -277,7 +277,7 @@ func testSyncer(t *testing.T, serverTrieDB *triedb.Database, targetHeight uint64 err = syncer.Sync(ctx) require.ErrorIs(t, err, leaf.ErrFailedToFetchLeafs) - require.Equal(t, checkpoint.expectedNumLeavesSynced, int64(numLeaves), "unexpected number of leaves received at checkpoint %d", i) + require.Equal(t, checkpoint.wantNumLeavesSynced, int64(numLeaves), "unexpected number of leaves received at checkpoint %d", i) // Replace the target root and height for the next checkpoint targetRoot = checkpoint.targetRoot targetHeight = checkpoint.targetHeight @@ -334,10 +334,10 @@ func testSyncer(t *testing.T, serverTrieDB *triedb.Database, targetHeight uint64 require.NoErrorf(t, addAllKeysWithPrefix(database.PackUInt64(height)), "failed to add keys for height %d", height) if height%testCommitInterval == 0 { - expected := hasher.Hash() + want := hasher.Hash() root, err := atomicTrie.Root(height) require.NoError(t, err) - require.Equal(t, expected, root) + require.Equal(t, want, root) } } } diff --git a/graft/coreth/plugin/evm/atomic/vm/vm.go b/graft/coreth/plugin/evm/atomic/vm/vm.go index 9aa08bef2d2c..1811a12d2da5 100644 --- a/graft/coreth/plugin/evm/atomic/vm/vm.go +++ b/graft/coreth/plugin/evm/atomic/vm/vm.go @@ -243,7 
+243,7 @@ func (vm *VM) Initialize( // Atomic backend is available now, we can initialize structs that depend on it atomicTrie := vm.AtomicBackend.AtomicTrie() syncProvider.Initialize(atomicTrie) - syncExtender.Initialize(vm.AtomicBackend, atomicTrie, vm.InnerVM.Config().StateSyncRequestSize) + syncExtender.Initialize(vm.AtomicBackend, atomicTrie, vm.InnerVM.Config().StateSyncRequestSize, vm.InnerVM.Config().StateSyncDynamicEnabled) leafHandler.Initialize(atomicTrie.TrieDB(), atomicstate.TrieKeyLength, message.CorethCodec) vm.SecpCache = secp256k1.NewRecoverCache(secpCacheSize) diff --git a/graft/coreth/plugin/evm/config/config.go b/graft/coreth/plugin/evm/config/config.go index e21f78bf5120..fc4e04deab93 100644 --- a/graft/coreth/plugin/evm/config/config.go +++ b/graft/coreth/plugin/evm/config/config.go @@ -133,8 +133,10 @@ type Config struct { MaxOutboundActiveRequests int64 `json:"max-outbound-active-requests"` // Sync settings - StateSyncEnabled *bool `json:"state-sync-enabled"` // Pointer distinguishes false (no state sync) and not set (state sync only at genesis). - StateSyncSkipResume bool `json:"state-sync-skip-resume"` // Forces state sync to use the highest available summary block + StateSyncEnabled *bool `json:"state-sync-enabled"` // Pointer distinguishes false (no state sync) and not set (state sync only at genesis). + StateSyncSkipResume bool `json:"state-sync-skip-resume"` // Forces state sync to use the highest available summary block + StateSyncDynamicEnabled bool `json:"state-sync-dynamic-enabled"` // Enables dynamic state sync orchestration. + StateSyncPivotInterval uint64 `json:"state-sync-pivot-interval"` // Block interval for forwarding dynamic sync target updates. StateSyncServerTrieCache int `json:"state-sync-server-trie-cache"` StateSyncIDs string `json:"state-sync-ids"` StateSyncCommitInterval uint64 `json:"state-sync-commit-interval"` diff --git a/graft/coreth/plugin/evm/config/config.md b/graft/coreth/plugin/evm/config/config.md index 8ed113fe567c..e2fa12e252a8 100644 --- a/graft/coreth/plugin/evm/config/config.md +++ b/graft/coreth/plugin/evm/config/config.md @@ -249,6 +249,8 @@ Configuration is provided as a JSON object. 
All fields are optional unless other |--------|------|-------------|---------| | `state-sync-enabled` | bool | Enable state sync | `false` | | `state-sync-skip-resume` | bool | Force state sync to use highest available summary block | `false` | +| `state-sync-dynamic-enabled` | bool | Enable dynamic state sync orchestration (deferred block operations + pivot updates) | `false` | +| `state-sync-pivot-interval` | uint64 | Number of blocks between dynamic sync target updates (used when dynamic sync is enabled) | `10000` | | `state-sync-ids` | string | Comma-separated list of state sync IDs; If not specified (or empty), peers are selected at random.| `""`| | `state-sync-commit-interval` | uint64 | Commit interval for state sync (blocks) | `16384` | | `state-sync-min-blocks` | uint64 | Minimum blocks ahead required for state sync | `300000` | diff --git a/graft/coreth/plugin/evm/config/default_config.go b/graft/coreth/plugin/evm/config/default_config.go index 587a81134147..dff743ec601f 100644 --- a/graft/coreth/plugin/evm/config/default_config.go +++ b/graft/coreth/plugin/evm/config/default_config.go @@ -11,7 +11,10 @@ import ( "github.com/ava-labs/avalanchego/graft/evm/utils" ) -const defaultCommitInterval = 4096 +const ( + defaultCommitInterval = 4096 + defaultStateSyncPivotInterval = 10_000 +) func NewDefaultConfig() Config { return Config{ @@ -77,7 +80,11 @@ func NewDefaultConfig() Config { StateSyncMinBlocks: 300_000, // the number of key/values to ask peers for per request StateSyncRequestSize: 1024, - StateHistory: uint64(32), + // Dynamic state sync is opt-in. + StateSyncDynamicEnabled: false, + // Number of blocks between dynamic sync target updates. + StateSyncPivotInterval: defaultStateSyncPivotInterval, + StateHistory: uint64(32), // Estimated block count in 24 hours with 2s block accept period HistoricalProofQueryWindow: uint64(24 * time.Hour / (2 * time.Second)), // Price Option Defaults diff --git a/graft/coreth/plugin/evm/vm.go b/graft/coreth/plugin/evm/vm.go index 98bdd1c54c78..f974b66c5fdd 100644 --- a/graft/coreth/plugin/evm/vm.go +++ b/graft/coreth/plugin/evm/vm.go @@ -671,17 +671,19 @@ func (vm *VM) initializeStateSync(lastAcceptedHeight uint64) error { BlockParser: vm, }, ), - Enabled: stateSyncEnabled, - SkipResume: vm.config.StateSyncSkipResume, - MinBlocks: vm.config.StateSyncMinBlocks, - RequestSize: vm.config.StateSyncRequestSize, - LastAcceptedHeight: lastAcceptedHeight, // TODO clean up how this is passed around - ChainDB: vm.chaindb, - VerDB: vm.versiondb, - MetadataDB: vm.metadataDB, - Acceptor: vm, - SyncSummaryProvider: vm.extensionConfig.SyncSummaryProvider, - Extender: vm.extensionConfig.SyncExtender, + Enabled: stateSyncEnabled, + SkipResume: vm.config.StateSyncSkipResume, + DynamicStateSyncEnabled: vm.config.StateSyncDynamicEnabled, + PivotInterval: vm.config.StateSyncPivotInterval, + MinBlocks: vm.config.StateSyncMinBlocks, + RequestSize: vm.config.StateSyncRequestSize, + LastAcceptedHeight: lastAcceptedHeight, // TODO clean up how this is passed around + ChainDB: vm.chaindb, + VerDB: vm.versiondb, + MetadataDB: vm.metadataDB, + Acceptor: vm, + SyncSummaryProvider: vm.extensionConfig.SyncSummaryProvider, + Extender: vm.extensionConfig.SyncExtender, }) // If StateSync is disabled, clear any ongoing summary so that we will not attempt to resume diff --git a/graft/coreth/plugin/evm/vmtest/test_syncervm.go b/graft/coreth/plugin/evm/vmtest/test_syncervm.go index 27ba1ac90f91..7a615a305550 100644 --- a/graft/coreth/plugin/evm/vmtest/test_syncervm.go +++ 
b/graft/coreth/plugin/evm/vmtest/test_syncervm.go @@ -68,8 +68,8 @@ var SyncerVMTests = []SyncerVMTest{ TestFunc: SkipStateSyncTest, }, { - Name: "StateSyncFromScratchTest", - TestFunc: StateSyncFromScratchTest, + Name: "StateSyncFromScratchModesTest", + TestFunc: StateSyncFromScratchModesTest, }, { Name: "StateSyncFromScratchExceedParentTest", @@ -83,6 +83,10 @@ var SyncerVMTests = []SyncerVMTest{ Name: "VMShutdownWhileSyncingTest", TestFunc: VMShutdownWhileSyncingTest, }, + { + Name: "DynamicSyncWithBlockInjectionTest", + TestFunc: DynamicSyncWithBlockInjectionTest, + }, } func SkipStateSyncTest(t *testing.T, testSetup *SyncTestSetup) { @@ -101,18 +105,33 @@ func SkipStateSyncTest(t *testing.T, testSetup *SyncTestSetup) { } } -func StateSyncFromScratchTest(t *testing.T, testSetup *SyncTestSetup) { - test := SyncTestParams{ - SyncableInterval: 256, - StateSyncMinBlocks: 50, // must be less than [syncableInterval] to perform sync - SyncMode: block.StateSyncStatic, +func StateSyncFromScratchModesTest(t *testing.T, testSetup *SyncTestSetup) { + modes := []struct { + name string + syncMode block.StateSyncMode + dynamicStateSyncEnabled bool + stateSyncPivotInterval uint64 + }{ + {"static", block.StateSyncStatic, false, 0}, + {"dynamic", block.StateSyncDynamic, true, 1}, } - for _, scheme := range schemes { - test.StateScheme = scheme - t.Run(scheme, func(t *testing.T) { - testSyncVMSetup := initSyncServerAndClientVMs(t, test, engine.BlocksToFetch, testSetup) - testSyncerVM(t, testSyncVMSetup, test, testSetup.ExtraSyncerVMTest) + for _, mode := range modes { + t.Run(mode.name, func(t *testing.T) { + test := SyncTestParams{ + SyncableInterval: 256, + StateSyncMinBlocks: 50, // must be less than [syncableInterval] to perform sync + SyncMode: mode.syncMode, + DynamicStateSyncEnabled: mode.dynamicStateSyncEnabled, + StateSyncPivotInterval: mode.stateSyncPivotInterval, + } + for _, scheme := range schemes { + test.StateScheme = scheme + t.Run(scheme, func(t *testing.T) { + testSyncVMSetup := initSyncServerAndClientVMs(t, test, engine.BlocksToFetch, testSetup) + testSyncerVM(t, testSyncVMSetup, test, testSetup.ExtraSyncerVMTest) + }) + } }) } } @@ -307,6 +326,137 @@ func VMShutdownWhileSyncingTest(t *testing.T, testSetup *SyncTestSetup) { testSyncerVM(t, testSyncVMSetup, test, testSetup.ExtraSyncerVMTest) } +// DynamicSyncWithBlockInjectionTest verifies that blocks injected during +// dynamic state sync trigger coordinator pivots and that blocks above the +// commit target are batch-replayed after sync completes. 
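+// The extra blocks are injected before the first leaf response is served, so
+// they are guaranteed to land while sync is still in flight.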
+func DynamicSyncWithBlockInjectionTest(t *testing.T, testSetup *SyncTestSetup) { + const ( + syncableInterval = 256 + extraBlockCount = 12 + ) + + for _, scheme := range schemes { + t.Run(scheme, func(t *testing.T) { + var ( + extraBlockBytes [][]byte + mu sync.Mutex + injected bool + ) + fork := upgradetest.Latest + + serverConfig := fmt.Sprintf(`{"commit-interval": %d, "state-history": %d, "state-sync-commit-interval": %d}`, + syncableInterval, syncableInterval, syncableInterval) + serverVM, serverCB := testSetup.NewVM() + serverTest := SetupTestVM(t, serverVM, TestVMConfig{ + Fork: &fork, ConfigJSON: serverConfig, Scheme: scheme, + }) + t.Cleanup(func() { require.NoError(t, serverVM.Shutdown(t.Context())) }) + if testSetup.AfterInit != nil { + testSetup.AfterInit(t, + SyncTestParams{ + SyncableInterval: syncableInterval, + StateScheme: scheme, + }, + SyncVMSetup{ + VM: serverVM, + AppSender: serverTest.AppSender, + SnowCtx: serverTest.Ctx, + ConsensusCallbacks: serverCB, + DB: serverTest.DB, + AtomicMemory: serverTest.AtomicMemory, + }, + true, + ) + } + + // Separate key to avoid exhausting the primary key's balance. + extraBlockGen := func(_ int, vm extension.InnerVM, gen *core.BlockGen) { + br := predicate.BlockResults{} + b, err := br.Bytes() + require.NoError(t, err) + gen.AppendExtra(b) + + tx := types.NewTransaction(gen.TxNonce(TestEthAddrs[1]), TestEthAddrs[0], common.Big1, 21000, InitialBaseFee, nil) + signedTx, err := types.SignTx(tx, types.NewEIP155Signer(vm.Ethereum().BlockChain().Config().ChainID), TestKeys[1].ToECDSA()) + require.NoError(t, err) + gen.AddTx(signedTx) + } + + generateAndAcceptBlocks(t, serverVM, syncableInterval, testSetup.GenFn, nil, serverCB) + generateAndAcceptBlocks(t, serverVM, extraBlockCount, extraBlockGen, + func(blk *types.Block) { + b, err := rlp.EncodeToBytes(blk) + require.NoError(t, err) + extraBlockBytes = append(extraBlockBytes, b) + }, + serverCB, + ) + serverHeight := serverVM.LastAcceptedExtendedBlock().Height() + + syncerConfig := fmt.Sprintf( + `{"state-sync-enabled":true, "state-sync-min-blocks": 50, "tx-lookup-limit": 4, "commit-interval": %d, "state-sync-dynamic-enabled": true, "state-sync-pivot-interval": 1000}`, + syncableInterval) + syncerVM, syncerCB := testSetup.NewVM() + syncerTest := SetupTestVM(t, syncerVM, TestVMConfig{ + Fork: &fork, ConfigJSON: syncerConfig, Scheme: scheme, IsSyncing: true, + }) + t.Cleanup(func() { require.NoError(t, syncerVM.Shutdown(t.Context())) }) + require.NoError(t, syncerVM.SetState(t.Context(), snow.StateSyncing)) + + deadline, _ := t.Deadline() + serverTest.AppSender.SendAppResponseF = func(ctx context.Context, nodeID ids.NodeID, requestID uint32, response []byte) error { + // Inject before the first response. The mutex serializes all + // interceptors so no response flows until injection completes. 
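+			// The goroutine keeps the server's response path from blocking on
+			// block verification and acceptance.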
+ go func() { + mu.Lock() + if !injected { + injected = true + for _, blkBytes := range extraBlockBytes { + blk, err := syncerVM.ParseBlock(t.Context(), blkBytes) + require.NoError(t, err) + require.NoError(t, blk.Verify(t.Context())) + require.NoError(t, blk.Accept(t.Context())) + } + } + mu.Unlock() + require.NoError(t, syncerVM.AppResponse(ctx, nodeID, requestID, response)) + }() + return nil + } + require.NoError(t, syncerVM.Connected(t.Context(), serverTest.Ctx.NodeID, client.StateSyncVersion)) + syncerTest.AppSender.SendAppRequestF = func(ctx context.Context, nodeSet set.Set[ids.NodeID], requestID uint32, request []byte) error { + nodeID, hasItem := nodeSet.Pop() + require.True(t, hasItem) + require.NoError(t, serverVM.AppRequest(ctx, nodeID, requestID, deadline, request)) + return nil + } + + summary, err := serverVM.GetLastStateSummary(t.Context()) + require.NoError(t, err) + parsedSummary, err := syncerVM.ParseStateSummary(t.Context(), summary.Bytes()) + require.NoError(t, err) + + syncMode, err := parsedSummary.Accept(t.Context()) + require.NoError(t, err) + require.Equal(t, block.StateSyncDynamic, syncMode) + + msg, err := syncerVM.WaitForEvent(t.Context()) + require.NoError(t, err) + require.Equal(t, commonEng.StateSyncDone, msg) + require.NoError(t, syncerVM.SyncerClient().Error()) + + require.NoError(t, syncerVM.SetState(t.Context(), snow.Bootstrapping)) + // Verify both chain.State and the blockchain agree on the height. + require.Equal(t, serverHeight, syncerVM.LastAcceptedExtendedBlock().Height(), "chain.State height mismatch after block injection") + syncerChain := syncerVM.Ethereum().BlockChain() + require.Equal(t, serverHeight, syncerChain.LastAcceptedBlock().NumberU64(), "blockchain height mismatch after block injection") + require.True(t, syncerChain.HasState(syncerChain.LastAcceptedBlock().Root()), "state unavailable for last accepted block") + + generateAndAcceptBlocks(t, syncerVM, 5, extraBlockGen, nil, syncerCB) + }) + } +} + type SyncTestSetup struct { NewVM func() (extension.InnerVM, dummy.ConsensusCallbacks) // should not be initialized AfterInit func(t *testing.T, testParams SyncTestParams, vmSetup SyncVMSetup, isServer bool) @@ -382,7 +532,14 @@ func initSyncServerAndClientVMs(t *testing.T, test SyncTestParams, numBlocks int // initialise [syncerVM] with blank genesis state // we also override [syncerVM]'s commit interval so the atomic trie works correctly. - stateSyncEnabledJSON := fmt.Sprintf(`{"state-sync-enabled":true, "state-sync-min-blocks": %d, "tx-lookup-limit": %d, "commit-interval": %d}`, test.StateSyncMinBlocks, 4, test.SyncableInterval) + stateSyncEnabledJSON := fmt.Sprintf( + `{"state-sync-enabled":true, "state-sync-min-blocks": %d, "tx-lookup-limit": %d, "commit-interval": %d, "state-sync-dynamic-enabled": %t, "state-sync-pivot-interval": %d}`, + test.StateSyncMinBlocks, + 4, + test.SyncableInterval, + test.DynamicStateSyncEnabled, + test.StateSyncPivotInterval, + ) syncerVM, syncerCB := testSetup.NewVM() syncerTest := SetupTestVM(t, syncerVM, TestVMConfig{ @@ -495,12 +652,14 @@ func (vm *shutdownOnceVM) Shutdown(ctx context.Context) error { // SyncTestParams contains both the actual VMs as well as the parameters with the expected output. 
type SyncTestParams struct { - responseIntercept func(vm extension.InnerVM, nodeID ids.NodeID, requestID uint32, response []byte) - StateSyncMinBlocks uint64 - SyncableInterval uint64 - SyncMode block.StateSyncMode - StateScheme string - expectedErr error + responseIntercept func(vm extension.InnerVM, nodeID ids.NodeID, requestID uint32, response []byte) + StateSyncMinBlocks uint64 + SyncableInterval uint64 + SyncMode block.StateSyncMode + DynamicStateSyncEnabled bool + StateSyncPivotInterval uint64 + StateScheme string + expectedErr error } func testSyncerVM(t *testing.T, testSyncVMSetup *testSyncVMSetup, test SyncTestParams, extraSyncerVMTest func(t *testing.T, syncerVMSetup SyncVMSetup)) { diff --git a/graft/coreth/plugin/evm/wrapped_block.go b/graft/coreth/plugin/evm/wrapped_block.go index 853e563f3ffc..fe6700fa6866 100644 --- a/graft/coreth/plugin/evm/wrapped_block.go +++ b/graft/coreth/plugin/evm/wrapped_block.go @@ -92,6 +92,18 @@ func (b *wrappedBlock) ID() ids.ID { return b.id } // Accept implements the snowman.Block interface func (b *wrappedBlock) Accept(context.Context) error { + // Notify sync client that engine accepted a block. + // If the block was enqueued for deferred processing, skip immediate execution. + if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineAccept(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block accept: %w", err) + } + if deferred { + return nil + } + } + vm := b.vm // Although returning an error from Accept is considered fatal, it is good // practice to cleanup the batch we were modifying in the case of an error. @@ -174,6 +186,18 @@ func (b *wrappedBlock) handlePrecompileAccept(rules extras.Rules) error { // Reject implements the snowman.Block interface // If [b] contains an atomic transaction, attempt to re-issue it func (b *wrappedBlock) Reject(context.Context) error { + // Notify sync client that engine rejected a block. + // If the block was enqueued for deferred processing, skip immediate execution. + if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineReject(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block reject: %w", err) + } + if deferred { + return nil + } + } + blkID := b.ID() log.Debug("rejecting block", "hash", blkID.Hex(), @@ -206,6 +230,18 @@ func (b *wrappedBlock) Timestamp() time.Time { // Verify implements the snowman.Block interface func (b *wrappedBlock) Verify(context.Context) error { + // Notify sync client that engine verified a block. + // If the block was enqueued for deferred processing, skip immediate execution. + if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineVerify(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block verify: %w", err) + } + if deferred { + return nil + } + } + return b.verify(&precompileconfig.PredicateContext{ SnowCtx: b.vm.ctx, ProposerVMBlockCtx: nil, @@ -238,6 +274,18 @@ func (b *wrappedBlock) ShouldVerifyWithContext(context.Context) (bool, error) { // VerifyWithContext implements the block.WithVerifyContext interface func (b *wrappedBlock) VerifyWithContext(_ context.Context, proposerVMBlockCtx *block.Context) error { + // Notify sync client that engine verified a block. + // If the block was enqueued for deferred processing, skip immediate execution. 
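+	// Same deferral path as Accept, Reject, and Verify above.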
+ if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineVerify(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block verify: %w", err) + } + if deferred { + return nil + } + } + return b.verify(&precompileconfig.PredicateContext{ SnowCtx: b.vm.ctx, ProposerVMBlockCtx: proposerVMBlockCtx, diff --git a/graft/evm/firewood/BUILD.bazel b/graft/evm/firewood/BUILD.bazel index 9639cc70d6bf..1dbb9c3bac02 100644 --- a/graft/evm/firewood/BUILD.bazel +++ b/graft/evm/firewood/BUILD.bazel @@ -6,6 +6,7 @@ go_library( srcs = [ "account_trie.go", "base_trie.go", + "iterator.go", "metrics.go", "reconstructed_state.go", "reconstructed_trie.go", diff --git a/graft/evm/message/block_sync_summary_test.go b/graft/evm/message/block_sync_summary_test.go index c4156c1b699b..6e73fcde593a 100644 --- a/graft/evm/message/block_sync_summary_test.go +++ b/graft/evm/message/block_sync_summary_test.go @@ -25,8 +25,8 @@ func TestMarshalBlockSyncSummary(t *testing.T) { require.Equal(t, uint64(2), blockSyncSummary.Height()) require.Equal(t, common.Hash{3}, blockSyncSummary.GetBlockRoot()) - expectedBase64Bytes := "AAAAAAAAAAAAAgEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - require.Equal(t, expectedBase64Bytes, base64.StdEncoding.EncodeToString(blockSyncSummary.Bytes())) + wantBase64Bytes := "AAAAAAAAAAAAAgEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + require.Equal(t, wantBase64Bytes, base64.StdEncoding.EncodeToString(blockSyncSummary.Bytes())) provider := message.NewBlockSyncSummaryProvider(c) called := false diff --git a/graft/evm/sync/README.md b/graft/evm/sync/README.md index f830c3921b20..321b821177ac 100644 --- a/graft/evm/sync/README.md +++ b/graft/evm/sync/README.md @@ -39,7 +39,9 @@ This package (`evm/sync`) contains the shared state sync implementation used by - `leaf`: Generic leaf syncer (`CallbackSyncer`) that iterates trie ranges and invokes callbacks on each batch of received leafs. - `block`: Syncs parent blocks of the syncable block. - `code`: Syncs contract code referenced in the account trie, with concurrent workers and a persistent queue. -- `engine`: `SyncerRegistry` orchestrates execution of multiple `Syncer` implementations (EVM state, blocks, code, and VM-specific syncers like the atomic trie). +- `engine`: `SyncerRegistry` orchestrates execution of multiple `Syncer` implementations (EVM state, blocks, code, and VM-specific syncers like the atomic trie). The engine supports: + - Static mode (default): run syncers to completion, then finalize. + - Dynamic mode (opt-in): allow sync target updates while sync is running, defer block operations, and replay deferred operations after finalization. - `synctest`: Shared test utilities for trie and block generation. - `syncutils`: Iterator utilities for trie traversal. @@ -66,6 +68,15 @@ The above information is called a _state summary_, and each syncable block corre 1. The VM sends `common.StateSyncDone` on the `toEngine` channel on completion. 1. The engine calls `VM.SetState(Bootstrapping)`. Then, blocks after the syncable block are processed one by one. +### Engine execution modes + +`evm/sync/engine` has two execution modes: + +- Static mode: default behavior, equivalent to the original state sync flow. 
+- Dynamic mode: syncers continue running while accepted blocks can advance the sync target (with pivot throttling), and block operations (`Accept`, `Reject`, `Verify`) are deferred and replayed in FIFO order after sync finalization. + +Unless explicitly enabled by VM configuration plumbing, state sync runs in static mode. + ## Syncing state The following steps are executed by the VM to sync its state from peers: diff --git a/graft/evm/sync/block/BUILD.bazel b/graft/evm/sync/block/BUILD.bazel index 534f3472e810..feb474daa942 100644 --- a/graft/evm/sync/block/BUILD.bazel +++ b/graft/evm/sync/block/BUILD.bazel @@ -7,6 +7,7 @@ go_library( importpath = "github.com/ava-labs/avalanchego/graft/evm/sync/block", visibility = ["//visibility:public"], deps = [ + "//graft/evm/message", "//graft/evm/sync/client", "//graft/evm/sync/types", "@com_github_ava_labs_libevm//common", diff --git a/graft/evm/sync/block/syncer.go b/graft/evm/sync/block/syncer.go index f4f7fcff1ed3..41463a60b583 100644 --- a/graft/evm/sync/block/syncer.go +++ b/graft/evm/sync/block/syncer.go @@ -7,33 +7,48 @@ import ( "context" "errors" "fmt" + "sync" "github.com/ava-labs/libevm/common" "github.com/ava-labs/libevm/core/rawdb" "github.com/ava-labs/libevm/ethdb" "github.com/ava-labs/libevm/log" + "github.com/ava-labs/avalanchego/graft/evm/message" "github.com/ava-labs/avalanchego/graft/evm/sync/client" "github.com/ava-labs/avalanchego/graft/evm/sync/types" ) -const blocksPerRequest = 32 +const ( + blocksPerRequest = 32 + + // SyncerID is the stable identifier for the block syncer. + SyncerID = "state_block_sync" +) var ( - _ types.Syncer = (*BlockSyncer)(nil) + _ types.Syncer = (*Syncer)(nil) errBlocksToFetchRequired = errors.New("blocksToFetch must be > 0") errFromHashRequired = errors.New("fromHash must be non-zero when fromHeight > 0") ) -type BlockSyncer struct { +type syncTarget struct { + hash common.Hash + height uint64 +} + +type Syncer struct { db ethdb.Database client client.Client fromHash common.Hash fromHeight uint64 blocksToFetch uint64 + + targetMu sync.Mutex + latestTarget *syncTarget } -func NewSyncer(client client.Client, db ethdb.Database, fromHash common.Hash, fromHeight uint64, blocksToFetch uint64) (*BlockSyncer, error) { +func NewSyncer(client client.Client, db ethdb.Database, fromHash common.Hash, fromHeight uint64, blocksToFetch uint64) (*Syncer, error) { if blocksToFetch == 0 { return nil, errBlocksToFetchRequired } @@ -42,7 +57,7 @@ func NewSyncer(client client.Client, db ethdb.Database, fromHash common.Hash, fr return nil, errFromHashRequired } - return &BlockSyncer{ + return &Syncer{ client: client, db: db, fromHash: fromHash, @@ -52,45 +67,93 @@ func NewSyncer(client client.Client, db ethdb.Database, fromHash common.Hash, fr } // Name returns the human-readable name for this sync task. -func (*BlockSyncer) Name() string { +func (*Syncer) Name() string { return "Block Syncer" } // ID returns the stable identifier for this sync task. -func (*BlockSyncer) ID() string { - return "state_block_sync" +func (*Syncer) ID() string { + return SyncerID +} + +// Sync fetches blocks for the initial target and, if a materially newer +// target arrived via UpdateTarget during the first pass, performs one +// bounded catch-up pass. At most two passes are executed per Sync call. 
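A reduced model of the monotonic-latch-plus-bounded-catch-up behavior described above; `targetLatch` here is an illustrative stand-in, not the patch's `Syncer`:

```go
package main

import (
	"fmt"
	"sync"
)

// targetLatch models the block syncer's target handling: update only
// ratchets upward, and consume returns (and clears) the latest target only
// when it drifted beyond the window the previous pass already covered.
type targetLatch struct {
	mu      sync.Mutex
	floor   uint64 // height covered by the first pass
	window  uint64 // blocksToFetch
	pending *uint64
}

func (l *targetLatch) update(h uint64) {
	l.mu.Lock()
	defer l.mu.Unlock()
	ceiling := l.floor
	if l.pending != nil && *l.pending > ceiling {
		ceiling = *l.pending
	}
	if h > ceiling {
		l.pending = &h
	}
}

func (l *targetLatch) consume() (uint64, bool) {
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.pending == nil || *l.pending-l.floor <= l.window {
		return 0, false // drift not material: no catch-up pass
	}
	h := *l.pending
	l.pending = nil
	return h, true
}

func main() {
	l := &targetLatch{floor: 100, window: 5}
	l.update(103)            // within window: recorded but not material
	fmt.Println(l.consume()) // 0 false
	l.update(120)            // material drift
	fmt.Println(l.consume()) // 120 true: one bounded catch-up pass
}
```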
+func (s *Syncer) Sync(ctx context.Context) error { + if err := s.syncWindow(ctx, s.fromHash, s.fromHeight); err != nil { + return err + } + + catchUp := s.consumeTarget(s.fromHeight) + if catchUp == nil { + return nil + } + return s.syncWindow(ctx, catchUp.hash, catchUp.height) } -// Sync fetches (up to) BlocksToFetch blocks from peers -// using Client and writes them to disk. -// the process begins with FromHash and it fetches parents recursively. -// fetching starts from the first ancestor not found on disk +// UpdateTarget records a newer sync target. It is thread-safe, non-blocking, +// and monotonic - targets at or below the current ceiling are ignored. +func (s *Syncer) UpdateTarget(newTarget message.Syncable) error { + s.targetMu.Lock() + defer s.targetMu.Unlock() + + newHeight := newTarget.Height() + ceiling := s.fromHeight + if s.latestTarget != nil && s.latestTarget.height > ceiling { + ceiling = s.latestTarget.height + } + if newHeight <= ceiling { + return nil + } + s.latestTarget = &syncTarget{ + hash: newTarget.GetBlockHash(), + height: newHeight, + } + return nil +} + +// consumeTarget atomically reads and clears latestTarget if the drift from +// passStartHeight is material (greater than blocksToFetch). Returns nil +// when no catch-up is needed. +func (s *Syncer) consumeTarget(passStartHeight uint64) *syncTarget { + s.targetMu.Lock() + defer s.targetMu.Unlock() + + t := s.latestTarget + // A catch-up pass is only worthwhile when the new target has drifted + // beyond what the previous pass already covered (blocksToFetch). + driftExceedsWindow := t != nil && t.height > passStartHeight && t.height-passStartHeight > s.blocksToFetch + if !driftExceedsWindow { + return nil + } + s.latestTarget = nil + return t +} + +// syncWindow fetches up to blocksToFetch blocks ending at targetHash/targetHeight. +// Blocks already on disk are skipped - remaining blocks are fetched from peers. // -// TODO: We could inspect the database more accurately to ensure we never fetch -// any blocks that are locally available. -// We could also prevent overrequesting blocks, if the number of blocks needed -// to be fetched isn't a multiple of blocksPerRequest. -func (s *BlockSyncer) Sync(ctx context.Context) error { - nextHash := s.fromHash - nextHeight := s.fromHeight +// TODO(powerslider): We could inspect the database more accurately to ensure we never fetch +// any blocks that are locally available. We could also prevent overrequesting blocks, if +// the number of blocks needed to be fetched isn't a multiple of blocksPerRequest. +func (s *Syncer) syncWindow(ctx context.Context, targetHash common.Hash, targetHeight uint64) error { + nextHash := targetHash + nextHeight := targetHeight blocksToFetch := s.blocksToFetch - // first, check for blocks already available on disk so we don't + // First, check for blocks already available on disk so we don't // request them from peers. for blocksToFetch > 0 { blk := rawdb.ReadBlock(s.db, nextHash, nextHeight) if blk == nil { - // block was not found break } - - // block exists nextHash = blk.ParentHash() nextHeight-- blocksToFetch-- } - // get any blocks we couldn't find on disk from peers and write + // Fetch any blocks we couldn't find on disk from peers and write // them to disk. 
batch := s.db.NewBatch() for fetched := uint64(0); fetched < blocksToFetch && (nextHash != common.Hash{}); { diff --git a/graft/evm/sync/block/syncer_test.go b/graft/evm/sync/block/syncer_test.go index 564445dd9717..5a4bde250aca 100644 --- a/graft/evm/sync/block/syncer_test.go +++ b/graft/evm/sync/block/syncer_test.go @@ -25,74 +25,65 @@ import ( ) func TestBlockSyncer_ParameterizedTests(t *testing.T) { + const defaultFromHeight = uint64(10) + tests := []struct { - name string - numBlocks int - prePopulateBlocks []int - fromHeight uint64 - blocksToFetch uint64 - expectedBlocks []int - verifyZeroBlocksReceived bool + name string + prePopulateBlocks []int + fromHeight uint64 + blocksToFetch uint64 + wantBlocks []int }{ { - name: "normal case - all blocks retrieved from network", - numBlocks: 10, - fromHeight: 5, - blocksToFetch: 3, - expectedBlocks: []int{3, 4, 5}, + name: "normal case - all blocks retrieved from network", + fromHeight: 5, + blocksToFetch: 3, + wantBlocks: []int{3, 4, 5}, }, { - name: "all blocks already available", - numBlocks: 10, - prePopulateBlocks: []int{3, 4, 5}, - fromHeight: 5, - blocksToFetch: 3, - expectedBlocks: []int{3, 4, 5}, - verifyZeroBlocksReceived: true, + name: "all blocks already available", + prePopulateBlocks: []int{3, 4, 5}, + fromHeight: 5, + blocksToFetch: 3, + wantBlocks: []int{3, 4, 5}, }, { name: "some blocks already available", - numBlocks: 10, prePopulateBlocks: []int{4, 5}, fromHeight: 5, blocksToFetch: 3, - expectedBlocks: []int{3, 4, 5}, + wantBlocks: []int{3, 4, 5}, }, { name: "most recent block missing", - numBlocks: 10, prePopulateBlocks: []int{3, 4}, fromHeight: 5, blocksToFetch: 3, - expectedBlocks: []int{3, 4, 5}, + wantBlocks: []int{3, 4, 5}, }, { - name: "edge case - from height 1", - numBlocks: 10, - fromHeight: 1, - blocksToFetch: 1, - expectedBlocks: []int{1}, + name: "edge case - from height 1", + fromHeight: 1, + blocksToFetch: 1, + wantBlocks: []int{1}, }, { - name: "single block sync", - numBlocks: 10, - fromHeight: 7, - blocksToFetch: 1, - expectedBlocks: []int{7}, + name: "single block sync", + fromHeight: 7, + blocksToFetch: 1, + wantBlocks: []int{7}, }, { - name: "large sync - many blocks", - numBlocks: 50, - fromHeight: 40, - blocksToFetch: 35, - expectedBlocks: []int{6, 10, 20, 30, 40}, + name: "large sync - many blocks", + fromHeight: 40, + blocksToFetch: 35, + wantBlocks: []int{6, 10, 20, 30, 40}, }, { - name: "fetch genesis block", - numBlocks: 10, - fromHeight: 10, - blocksToFetch: 30, - expectedBlocks: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + name: "fetch genesis block", + fromHeight: 10, + blocksToFetch: 30, + wantBlocks: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, }, } @@ -100,7 +91,12 @@ func TestBlockSyncer_ParameterizedTests(t *testing.T) { messagetest.ForEachCodec(t, func(c codec.Manager, _ message.LeafsRequestType) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - env := newTestEnvironment(t, tt.numBlocks, c) + numBlocks := int(defaultFromHeight) + if tt.fromHeight > defaultFromHeight { + numBlocks = int(tt.fromHeight) + } + + env := newTestEnvironment(t, numBlocks, c) require.NoError(t, env.prePopulateBlocks(tt.prePopulateBlocks)) syncer, err := env.createSyncer(tt.fromHeight, tt.blocksToFetch) @@ -108,12 +104,7 @@ func TestBlockSyncer_ParameterizedTests(t *testing.T) { require.NoError(t, syncer.Sync(t.Context())) - env.verifyBlocksInDB(t, tt.expectedBlocks) - - if tt.verifyZeroBlocksReceived { - // Client should not have received any block requests since all blocks were on disk - require.Zero(t, 
env.client.BlocksReceived()) - } + env.verifyBlocksInDB(t, tt.wantBlocks) }) }) } @@ -135,6 +126,21 @@ func TestBlockSyncer_ContextCancellation(t *testing.T) { }) } +func TestBlockSyncer_NoNetworkRequestsWhenBlocksAlreadyOnDisk(t *testing.T) { + t.Parallel() + + messagetest.ForEachCodec(t, func(c codec.Manager, _ message.LeafsRequestType) { + env := newTestEnvironment(t, 10, c) + require.NoError(t, env.prePopulateBlocks([]int{3, 4, 5})) + + syncer, err := env.createSyncer(5, 3) + require.NoError(t, err) + + require.NoError(t, syncer.Sync(t.Context())) + require.Zero(t, env.client.BlocksReceived()) + }) +} + // testEnvironment provides an abstraction for setting up block syncer tests type testEnvironment struct { chainDB ethdb.Database @@ -183,8 +189,8 @@ func newTestEnvironment(t *testing.T, numBlocks int, c codec.Manager) *testEnvir func (e *testEnvironment) prePopulateBlocks(blockHeights []int) error { batch := e.chainDB.NewBatch() for _, height := range blockHeights { - if height <= len(e.blocks) { - // blocks[0] is block number 1, blocks[1] is block number 2, etc. + if height < len(e.blocks) { + // Generated test blocks are indexed by height. block := e.blocks[height] rawdb.WriteBlock(batch, block) rawdb.WriteCanonicalHash(batch, block.Hash(), block.NumberU64()) @@ -194,7 +200,7 @@ func (e *testEnvironment) prePopulateBlocks(blockHeights []int) error { } // createSyncer creates a block syncer with the given configuration -func (e *testEnvironment) createSyncer(fromHeight uint64, blocksToFetch uint64) (*BlockSyncer, error) { +func (e *testEnvironment) createSyncer(fromHeight uint64, blocksToFetch uint64) (*Syncer, error) { if fromHeight > uint64(len(e.blocks)) { return nil, fmt.Errorf("fromHeight %d exceeds available blocks %d", fromHeight, len(e.blocks)) } @@ -209,11 +215,11 @@ func (e *testEnvironment) createSyncer(fromHeight uint64, blocksToFetch uint64) } // verifyBlocksInDB checks that the expected blocks are present in the database (by block height) -func (e *testEnvironment) verifyBlocksInDB(t *testing.T, expectedBlockHeights []int) { +func (e *testEnvironment) verifyBlocksInDB(t *testing.T, wantBlockHeights []int) { t.Helper() // Verify expected blocks are present - for _, height := range expectedBlockHeights { + for _, height := range wantBlockHeights { if height >= len(e.blocks) { continue } @@ -223,3 +229,76 @@ func (e *testEnvironment) verifyBlocksInDB(t *testing.T, expectedBlockHeights [] require.Equal(t, block.Hash(), dbBlock.Hash(), "Block %d hash mismatch", height) } } + +func TestUpdateTarget_Monotonic(t *testing.T) { + t.Parallel() + + syncer, err := NewSyncer(nil, rawdb.NewMemoryDatabase(), common.HexToHash("0x1"), 100, 10) + require.NoError(t, err) + + // At or below fromHeight: ignored. + require.NoError(t, syncer.UpdateTarget(&synctest.SyncTarget{BlockHash: common.HexToHash("0xa"), BlockHeight: 100})) + require.Nil(t, syncer.latestTarget) + + // Higher: accepted. + require.NoError(t, syncer.UpdateTarget(&synctest.SyncTarget{BlockHash: common.HexToHash("0xb"), BlockHeight: 200})) + require.Equal(t, uint64(200), syncer.latestTarget.height) + + // Stale or equal: ignored. 
+ require.NoError(t, syncer.UpdateTarget(&synctest.SyncTarget{BlockHash: common.HexToHash("0xc"), BlockHeight: 150})) + require.NoError(t, syncer.UpdateTarget(&synctest.SyncTarget{BlockHash: common.HexToHash("0xd"), BlockHeight: 200})) + require.Equal(t, common.HexToHash("0xb"), syncer.latestTarget.hash) +} + +func TestSync_CatchUpBehavior(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + fromHeight uint64 + blocksToFetch uint64 + updateHeight uint64 + wantBlocks []int + absentBlocks []int + }{ + { + name: "drift exceeds window - catch-up runs", + fromHeight: 20, + blocksToFetch: 5, + updateHeight: 40, + wantBlocks: []int{16, 17, 18, 19, 20, 36, 37, 38, 39, 40}, + }, + { + name: "drift within window - no catch-up", + fromHeight: 10, + blocksToFetch: 5, + updateHeight: 15, + wantBlocks: []int{6, 7, 8, 9, 10}, + absentBlocks: []int{15}, + }, + } + + for _, tt := range tests { + messagetest.ForEachCodec(t, func(c codec.Manager, _ message.LeafsRequestType) { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + env := newTestEnvironment(t, int(tt.updateHeight), c) + syncer, err := env.createSyncer(tt.fromHeight, tt.blocksToFetch) + require.NoError(t, err) + + require.NoError(t, syncer.UpdateTarget(&synctest.SyncTarget{ + BlockHash: env.blocks[tt.updateHeight].Hash(), + BlockHeight: tt.updateHeight, + })) + require.NoError(t, syncer.Sync(t.Context())) + + env.verifyBlocksInDB(t, tt.wantBlocks) + for _, h := range tt.absentBlocks { + dbBlock := rawdb.ReadBlock(env.chainDB, env.blocks[h].Hash(), uint64(h)) + require.Nil(t, dbBlock, "block %d should not be fetched", h) + } + }) + }) + } +} diff --git a/graft/evm/sync/client/client_test.go b/graft/evm/sync/client/client_test.go index ce93acd01bad..79dc4b915d37 100644 --- a/graft/evm/sync/client/client_test.go +++ b/graft/evm/sync/client/client_test.go @@ -31,8 +31,8 @@ import ( func TestGetCode(t *testing.T) { tests := map[string]struct { - setupRequest func() (requestHashes []common.Hash, testResponse message.CodeResponse, expectedCode [][]byte) - expectedErr error + setupRequest func() (requestHashes []common.Hash, testResponse message.CodeResponse, wantCode [][]byte) + wantErr error }{ "normal": { setupRequest: func() ([]common.Hash, message.CodeResponse, [][]byte) { @@ -43,41 +43,41 @@ func TestGetCode(t *testing.T) { Data: codeSlices, }, codeSlices }, - expectedErr: nil, + wantErr: nil, }, "unexpected code bytes": { - setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, expectedCode [][]byte) { + setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, wantCode [][]byte) { return []common.Hash{{1}}, message.CodeResponse{ Data: [][]byte{{1}}, }, nil }, - expectedErr: errHashMismatch, + wantErr: errHashMismatch, }, "too many code elements returned": { - setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, expectedCode [][]byte) { + setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, wantCode [][]byte) { return []common.Hash{{1}}, message.CodeResponse{ Data: [][]byte{{1}, {2}}, }, nil }, - expectedErr: errInvalidCodeResponseLen, + wantErr: errInvalidCodeResponseLen, }, "too few code elements returned": { - setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, expectedCode [][]byte) { + setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, wantCode [][]byte) { return []common.Hash{{1}}, message.CodeResponse{ Data: [][]byte{}, }, nil 
}, - expectedErr: errInvalidCodeResponseLen, + wantErr: errInvalidCodeResponseLen, }, "code size is too large": { - setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, expectedCode [][]byte) { + setupRequest: func() (requestHashes []common.Hash, testResponse message.CodeResponse, wantCode [][]byte) { oversizedCode := make([]byte, ethparams.MaxCodeSize+1) codeHash := crypto.Keccak256Hash(oversizedCode) return []common.Hash{codeHash}, message.CodeResponse{ Data: [][]byte{oversizedCode}, }, nil }, - expectedErr: errMaxCodeSizeExceeded, + wantErr: errMaxCodeSizeExceeded, }, } @@ -94,14 +94,14 @@ func TestGetCode(t *testing.T) { }) ctx, cancel := context.WithCancel(t.Context()) defer cancel() - codeHashes, res, expectedCode := test.setupRequest() + codeHashes, res, wantCode := test.setupRequest() responseBytes, err := c.Marshal(message.Version, res) require.NoError(t, err) // Dirty hack required because the client will re-request if it encounters // an error. attempted := false - if test.expectedErr == nil { + if test.wantErr == nil { testNetClient.testResponse(1, nil, responseBytes) } else { testNetClient.testResponse(2, func() { @@ -114,16 +114,16 @@ func TestGetCode(t *testing.T) { } codeBytes, err := stateSyncClient.GetCode(ctx, codeHashes) - require.ErrorIs(t, err, test.expectedErr) + require.ErrorIs(t, err, test.wantErr) // If we expected an error, verify retry behavior and return - if test.expectedErr != nil { + if test.wantErr != nil { require.Equal(t, uint(2), testNetClient.numCalls) return } // Otherwise, require that the result is as expected - require.Len(t, codeBytes, len(expectedCode)) + require.Len(t, codeBytes, len(wantCode)) for i, code := range codeBytes { - require.Equal(t, expectedCode[i], code) + require.Equal(t, wantCode[i], code) } require.Equal(t, uint(1), testNetClient.numCalls) }) @@ -153,7 +153,7 @@ func TestGetBlocks(t *testing.T) { request message.BlockRequest response func(t *testing.T, c codec.Manager, blocksRequestHandler *handlers.BlockRequestHandler, request message.BlockRequest) []byte assertResponse func(t *testing.T, response []*types.Block) - expectedErr error + wantErr error }{ "normal response": { request: message.BlockRequest{ @@ -200,7 +200,7 @@ func TestGetBlocks(t *testing.T) { response: func(_ *testing.T, _ codec.Manager, _ *handlers.BlockRequestHandler, _ message.BlockRequest) []byte { return []byte("gibberish") }, - expectedErr: errUnmarshalResponse, + wantErr: errUnmarshalResponse, }, "invalid value replacing block": { request: message.BlockRequest{ @@ -221,7 +221,7 @@ func TestGetBlocks(t *testing.T) { return responseBytes }, - expectedErr: errUnmarshalResponse, + wantErr: errUnmarshalResponse, }, "incorrect starting point": { request: message.BlockRequest{ @@ -240,7 +240,7 @@ func TestGetBlocks(t *testing.T) { return response }, - expectedErr: errHashMismatch, + wantErr: errHashMismatch, }, "missing link in between blocks": { request: message.BlockRequest{ @@ -263,7 +263,7 @@ func TestGetBlocks(t *testing.T) { return responseBytes }, - expectedErr: errHashMismatch, + wantErr: errHashMismatch, }, "no blocks": { request: message.BlockRequest{ @@ -280,7 +280,7 @@ func TestGetBlocks(t *testing.T) { return responseBytes }, - expectedErr: errEmptyResponse, + wantErr: errEmptyResponse, }, "more than requested blocks": { request: message.BlockRequest{ @@ -299,7 +299,7 @@ func TestGetBlocks(t *testing.T) { return responseBytes }, - expectedErr: errTooManyBlocks, + wantErr: errTooManyBlocks, }, } for name, test := range 
tests { @@ -322,7 +322,7 @@ func TestGetBlocks(t *testing.T) { defer cancel() responseBytes := test.response(t, c, blocksRequestHandler, test.request) - if test.expectedErr == nil { + if test.wantErr == nil { testNetClient.testResponse(1, nil, responseBytes) } else { attempted := false @@ -335,8 +335,8 @@ func TestGetBlocks(t *testing.T) { } blockResponse, err := stateSyncClient.GetBlocks(ctx, test.request.Hash, test.request.Height, test.request.Parents) - require.ErrorIs(t, err, test.expectedErr) - if test.expectedErr != nil { + require.ErrorIs(t, err, test.wantErr) + if test.wantErr != nil { return } @@ -374,7 +374,7 @@ func TestGetLeafs(t *testing.T) { request func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest response func(t *testing.T, c codec.Manager, handler handlers.LeafRequestHandler, request message.LeafsRequest, leafReqType message.LeafsRequestType) []byte requireResponse func(t *testing.T, response message.LeafsResponse) - expectedErr error + wantErr error }{ "full response for small (single request) trie": { request: func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest { @@ -429,7 +429,7 @@ func TestGetLeafs(t *testing.T) { return response }, - expectedErr: errTooManyLeaves, + wantErr: errTooManyLeaves, }, "partial response to request for entire trie (full leaf limit)": { request: func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest { @@ -558,7 +558,7 @@ func TestGetLeafs(t *testing.T) { require.NoError(t, err) return modifiedResponse }, - expectedErr: errInvalidRangeProof, + wantErr: errInvalidRangeProof, }, "removed first key in response and replaced proof": { request: func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest { @@ -592,7 +592,7 @@ func TestGetLeafs(t *testing.T) { require.NoError(t, err) return modifiedResponse }, - expectedErr: errInvalidRangeProof, + wantErr: errInvalidRangeProof, }, "removed last key in response": { request: func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest { @@ -620,7 +620,7 @@ func TestGetLeafs(t *testing.T) { require.NoError(t, err) return modifiedResponse }, - expectedErr: errInvalidRangeProof, + wantErr: errInvalidRangeProof, }, "removed key from middle of response": { request: func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest { @@ -649,7 +649,7 @@ func TestGetLeafs(t *testing.T) { require.NoError(t, err) return modifiedResponse }, - expectedErr: errInvalidRangeProof, + wantErr: errInvalidRangeProof, }, "corrupted value in middle of response": { request: func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest { @@ -677,7 +677,7 @@ func TestGetLeafs(t *testing.T) { require.NoError(t, err) return modifiedResponse }, - expectedErr: errInvalidRangeProof, + wantErr: errInvalidRangeProof, }, "all proof keys removed from response": { request: func(t *testing.T, leafReqType message.LeafsRequestType) message.LeafsRequest { @@ -706,7 +706,7 @@ func TestGetLeafs(t *testing.T) { require.NoError(t, err) return modifiedResponse }, - expectedErr: errInvalidRangeProof, + wantErr: errInvalidRangeProof, }, } for name, test := range tests { @@ -732,8 +732,8 @@ func TestGetLeafs(t *testing.T) { responseBytes := test.response(t, c, handler, request, leafReqType) response, _, err := parseLeafsResponse(client.codec, request, responseBytes) - require.ErrorIs(t, err, test.expectedErr) - if test.expectedErr != nil { + require.ErrorIs(t, err, test.wantErr) + if test.wantErr != nil { 
return } diff --git a/graft/evm/sync/code/BUILD.bazel b/graft/evm/sync/code/BUILD.bazel index 28078c588ee9..48ab30536de9 100644 --- a/graft/evm/sync/code/BUILD.bazel +++ b/graft/evm/sync/code/BUILD.bazel @@ -5,6 +5,7 @@ go_library( name = "code", srcs = [ "queue.go", + "sessioned_queue.go", "syncer.go", ], importpath = "github.com/ava-labs/avalanchego/graft/evm/sync/code", @@ -12,6 +13,7 @@ go_library( deps = [ "//graft/evm/message", "//graft/evm/sync/client", + "//graft/evm/sync/session", "//graft/evm/sync/types", "//vms/evm/sync/customrawdb", "@com_github_ava_labs_libevm//common", @@ -26,6 +28,7 @@ graft_go_test( name = "code_test", srcs = [ "queue_test.go", + "sessioned_queue_test.go", "syncer_test.go", ], embed = [":code"], @@ -36,13 +39,13 @@ graft_go_test( "//graft/evm/sync/client", "//graft/evm/sync/handlers", "//graft/evm/sync/handlers/stats", + "//graft/evm/sync/session", "//utils", "//utils/set", "//vms/evm/sync/customrawdb", "@com_github_ava_labs_libevm//common", "@com_github_ava_labs_libevm//core/rawdb", "@com_github_ava_labs_libevm//crypto", - "@com_github_ava_labs_libevm//ethdb", "@com_github_ava_labs_libevm//ethdb/memorydb", "@com_github_stretchr_testify//require", "@org_golang_x_sync//errgroup", diff --git a/graft/evm/sync/code/queue.go b/graft/evm/sync/code/queue.go index 2bc9e18bce48..23634f27cc00 100644 --- a/graft/evm/sync/code/queue.go +++ b/graft/evm/sync/code/queue.go @@ -21,7 +21,7 @@ import ( const defaultQueueCapacity = 5000 var ( - _ types.Finalizer = (*Queue)(nil) + _ types.CodeRequestQueue = (*Queue)(nil) ErrQueueClosed = errors.New("code queue is closed") ) diff --git a/graft/evm/sync/code/syncer.go b/graft/evm/sync/code/syncer.go index ef923b067bb2..338217c59841 100644 --- a/graft/evm/sync/code/syncer.go +++ b/graft/evm/sync/code/syncer.go @@ -20,7 +20,12 @@ import ( "github.com/ava-labs/avalanchego/vms/evm/sync/customrawdb" ) -const defaultNumCodeFetchingWorkers = 5 +const ( + defaultNumCodeFetchingWorkers = 5 + + // SyncerID is the stable identifier for the code syncer. + SyncerID = "state_code_sync" +) var _ types.Syncer = (*Syncer)(nil) @@ -31,25 +36,26 @@ var _ types.Syncer = (*Syncer)(nil) type Syncer struct { db ethdb.Database client client.Client + // Channel of incoming code hash requests provided by the fetcher. codeHashes <-chan common.Hash // Config options. numWorkers int - codeHashesPerReq int // best-effort target size - final batch may be smaller + codeHashesPerReq int // best-effort target size, final batch may be smaller // inFlight tracks code hashes currently being processed to dedupe work // across workers and across repeated queue submissions. inFlight sync.Map // key: common.Hash, value: struct{} } -// codeSyncerConfig carries construction-time options for code syncer. +// syncerConfig carries construction-time options for the code syncer. type syncerConfig struct { numWorkers int codeHashesPerReq int } -// CodeSyncerOption configures CodeSyncer at construction time. +// SyncerOption configures the code syncer at construction time. type SyncerOption = options.Option[syncerConfig] // WithNumWorkers overrides the number of concurrent workers. @@ -72,8 +78,6 @@ func WithCodeHashesPerRequest(n int) SyncerOption { }) } -// NewSyncer allows external packages (e.g., registry wiring) to create a code syncer -// that consumes hashes from a provided fetcher queue. 
func NewSyncer(client client.Client, db ethdb.Database, codeHashes <-chan common.Hash, opts ...SyncerOption) (*Syncer, error) { cfg := syncerConfig{ numWorkers: defaultNumCodeFetchingWorkers, @@ -91,39 +95,43 @@ func NewSyncer(client client.Client, db ethdb.Database, codeHashes <-chan common } // Name returns the human-readable name for this sync task. -func (*Syncer) Name() string { - return "Code Syncer" -} +func (*Syncer) Name() string { return "Code Syncer" } // ID returns the stable identifier for this sync task. -func (*Syncer) ID() string { - return "state_code_sync" -} +func (*Syncer) ID() string { return SyncerID } -// Sync starts the worker thread and populates the code hashes queue with active work. -// Blocks until all outstanding code requests from a previous sync have been -// fetched and the code channel has been closed, or the context is cancelled. +// Sync starts the worker threads and blocks until all outstanding code +// requests have been fetched and the code channel has been closed, or +// the context is cancelled. func (c *Syncer) Sync(ctx context.Context) error { eg, egCtx := errgroup.WithContext(ctx) - - // Start NumCodeFetchingWorkers threads to fetch code from the network. for range c.numWorkers { - eg.Go(func() error { return c.work(egCtx) }) + eg.Go(func() error { return c.work(egCtx, c.codeHashes) }) } - return eg.Wait() } -// work fulfills any incoming requests from the producer channel by fetching code bytes from the network -// and fulfilling them by updating the database. -func (c *Syncer) work(ctx context.Context) error { +func (*Syncer) UpdateTarget(message.Syncable) error { + return nil +} + +func (c *Syncer) releaseInFlight(codeHashes []common.Hash) { + for _, h := range codeHashes { + c.inFlight.Delete(h) + } +} + +// work fulfills incoming requests from the producer channel by fetching code +// bytes from the network and persisting them to the database. +func (c *Syncer) work(ctx context.Context, codeHashesCh <-chan common.Hash) error { codeHashes := make([]common.Hash, 0, message.MaxCodeHashesPerRequest) for { select { case <-ctx.Done(): // If ctx is done, set the error to the ctx error since work has been cancelled. + c.releaseInFlight(codeHashes) return ctx.Err() - case codeHash, ok := <-c.codeHashes: + case codeHash, ok := <-codeHashesCh: // If there are no more [codeHashes], fulfill a last code request for any [codeHashes] previously // read from the channel, then return. if !ok { @@ -171,11 +179,12 @@ func (c *Syncer) work(ctx context.Context) error { } } -// fulfillCodeRequest sends a request for [codeHashes], writes the result to the database, and -// marks the work as complete. -// codeHashes should not be empty or contain duplicate hashes. -// Returns an error if one is encountered, signaling the worker thread to terminate. +// fulfillCodeRequest sends a request for codeHashes, writes the result to the +// database, and marks the work as complete. Returns an error if one is +// encountered, signaling the worker thread to terminate. 
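A stripped-down sketch of the worker/batching pattern the code syncer uses, with a hypothetical `fetch` standing in for `client.GetCode` and string keys standing in for code hashes:

```go
package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

const batchSize = 4 // illustrative; the real cap is codeHashesPerReq

// fetch stands in for a network request resolving a batch of hashes.
func fetch(ctx context.Context, batch []string) error {
	fmt.Println("fetching", batch)
	return nil
}

// worker drains the channel, flushing a request whenever batchSize hashes
// have accumulated, plus one final flush when the channel closes.
func worker(ctx context.Context, hashes <-chan string) error {
	batch := make([]string, 0, batchSize)
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case h, ok := <-hashes:
			if !ok {
				if len(batch) > 0 {
					return fetch(ctx, batch)
				}
				return nil
			}
			batch = append(batch, h)
			if len(batch) >= batchSize {
				if err := fetch(ctx, batch); err != nil {
					return err
				}
				batch = batch[:0]
			}
		}
	}
}

func main() {
	hashes := make(chan string)
	eg, ctx := errgroup.WithContext(context.Background())
	for i := 0; i < 2; i++ { // two workers, mirroring numWorkers
		eg.Go(func() error { return worker(ctx, hashes) })
	}
	for i := 0; i < 10; i++ {
		hashes <- fmt.Sprintf("hash-%d", i)
	}
	close(hashes)
	fmt.Println("done:", eg.Wait())
}
```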
func (c *Syncer) fulfillCodeRequest(ctx context.Context, codeHashes []common.Hash) error { + defer c.releaseInFlight(codeHashes) + codeByteSlices, err := c.client.GetCode(ctx, codeHashes) if err != nil { return err @@ -192,10 +201,5 @@ func (c *Syncer) fulfillCodeRequest(ctx context.Context, codeHashes []common.Has if err := batch.Write(); err != nil { return fmt.Errorf("failed to write batch for fulfilled code requests: %w", err) } - // After successfully committing to the database, release in-flight ownership - // so that subsequent work for these hashes can be considered again if needed. - for _, codeHash := range codeHashes { - c.inFlight.Delete(codeHash) - } return nil } diff --git a/graft/evm/sync/engine/BUILD.bazel b/graft/evm/sync/engine/BUILD.bazel index 1b32c6add0af..993c67e10a01 100644 --- a/graft/evm/sync/engine/BUILD.bazel +++ b/graft/evm/sync/engine/BUILD.bazel @@ -4,10 +4,15 @@ load("//.bazel:defs.bzl", "graft_go_test") go_library( name = "engine", srcs = [ + "block_queue.go", "client.go", + "coordinator.go", + "executor_dynamic.go", "executor_static.go", + "pivot_policy.go", "registry.go", "server.go", + "sync_target.go", ], importpath = "github.com/ava-labs/avalanchego/graft/evm/sync/engine", visibility = ["//visibility:public"], @@ -32,6 +37,7 @@ go_library( "@com_github_ava_labs_libevm//common", "@com_github_ava_labs_libevm//core/types", "@com_github_ava_labs_libevm//ethdb", + "@com_github_ava_labs_libevm//libevm/options", "@com_github_ava_labs_libevm//log", "@com_github_ava_labs_libevm//params", "@com_github_ava_labs_libevm//triedb", @@ -42,7 +48,11 @@ go_library( graft_go_test( name = "engine_test", srcs = [ + "block_queue_test.go", + "coordinator_test.go", "doubles_test.go", + "executor_dynamic_test.go", + "pivot_policy_test.go", "registry_test.go", ], embed = [":engine"], @@ -53,6 +63,7 @@ graft_go_test( "//graft/evm/sync/types", "//graft/evm/utils/utilstest", "@com_github_ava_labs_libevm//common", + "@com_github_ava_labs_libevm//core/types", "@com_github_stretchr_testify//require", ], ) diff --git a/graft/evm/sync/engine/block_queue.go b/graft/evm/sync/engine/block_queue.go new file mode 100644 index 000000000000..6c52b82e0665 --- /dev/null +++ b/graft/evm/sync/engine/block_queue.go @@ -0,0 +1,177 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import ( + "sync" + + "github.com/ava-labs/libevm/common" +) + +// BlockOperationType represents the type of operation to perform on a block. +type BlockOperationType int + +const ( + OpAccept BlockOperationType = iota + OpReject + OpVerify +) + +// String returns the string representation of the block operation. +func (op BlockOperationType) String() string { + switch op { + case OpAccept: + return "accept" + case OpReject: + return "reject" + case OpVerify: + return "verify" + default: + return "unknown" + } +} + +// blockOperation represents a queued block operation. +type blockOperation struct { + block EthBlockWrapper + operation BlockOperationType +} + +// blockQueue buffers block operations (accept/reject/verify) that arrive while +// the coordinator is in the Running state. Operations are processed in FIFO order. +// Blocks below the sync target height are pruned on UpdateSyncTarget and the +// buffer is snapshotted at finalization via dequeueBatch. Enqueue is always +// allowed. A dequeueBatch only captures the current buffered operations and +// clears them. New enqueues after the snapshot are not part of that batch. 
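The snapshot-and-clear semantics called out in the `blockQueue` doc comment can be shown in isolation (toy integer queue; the real queue also carries operation types and verify dedupe):

```go
package main

import (
	"fmt"
	"sync"
)

type fifo struct {
	mu    sync.Mutex
	items []int
}

func (q *fifo) enqueue(v int) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.items = append(q.items, v)
}

// dequeueBatch snapshots and clears the buffer; enqueues after the snapshot
// land in a fresh buffer and are returned by a later call.
func (q *fifo) dequeueBatch() []int {
	q.mu.Lock()
	defer q.mu.Unlock()
	out := q.items
	q.items = nil
	return out
}

func main() {
	q := &fifo{}
	q.enqueue(1)
	q.enqueue(2)
	batch := q.dequeueBatch()
	q.enqueue(3)                         // arrives after the snapshot
	fmt.Println(batch, q.dequeueBatch()) // [1 2] [3]
}
```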
+type blockQueue struct { + mu sync.Mutex + // buffered operations accumulated before finalization + items []blockOperation + + verifyDedupe verifyDedupeTracker +} + +// newBlockQueue creates a new empty queue. +func newBlockQueue() *blockQueue { + return &blockQueue{} +} + +// enqueue appends a block operation to the buffer. Returns true if the operation +// was queued, false if the block is nil. +func (q *blockQueue) enqueue(b EthBlockWrapper, op BlockOperationType) bool { + if b == nil { + return false + } + q.mu.Lock() + defer q.mu.Unlock() + + // Verify may be called multiple times by the engine; dedupe it so we only queue one verify per block. + var hash common.Hash + if ethb := b.GetEthBlock(); ethb != nil { + hash = ethb.Hash() + } + if q.verifyDedupe.isQueued(op, hash) { + // Already queued a verify for this block: still deferred, but don't duplicate. + return true + } + q.verifyDedupe.markQueued(op, hash) + + q.items = append(q.items, blockOperation{ + block: b, + operation: op, + }) + return true +} + +// dequeueBatch returns the current buffered operations and clears the buffer. New +// arrivals after the snapshot are not included and remain buffered for later. +func (q *blockQueue) dequeueBatch() []blockOperation { + q.mu.Lock() + defer q.mu.Unlock() + out := q.items + q.items = nil + return out +} + +// forget clears dedupe markers for the given operations after they have been executed +// (or abandoned due to error). This ensures the verify dedupe map does not grow unbounded. +func (q *blockQueue) forget(ops []blockOperation) { + if len(ops) == 0 { + return + } + q.mu.Lock() + defer q.mu.Unlock() + for _, op := range ops { + ethb := op.block.GetEthBlock() + if ethb == nil { + continue + } + q.verifyDedupe.unmarkQueued(op.operation, ethb.Hash()) + } +} + +// removeBelowHeight drops blocks with height < targetHeight. +// Called during pivots to discard blocks below the new target. +func (q *blockQueue) removeBelowHeight(targetHeight uint64) { + q.remove(func(height uint64) bool { return height >= targetHeight }) +} + +// removeThroughHeight drops blocks with height <= targetHeight. +// Called before batch replay because the commit-target block is already applied by FinalizeVM. +func (q *blockQueue) removeThroughHeight(targetHeight uint64) { + q.remove(func(height uint64) bool { return height > targetHeight }) +} + +// remove drops queued blocks for which keep(height) returns false. +func (q *blockQueue) remove(keep func(height uint64) bool) { + q.mu.Lock() + defer q.mu.Unlock() + + filtered := q.items[:0] + for _, op := range q.items { + ethBlock := op.block.GetEthBlock() + if ethBlock != nil && keep(ethBlock.NumberU64()) { + filtered = append(filtered, op) + continue + } + if ethBlock != nil { + q.verifyDedupe.unmarkQueued(op.operation, ethBlock.Hash()) + } + } + q.items = filtered +} + +// verifyDedupeTracker tracks which blocks already have a queued OpVerify operation. +// +// It is intentionally domain-specific: +// - It only applies to OpVerify (other ops are ignored). +// - enqueue should still report "deferred" on duplicates, but avoid queuing duplicates. 
+type verifyDedupeTracker struct { + seen map[common.Hash]struct{} +} + +func (d *verifyDedupeTracker) isQueued(op BlockOperationType, h common.Hash) bool { + if op != OpVerify || d.seen == nil { + return false + } + _, ok := d.seen[h] + return ok +} + +func (d *verifyDedupeTracker) markQueued(op BlockOperationType, h common.Hash) { + if op != OpVerify { + return + } + if d.seen == nil { + d.seen = make(map[common.Hash]struct{}) + } + d.seen[h] = struct{}{} +} + +func (d *verifyDedupeTracker) unmarkQueued(op BlockOperationType, h common.Hash) { + if op != OpVerify || d.seen == nil { + return + } + delete(d.seen, h) +} diff --git a/graft/evm/sync/engine/block_queue_test.go b/graft/evm/sync/engine/block_queue_test.go new file mode 100644 index 000000000000..cba542554371 --- /dev/null +++ b/graft/evm/sync/engine/block_queue_test.go @@ -0,0 +1,161 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import ( + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBlockQueue_EnqueueAndDequeue(t *testing.T) { + q := newBlockQueue() + + // Nil block should be rejected. + require.False(t, q.enqueue(nil, OpAccept)) + + // Enqueue blocks. + for i := uint64(100); i < 105; i++ { + require.True(t, q.enqueue(newMockBlock(i), OpAccept)) + } + + // Dequeue returns all in FIFO order and clears queue. + batch := q.dequeueBatch() + require.Len(t, batch, 5) + for i, op := range batch { + require.Equal(t, uint64(100+i), op.block.GetEthBlock().NumberU64()) + } + + // Queue is now empty. + require.Empty(t, q.dequeueBatch()) +} + +func TestBlockQueue_RemoveThroughHeight(t *testing.T) { + q := newBlockQueue() + + // Enqueue blocks at heights 100-110. + for i := uint64(100); i <= 110; i++ { + q.enqueue(newMockBlock(i), OpAccept) + } + + // Remove blocks at or below height 105 (commit-target block included). + q.removeThroughHeight(105) + + // Only blocks strictly above 105 should remain (106, 107, 108, 109, 110). + batch := q.dequeueBatch() + require.Len(t, batch, 5) + require.Equal(t, uint64(106), batch[0].block.GetEthBlock().NumberU64()) +} + +func TestBlockQueue_OperationDedupSemantics(t *testing.T) { + tests := []struct { + name string + enqueueOps []BlockOperationType + wantOps []BlockOperationType + }{ + { + name: "dedupe verify duplicates", + enqueueOps: []BlockOperationType{OpVerify, OpVerify}, + wantOps: []BlockOperationType{OpVerify}, + }, + { + name: "allow different operations for same block", + enqueueOps: []BlockOperationType{OpVerify, OpAccept}, + wantOps: []BlockOperationType{OpVerify, OpAccept}, + }, + { + name: "do not dedupe accept/reject", + enqueueOps: []BlockOperationType{OpAccept, OpAccept, OpReject, OpReject}, + wantOps: []BlockOperationType{OpAccept, OpAccept, OpReject, OpReject}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + q := newBlockQueue() + b := newMockBlock(100) + + for _, op := range tt.enqueueOps { + // Enqueue should report deferred even when deduping verify operations. 
+ require.True(t, q.enqueue(b, op)) + } + + batch := q.dequeueBatch() + require.Len(t, batch, len(tt.wantOps)) + for i, wantOp := range tt.wantOps { + require.Equal(t, wantOp, batch[i].operation) + } + }) + } +} + +func TestBlockQueue_VerifyCanBeRequeuedAfterCleanup(t *testing.T) { + tests := []struct { + name string + forgetCurrent bool + pruneBelow uint64 + }{ + { + name: "after forget", + forgetCurrent: true, + }, + { + name: "after prune drop", + pruneBelow: 101, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + q := newBlockQueue() + b := newMockBlock(100) + require.True(t, q.enqueue(b, OpVerify)) + + if tt.forgetCurrent { + batch := q.dequeueBatch() + require.Len(t, batch, 1) + require.Equal(t, OpVerify, batch[0].operation) + q.forget(batch) + } + if tt.pruneBelow != 0 { + // Drop blocks strictly below 101, so our block at height 100 is pruned. + q.removeBelowHeight(tt.pruneBelow) + } + require.Empty(t, q.dequeueBatch()) + + // Verify should be enqueueable again after cleanup. + require.True(t, q.enqueue(b, OpVerify)) + batch := q.dequeueBatch() + require.Len(t, batch, 1) + require.Equal(t, OpVerify, batch[0].operation) + }) + } +} + +func TestBlockQueue_ConcurrentAccess(t *testing.T) { + t.Parallel() + + q := newBlockQueue() + const numGoroutines = 10 + const numOps = 100 + + var wg sync.WaitGroup + wg.Add(numGoroutines) + + for g := 0; g < numGoroutines; g++ { + go func(id int) { + defer wg.Done() + for i := 0; i < numOps; i++ { + q.enqueue(newMockBlock(uint64(id*numOps+i)), OpAccept) + } + }(g) + } + + wg.Wait() + + // All operations should have been enqueued. + batch := q.dequeueBatch() + require.Len(t, batch, numGoroutines*numOps) +} diff --git a/graft/evm/sync/engine/client.go b/graft/evm/sync/engine/client.go index 28c0724f58d0..f75402f1a0c7 100644 --- a/graft/evm/sync/engine/client.go +++ b/graft/evm/sync/engine/client.go @@ -55,6 +55,9 @@ var ( // end of the sync operation. type EthBlockWrapper interface { GetEthBlock() *ethtypes.Block + Accept(context.Context) error + Reject(context.Context) error + Verify(context.Context) error } // BloomIndexer provides bloom filter indexing functionality. @@ -84,6 +87,9 @@ type BlockAcceptor interface { // Acceptor applies the results of state sync to the VM, preparing it for bootstrapping. type Acceptor interface { AcceptSync(ctx context.Context, summary message.Syncable) error + // DrainAcceptorQueue blocks until all pending accepted blocks have been + // fully processed by the async acceptor. + DrainAcceptorQueue() } // Executor defines how state sync is executed. @@ -91,6 +97,15 @@ type Acceptor interface { type Executor interface { // Execute runs the sync process and blocks until completion or error. Execute(ctx context.Context, summary message.Syncable) error + + // OnBlockAccepted handles a block accepted during sync. + OnBlockAccepted(EthBlockWrapper) (bool, error) + + // OnBlockRejected handles a block rejected during sync. + OnBlockRejected(EthBlockWrapper) (bool, error) + + // OnBlockVerified handles a block verified during sync. + OnBlockVerified(EthBlockWrapper) (bool, error) } var _ Acceptor = (*client)(nil) @@ -120,6 +135,11 @@ type ClientConfig struct { RequestSize uint16 // number of key/value pairs to ask peers for per request Enabled bool SkipResume bool + // DynamicStateSyncEnabled toggles dynamic vs static state sync orchestration. + DynamicStateSyncEnabled bool + + // PivotInterval advances the sync target every N blocks in dynamic mode. 
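`pivot_policy.go` itself does not appear in this hunk; from its call sites below (`shouldForward(newTarget.Height())` followed by `advance()` on a successful pivot), one plausible reading is an interval throttle. The sketch below is an assumption, not the patch's implementation:

```go
package main

import "fmt"

// pivotThrottle is a hypothetical reading of the pivot policy: forward a new
// target only after it has advanced interval blocks past the last pivot, so
// syncers are not re-targeted on every accepted block.
type pivotThrottle struct {
	interval  uint64 // 0 disables throttling: every candidate is forwarded
	lastPivot uint64
	candidate uint64
}

// shouldForward stages the candidate height and reports whether it is far
// enough past the last pivot to be worth re-targeting the syncers.
func (p *pivotThrottle) shouldForward(h uint64) bool {
	p.candidate = h
	return p.interval == 0 || h >= p.lastPivot+p.interval
}

// advance commits the staged candidate after a successful pivot.
func (p *pivotThrottle) advance() { p.lastPivot = p.candidate }

func main() {
	p := &pivotThrottle{interval: 4, lastPivot: 100}
	fmt.Println(p.shouldForward(102)) // false: only 2 blocks past last pivot
	fmt.Println(p.shouldForward(104)) // true: 4 blocks past, pivot allowed
	p.advance()
	fmt.Println(p.shouldForward(106)) // false: now measured from height 104
}
```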
+ PivotInterval uint64 // LeafsRequestType specifies the wire format for leafs requests. // Must be set explicitly by the caller. @@ -132,7 +152,10 @@ type client struct { cancel context.CancelFunc codeQueue *code.Queue wg sync.WaitGroup + executorLock sync.RWMutex + executor Executor err error + stateSyncOnce sync.Once } func NewClient(config *ClientConfig) Client { @@ -151,6 +174,15 @@ type Client interface { ClearOngoingSummary() error Shutdown() error Error() error + // OnEngineAccept should be called by the engine when a block is accepted. + // Returns true if the block was enqueued for deferred processing, false otherwise. + OnEngineAccept(EthBlockWrapper) (bool, error) + // OnEngineReject should be called by the engine when a block is rejected. + // Returns true if the block was enqueued for deferred processing, false otherwise. + OnEngineReject(EthBlockWrapper) (bool, error) + // OnEngineVerify should be called by the engine when a block is verified. + // Returns true if the block was enqueued for deferred processing, false otherwise. + OnEngineVerify(EthBlockWrapper) (bool, error) } // StateSyncEnabled returns [client.enabled], which is set in the chain's config file. @@ -196,24 +228,69 @@ func (c *client) ParseStateSummary(_ context.Context, summaryBytes []byte) (bloc return c.config.SyncSummaryProvider.Parse(summaryBytes, c.acceptSyncSummary) } +func (c *client) getExecutor() Executor { + c.executorLock.RLock() + defer c.executorLock.RUnlock() + return c.executor +} + +// OnEngineAccept delegates to the active executor, if any. +func (c *client) OnEngineAccept(b EthBlockWrapper) (bool, error) { + executor := c.getExecutor() + if executor == nil { + return false, nil + } + return executor.OnBlockAccepted(b) +} + +// OnEngineReject delegates to the active executor, if any. +func (c *client) OnEngineReject(b EthBlockWrapper) (bool, error) { + executor := c.getExecutor() + if executor == nil { + return false, nil + } + return executor.OnBlockRejected(b) +} + +// OnEngineVerify delegates to the active executor, if any. +func (c *client) OnEngineVerify(b EthBlockWrapper) (bool, error) { + executor := c.getExecutor() + if executor == nil { + return false, nil + } + return executor.OnBlockVerified(b) +} + // acceptSyncSummary returns true if sync will be performed and launches the state sync process // in a goroutine. -func (c *client) acceptSyncSummary(summary message.Syncable) (block.StateSyncMode, error) { - if err := c.prepareForSync(summary); err != nil { +func (c *client) acceptSyncSummary(proposedSummary message.Syncable) (block.StateSyncMode, error) { + executor := c.getExecutor() + + // If dynamic sync is already running, treat new summaries as target updates. 
+ if ds, ok := executor.(*dynamicExecutor); ok && ds.CurrentState() == StateRunning { + if err := ds.UpdateSyncTarget(proposedSummary); err != nil { + return block.StateSyncSkipped, err + } + return block.StateSyncDynamic, nil + } + + if err := c.prepareForSync(proposedSummary); err != nil { if errors.Is(err, errSkipSync) { return block.StateSyncSkipped, nil } return block.StateSyncSkipped, err } - registry, err := c.newSyncerRegistry(summary) + registry, err := c.newSyncerRegistry(proposedSummary) if err != nil { return block.StateSyncSkipped, fmt.Errorf("failed to create syncer registry: %w", err) } - executor := newStaticExecutor(registry, c) + if c.config.DynamicStateSyncEnabled { + return c.startAsync(newDynamicExecutor(registry, c, c.config.PivotInterval), proposedSummary, block.StateSyncDynamic), nil + } - return c.startAsync(executor, summary), nil + return c.startAsync(newStaticExecutor(registry, c), proposedSummary, block.StateSyncStatic), nil } // prepareForSync handles resume check and snapshot wipe before sync starts. @@ -260,43 +337,50 @@ func (c *client) prepareForSync(summary message.Syncable) error { } // startAsync launches the sync executor in a background goroutine. -func (c *client) startAsync(executor Executor, summary message.Syncable) block.StateSyncMode { +func (c *client) startAsync(executor Executor, summary message.Syncable, mode block.StateSyncMode) block.StateSyncMode { ctx, cancel := context.WithCancel(context.Background()) c.cancel = cancel + c.executorLock.Lock() + c.executor = executor + c.executorLock.Unlock() c.wg.Add(1) go func() { defer c.wg.Done() defer cancel() - if err := executor.Execute(ctx, summary); err != nil { - c.err = err - } - // notify engine regardless of whether err == nil, - // this error will be propagated to the engine when it calls - // vm.SetState(snow.Bootstrapping) - log.Info("state sync completed, notifying engine", "err", c.err) - close(c.config.StateSyncDone) + err := executor.Execute(ctx, summary) + c.signalDone(err) }() - log.Info("state sync started", "mode", block.StateSyncStatic) - return block.StateSyncStatic + log.Info("state sync started", "mode", mode.String(), "summary", summary.GetBlockHash().Hex(), "height", summary.Height()) + return mode } func (c *client) Shutdown() error { - if c.cancel != nil { - c.cancel() - } + c.signalDone(context.Canceled) if c.codeQueue != nil { c.codeQueue.Shutdown() } - c.wg.Wait() // wait for the background goroutine to exit + c.wg.Wait() return nil } // Error returns a non-nil error if one occurred during the sync. func (c *client) Error() error { return c.err } +// signalDone sets the terminal error exactly once and signals completion to the engine. +func (c *client) signalDone(err error) { + c.stateSyncOnce.Do(func() { + c.err = err + if c.cancel != nil { + c.cancel() + } + log.Info("state sync completed, notifying engine", "err", err) + close(c.config.StateSyncDone) + }) +} + // AcceptSync implements Acceptor. It resets the blockchain to the synced block, // preparing it for execution, and updates disk and memory pointers so the VM // is ready for bootstrapping. Also executes any shared memory operations from @@ -381,6 +465,32 @@ func (c *client) commitMarkers(summary message.Syncable) error { return c.config.VerDB.Commit() } +// DrainAcceptorQueue implements Acceptor. +func (c *client) DrainAcceptorQueue() { + c.config.Chain.BlockChain().DrainAcceptorQueue() + + // Batch replay advances the blockchain but not chain.State. 
Sync the + // two so the engine sees the correct tip during bootstrapping. + if err := c.syncLastAcceptedToChainState(); err != nil { + log.Error("failed to reconcile chain.State after batch replay", "err", err) + } +} + +// syncLastAcceptedToChainState propagates the blockchain's last accepted +// block into chain.State. +func (c *client) syncLastAcceptedToChainState() error { + lastAccepted := c.config.Chain.BlockChain().LastAcceptedBlock() + blk, err := c.config.State.GetBlock(context.Background(), ids.ID(lastAccepted.Hash())) + if err != nil { + return err + } + // Unwrap to avoid double-wrapping inside SetLastAcceptedBlock. + if bw, ok := blk.(*chain.BlockWrapper); ok { + blk = bw.Block + } + return c.config.State.SetLastAcceptedBlock(blk) +} + // newSyncerRegistry creates a registry with all required syncers for the given summary. func (c *client) newSyncerRegistry(summary message.Syncable) (*SyncerRegistry, error) { registry := NewSyncerRegistry() @@ -394,52 +504,16 @@ func (c *client) newSyncerRegistry(summary message.Syncable) (*SyncerRegistry, e return nil, fmt.Errorf("failed to create block syncer: %w", err) } - codeQueue, err := code.NewQueue(c.config.ChainDB) + codeSyncer, stateSyncer, err := c.newCodeAndStateSyncers(summary) if err != nil { - return nil, fmt.Errorf("failed to create code queue: %w", err) + return nil, err } - c.codeQueue = codeQueue - codeSyncer, err := code.NewSyncer(c.config.Client, c.config.ChainDB, codeQueue.CodeHashes()) - if err != nil { - return nil, fmt.Errorf("failed to create code syncer: %w", err) - } - - var stateSyncer types.Syncer - if tdb, ok := c.config.Chain.BlockChain().TrieDB().Backend().(*firewood.TrieDB); ok { - registerer, err := metrics.MakeAndRegister(c.config.SnowCtx.Metrics, "sync_firewood") - if err != nil { - return nil, fmt.Errorf("failed to create firewood syncer metrics registerer: %w", err) - } - stateSyncer, err = evmstate.NewFirewoodSyncer( - syncer.Config{ - Log: c.config.SnowCtx.Log, - Registerer: registerer, - StateSyncNodes: c.config.Client.StateSyncNodes(), - }, - tdb.Firewood, - summary.GetBlockRoot(), - codeQueue, - c.config.Client.AddClient(p2p.FirewoodRangeProofHandlerID), - c.config.Client.AddClient(p2p.FirewoodChangeProofHandlerID), - ) - if err != nil { - return nil, fmt.Errorf("failed to create firewood syncer: %w", err) - } - } else { - stateSyncer, err = evmstate.NewSyncer( - c.config.Client, c.config.ChainDB, - summary.GetBlockRoot(), - codeQueue, c.config.RequestSize, - c.config.LeafsRequestType, - ) - if err != nil { - return nil, fmt.Errorf("failed to create EVM state syncer: %w", err) - } + syncers := []types.Syncer{blockSyncer, stateSyncer} + if codeSyncer != nil { + syncers = append(syncers, codeSyncer) } - syncers := []types.Syncer{blockSyncer, codeSyncer, stateSyncer} - if c.config.Extender != nil { extenderSyncer, err := c.config.Extender.CreateSyncer(c.config.Client, c.config.VerDB, summary) if err != nil { @@ -456,3 +530,88 @@ func (c *client) newSyncerRegistry(summary message.Syncable) (*SyncerRegistry, e return registry, nil } + +func (c *client) newCodeAndStateSyncers(summary message.Syncable) (types.Syncer, types.Syncer, error) { + if tdb, ok := c.config.Chain.BlockChain().TrieDB().Backend().(*firewood.TrieDB); ok { + return c.newFirewoodSyncers(summary, tdb) + } + if c.config.DynamicStateSyncEnabled { + return c.newHashDBDynamicSyncers(summary) + } + return c.newHashDBStaticSyncers(summary) +} + +func (c *client) newFirewoodSyncers(summary message.Syncable, tdb *firewood.TrieDB) (types.Syncer, 
types.Syncer, error) { + codeQueue, err := code.NewQueue(c.config.ChainDB) + if err != nil { + return nil, nil, fmt.Errorf("failed to create code queue: %w", err) + } + c.codeQueue = codeQueue + + codeSyncer, err := code.NewSyncer(c.config.Client, c.config.ChainDB, codeQueue.CodeHashes()) + if err != nil { + return nil, nil, fmt.Errorf("failed to create code syncer: %w", err) + } + + registerer, err := metrics.MakeAndRegister(c.config.SnowCtx.Metrics, "sync_firewood") + if err != nil { + return nil, nil, fmt.Errorf("failed to create firewood syncer metrics registerer: %w", err) + } + stateSyncer, err := evmstate.NewFirewoodSyncer( + syncer.Config{ + Log: c.config.SnowCtx.Log, + Registerer: registerer, + StateSyncNodes: c.config.Client.StateSyncNodes(), + }, + tdb.Firewood, + summary.GetBlockRoot(), + codeQueue, + c.config.Client.AddClient(p2p.FirewoodRangeProofHandlerID), + c.config.Client.AddClient(p2p.FirewoodChangeProofHandlerID), + ) + if err != nil { + return nil, nil, fmt.Errorf("failed to create firewood syncer: %w", err) + } + return codeSyncer, stateSyncer, nil +} + +func (c *client) newHashDBDynamicSyncers(summary message.Syncable) (types.Syncer, types.Syncer, error) { + // The code syncer is managed internally by the pivot session, not + // registered separately. Each pivot creates a fresh code queue and + // code syncer alongside the state syncer. + stateSyncer, err := evmstate.NewHashDBDynamicSyncer( + c.config.Client, c.config.ChainDB, + summary.GetBlockRoot(), + c.config.RequestSize, + c.config.LeafsRequestType, + ) + if err != nil { + return nil, nil, fmt.Errorf("failed to create dynamic EVM state syncer: %w", err) + } + return nil, stateSyncer, nil +} + +func (c *client) newHashDBStaticSyncers(summary message.Syncable) (types.Syncer, types.Syncer, error) { + codeQueue, err := code.NewQueue(c.config.ChainDB) + if err != nil { + return nil, nil, fmt.Errorf("failed to create code queue: %w", err) + } + c.codeQueue = codeQueue + + codeSyncer, err := code.NewSyncer(c.config.Client, c.config.ChainDB, codeQueue.CodeHashes()) + if err != nil { + return nil, nil, fmt.Errorf("failed to create code syncer: %w", err) + } + + stateSyncer, err := evmstate.NewHashDBSyncer( + c.config.Client, c.config.ChainDB, + summary.GetBlockRoot(), + codeQueue, c.config.RequestSize, + c.config.LeafsRequestType, + evmstate.WithFinalizeCodeQueue(codeQueue.Finalize), + ) + if err != nil { + return nil, nil, fmt.Errorf("failed to create EVM state syncer: %w", err) + } + return codeSyncer, stateSyncer, nil +} diff --git a/graft/evm/sync/engine/coordinator.go b/graft/evm/sync/engine/coordinator.go new file mode 100644 index 000000000000..6e2e53d6f9cb --- /dev/null +++ b/graft/evm/sync/engine/coordinator.go @@ -0,0 +1,360 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import ( + "context" + "errors" + "fmt" + "sync" + "sync/atomic" + + "github.com/ava-labs/libevm/libevm/options" + "github.com/ava-labs/libevm/log" + + "github.com/ava-labs/avalanchego/graft/evm/message" +) + +// State represents the lifecycle phases of dynamic state sync orchestration. 
+type State int + +const ( + StateIdle State = iota + StateInitializing + StateRunning + StateFinalizing + StateExecutingBatch + StateCompleted + StateAborted +) + +var ( + errInvalidState = errors.New("invalid coordinator state") + errBatchCancelled = errors.New("batch execution cancelled") + errBatchOperationFailed = errors.New("batch operation failed") + errCommitTargetRequired = errors.New("commit target not set") +) + +// Callbacks delegates VM-specific work back to the client. +type Callbacks struct { + // FinalizeVM applies the sync result to the blockchain (same as AcceptSync). + FinalizeVM func(ctx context.Context, target message.Syncable) error + // DrainAcceptorQueue waits for all batch-replayed blocks to be fully processed. + DrainAcceptorQueue func() + // OnDone signals sync completion (success or failure). + OnDone func(err error) +} + +// Coordinator orchestrates dynamic state sync across multiple syncers. +type Coordinator struct { + // state is managed atomically to allow cheap concurrent checks/updates. + state atomic.Int32 + // updateMu serializes [UpdateSyncTarget] calls. + updateMu sync.Mutex + // targetMu protects commitTarget reads/writes. + targetMu sync.RWMutex + // commitTarget is the latest fully accepted fanout target. + commitTarget message.Syncable + // targetEpoch increments after each successful commitTarget update. + targetEpoch atomic.Uint64 + + queue *blockQueue + syncerRegistry *SyncerRegistry + callbacks Callbacks + + // doneOnce ensures [Callbacks.OnDone] is invoked at most once. + doneOnce sync.Once + + pivotInterval uint64 + pivot *pivotPolicy + + // initial is the first sync target, used as fallback if commitTarget is nil. + initial message.Syncable + cancel context.CancelCauseFunc +} + +// CoordinatorOption follows the functional options pattern for Coordinator. +type CoordinatorOption = options.Option[Coordinator] + +// WithPivotInterval configures the interval-based pivot policy. 0 disables custom +// interval and uses default policy behavior. +func WithPivotInterval(interval uint64) CoordinatorOption { + return options.Func[Coordinator](func(co *Coordinator) { + co.pivotInterval = interval + }) +} + +// NewCoordinator constructs a coordinator to orchestrate dynamic state sync across multiple syncers. +func NewCoordinator(syncerRegistry *SyncerRegistry, cbs Callbacks, opts ...CoordinatorOption) *Coordinator { + co := &Coordinator{ + queue: newBlockQueue(), + syncerRegistry: syncerRegistry, + callbacks: cbs, + } + options.ApplyTo(co, opts...) + co.state.Store(int32(StateIdle)) + + return co +} + +// Start launches all syncers and returns immediately. Failures are monitored +// in the background and will transition to [StateAborted]. 
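The coordinator's transitions (seen in `markAborted` and `beginFinalizing` below) follow a standard compare-and-swap loop over the atomically stored state. A minimal model of that idiom, with the state set reduced to three values for illustration:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

const (
	running int32 = iota
	finalizing
	aborted
)

type machine struct{ state atomic.Int32 }

// beginFinalizing succeeds only from running (or if already finalizing),
// mirroring the coordinator's CAS loop: a losing racer re-reads and retries.
func (m *machine) beginFinalizing() bool {
	for {
		switch m.state.Load() {
		case running:
			if m.state.CompareAndSwap(running, finalizing) {
				return true
			}
		case finalizing:
			return true
		default:
			return false // terminal states refuse the transition
		}
	}
}

func main() {
	var m machine
	fmt.Println(m.beginFinalizing()) // true: running -> finalizing
	m.state.Store(aborted)
	fmt.Println(m.beginFinalizing()) // false: aborted is terminal
}
```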
+func (co *Coordinator) Start(ctx context.Context, initial message.Syncable) {
+	co.state.Store(int32(StateInitializing))
+	co.initial = initial
+	co.setCommitTarget(initial)
+	co.pivot = newPivotPolicy(co.pivotInterval)
+
+	cctx, cancel := context.WithCancelCause(ctx)
+	co.cancel = cancel
+	g := co.syncerRegistry.StartAsync(cctx, initial)
+
+	co.state.Store(int32(StateRunning))
+
+	go func() {
+		err := g.Wait()
+		log.Info("all syncer goroutines exited", "err", err)
+		if errors.Is(err, context.Canceled) {
+			err = contextCause(cctx, err)
+		}
+		if err == nil {
+			err = co.freezeCommitTarget(cctx)
+		}
+
+		finalizeTarget := co.getCommitTarget()
+		if finalizeTarget == nil {
+			finalizeTarget = co.initial
+		}
+		co.syncerRegistry.FinalizeAll(finalizeTarget)
+
+		if err == nil {
+			err = co.ProcessQueuedBlockOperations(cctx)
+		}
+		co.finish(cancel, err)
+	}()
+}
+
+// ProcessQueuedBlockOperations finalizes the VM at the commit target and
+// replays deferred block operations in FIFO order.
+func (co *Coordinator) ProcessQueuedBlockOperations(ctx context.Context) error {
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+
+	// The caller may have already transitioned to StateFinalizing.
+	if co.CurrentState() != StateFinalizing {
+		if err := co.beginFinalizing(); err != nil {
+			return err
+		}
+	}
+
+	target := co.getCommitTarget()
+	if target == nil {
+		co.markAborted()
+		return errCommitTargetRequired
+	}
+
+	if co.callbacks.FinalizeVM != nil {
+		if err := co.callbacks.FinalizeVM(ctx, target); err != nil {
+			co.markAborted()
+			return err
+		}
+	}
+
+	if err := ctx.Err(); err != nil {
+		co.markAborted()
+		return err
+	}
+
+	if !co.state.CompareAndSwap(int32(StateFinalizing), int32(StateExecutingBatch)) {
+		return errInvalidState
+	}
+
+	// Drop blocks <= commit target (already applied by FinalizeVM).
+	co.queue.removeThroughHeight(target.Height())
+
+	// Drain the queue. New enqueues during execution are picked up in subsequent iterations.
+	for {
+		operations := co.queue.dequeueBatch()
+		if len(operations) == 0 {
+			break
+		}
+		err := executeBlockOperations(ctx, operations)
+		// Drop dedupe markers regardless of outcome.
+		co.queue.forget(operations)
+		if err != nil {
+			co.markAborted()
+			return err
+		}
+	}
+
+	if co.callbacks.DrainAcceptorQueue != nil {
+		co.callbacks.DrainAcceptorQueue()
+	}
+
+	return nil
+}
+
+// UpdateSyncTarget broadcasts a new target to all syncers and removes stale blocks from the queue.
+// Only valid in [StateRunning].
+func (co *Coordinator) UpdateSyncTarget(newTarget message.Syncable) error {
+	co.updateMu.Lock()
+	defer co.updateMu.Unlock()
+
+	if co.CurrentState() != StateRunning {
+		return errInvalidState
+	}
+	if !co.pivot.shouldForward(newTarget.Height()) {
+		return nil
+	}
+
+	if err := co.syncerRegistry.UpdateSyncTarget(newTarget); err != nil {
+		co.abort(err)
+		return err
+	}
+
+	co.setCommitTarget(newTarget)
+	co.targetEpoch.Add(1)
+	co.queue.removeBelowHeight(newTarget.Height())
+	co.pivot.advance()
+	return nil
+}
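ProcessQueuedBlockOperations drains in repeated dequeueBatch passes because replaying a block can enqueue more work; the loop exits only once a full pass finds the queue empty. A self-contained sketch of that drain-until-empty shape, with a toy slice-backed queue standing in for blockQueue (hypothetical names, not the package's types):

	package main

	import "fmt"

	// toyQueue stands in for the coordinator's blockQueue.
	type toyQueue struct{ items []int }

	func (q *toyQueue) enqueue(v int) { q.items = append(q.items, v) }

	// dequeueBatch hands back everything queued so far and resets the queue.
	func (q *toyQueue) dequeueBatch() []int {
		batch := q.items
		q.items = nil
		return batch
	}

	func drain(q *toyQueue, apply func(int) error) error {
		for {
			batch := q.dequeueBatch()
			if len(batch) == 0 {
				return nil // a full pass saw an empty queue: done
			}
			for _, v := range batch {
				if err := apply(v); err != nil {
					return err
				}
			}
			// Loop again: apply may have caused new enqueues.
		}
	}

	func main() {
		q := &toyQueue{}
		q.enqueue(1)
		q.enqueue(2)
		_ = drain(q, func(v int) error {
			fmt.Println("replayed", v)
			return nil
		})
	}

+// AddBlockOperation appends a block to the queue while in [StateRunning] or
+// [StateExecutingBatch]. Returns true if the block was queued.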
+func (co *Coordinator) AddBlockOperation(b EthBlockWrapper, op BlockOperationType) bool { + if b == nil { + return false + } + state := co.CurrentState() + if state != StateRunning && state != StateExecutingBatch { + log.Warn("AddBlockOperation rejected: coordinator not running", + "state", int32(state), "op", op.String()) + return false + } + return co.queue.enqueue(b, op) +} + +// CurrentState returns the current lifecycle state of the coordinator. +func (co *Coordinator) CurrentState() State { + return State(co.state.Load()) +} + +func (co *Coordinator) setCommitTarget(target message.Syncable) { + co.targetMu.Lock() + defer co.targetMu.Unlock() + co.commitTarget = target +} + +func (co *Coordinator) getCommitTarget() message.Syncable { + co.targetMu.RLock() + defer co.targetMu.RUnlock() + return co.commitTarget +} + +func (co *Coordinator) markAborted() { + for { + state := co.CurrentState() + if state == StateAborted || state == StateCompleted { + return + } + if co.state.CompareAndSwap(int32(state), int32(StateAborted)) { + log.Warn("coordinator aborted", "fromState", int32(state)) + return + } + } +} + +func (co *Coordinator) beginFinalizing() error { + for { + switch co.CurrentState() { + case StateRunning: + if co.state.CompareAndSwap(int32(StateRunning), int32(StateFinalizing)) { + return nil + } + case StateFinalizing: + return nil + default: + return errInvalidState + } + } +} + +// freezeCommitTarget prevents further sync target updates and ensures the +// commitTarget reflects only roots the syncers actually synced to. +func (co *Coordinator) freezeCommitTarget(cctx context.Context) error { + co.updateMu.Lock() + defer co.updateMu.Unlock() + + if err := co.beginFinalizing(); err != nil { + return contextCause(cctx, err) + } + return nil +} + +func (co *Coordinator) abort(err error) { + co.markAborted() + if co.cancel != nil { + co.cancel(err) + } +} + +func (co *Coordinator) finish(cancel context.CancelCauseFunc, err error) { + if err != nil { + log.Error("coordinator finishing with error", "err", err) + co.markAborted() + } else { + for { + state := co.CurrentState() + if state == StateCompleted || state == StateAborted { + break + } + if co.state.CompareAndSwap(int32(state), int32(StateCompleted)) { + log.Info("coordinator completed", "fromState", int32(state)) + break + } + } + } + if cancel != nil { + cancel(err) + } + if co.callbacks.OnDone != nil { + co.doneOnce.Do(func() { co.callbacks.OnDone(err) }) + } +} + +// contextCause extracts the cancel cause, falling back to fallback if the +// cause is nil or plain context.Canceled. +func contextCause(ctx context.Context, fallback error) error { + if cause := context.Cause(ctx); cause != nil && !errors.Is(cause, context.Canceled) { + return cause + } + return fallback +} + +// executeBlockOperations executes a batch of queued block operations in FIFO order. +// Partial completion is acceptable as operations are idempotent. 
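The abort path records the root cause with context.WithCancelCause, and contextCause digs it back out so callers see the syncer's error rather than a bare context.Canceled. A small stdlib-only illustration of that mechanism; the error text is made up for the example:

	package main

	import (
		"context"
		"errors"
		"fmt"
	)

	func main() {
		errSyncerFailed := errors.New("syncer failed") // illustrative cause
		ctx, cancel := context.WithCancelCause(context.Background())
		cancel(errSyncerFailed)

		// ctx.Err() only ever reports context.Canceled; the real reason
		// is retrievable via context.Cause.
		fmt.Println(ctx.Err())          // context canceled
		fmt.Println(context.Cause(ctx)) // syncer failed
	}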
+func executeBlockOperations(ctx context.Context, operations []blockOperation) error { + for i, op := range operations { + select { + case <-ctx.Done(): + return fmt.Errorf("operation %d/%d: %w", i+1, len(operations), errors.Join(errBatchCancelled, ctx.Err())) + default: + } + + var err error + switch op.operation { + case OpAccept: + err = op.block.Accept(ctx) + case OpReject: + err = op.block.Reject(ctx) + case OpVerify: + err = op.block.Verify(ctx) + } + if err != nil { + return fmt.Errorf("operation %d/%d (%v): %w", i+1, len(operations), op.operation, errors.Join(errBatchOperationFailed, err)) + } + } + return nil +} diff --git a/graft/evm/sync/engine/coordinator_test.go b/graft/evm/sync/engine/coordinator_test.go new file mode 100644 index 000000000000..8cf6c6c50c60 --- /dev/null +++ b/graft/evm/sync/engine/coordinator_test.go @@ -0,0 +1,369 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import ( + "context" + "encoding/binary" + "errors" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/ava-labs/libevm/common" + "github.com/stretchr/testify/require" + + "github.com/ava-labs/avalanchego/graft/evm/message" +) + +func TestCoordinator_StateValidation(t *testing.T) { + co := NewCoordinator(NewSyncerRegistry(), Callbacks{}, WithPivotInterval(1)) + block := newMockBlock(100) + target := newTestSyncTarget(100) + + // States that reject both operations. + for _, state := range []State{StateIdle, StateInitializing, StateFinalizing, StateCompleted, StateAborted} { + co.state.Store(int32(state)) + require.False(t, co.AddBlockOperation(block, OpAccept), "state %d should reject block", state) + err := co.UpdateSyncTarget(target) + require.ErrorIs(t, err, errInvalidState, "state %d should reject target update", state) + } + + // Running: accepts both. + co.state.Store(int32(StateRunning)) + require.True(t, co.AddBlockOperation(block, OpAccept)) + require.NoError(t, co.UpdateSyncTarget(target)) + + // ExecutingBatch: accepts blocks, rejects target updates. + co.state.Store(int32(StateExecutingBatch)) + require.True(t, co.AddBlockOperation(block, OpAccept)) + err := co.UpdateSyncTarget(target) + require.ErrorIs(t, err, errInvalidState) + + // Nil block is always rejected. 
+ co.state.Store(int32(StateRunning)) + require.False(t, co.AddBlockOperation(nil, OpAccept)) +} + +func TestCoordinator_UpdateSyncTarget_SerializesConcurrentCalls(t *testing.T) { + var ( + inUpdate atomic.Int32 + concurrent atomic.Bool + ) + + registry := NewSyncerRegistry() + require.NoError(t, registry.Register(FuncSyncer{ + name: "serial-check", + updateFn: func(message.Syncable) error { + if !inUpdate.CompareAndSwap(0, 1) { + concurrent.Store(true) + } + time.Sleep(10 * time.Millisecond) + inUpdate.Store(0) + return nil + }, + })) + + co := NewCoordinator(registry, Callbacks{}, WithPivotInterval(1)) + co.state.Store(int32(StateRunning)) + co.setCommitTarget(newTestSyncTarget(100)) + + const n = 8 + var wg sync.WaitGroup + wg.Add(n) + errCh := make(chan error, n) + for i := 0; i < n; i++ { + height := uint64(101 + i) + go func() { + defer wg.Done() + errCh <- co.UpdateSyncTarget(newTestSyncTarget(height)) + }() + } + wg.Wait() + close(errCh) + for err := range errCh { + require.NoError(t, err) + } + + require.False(t, concurrent.Load(), "UpdateTarget calls should be serialized") +} + +func TestCoordinator_Lifecycle(t *testing.T) { + t.Run("completes successfully", func(t *testing.T) { + registry := NewSyncerRegistry() + require.NoError(t, registry.Register(newMockSyncer("test", nil))) + + co, err := runCoordinator(t, registry, Callbacks{ + FinalizeVM: func(context.Context, message.Syncable) error { return nil }, + }) + + require.NoError(t, err) + require.Equal(t, StateCompleted, co.CurrentState()) + }) + + t.Run("aborts on syncer error", func(t *testing.T) { + wantErr := errors.New("syncer failed") + registry := NewSyncerRegistry() + require.NoError(t, registry.Register(newMockSyncer("failing", wantErr))) + + co, err := runCoordinator(t, registry, Callbacks{}) + + require.ErrorIs(t, err, wantErr) + require.Equal(t, StateAborted, co.CurrentState()) + }) +} + +func TestCoordinator_Finish_DoesNotOverwriteAbortedState(t *testing.T) { + co := NewCoordinator(NewSyncerRegistry(), Callbacks{}) + co.state.Store(int32(StateRunning)) + + co.finish(nil, errors.New("abort")) + require.Equal(t, StateAborted, co.CurrentState()) + + co.finish(nil, nil) + require.Equal(t, StateAborted, co.CurrentState()) +} + +func TestCoordinator_ProcessQueuedBlockOperations(t *testing.T) { + t.Run("executes queued operations", func(t *testing.T) { + co := NewCoordinator(NewSyncerRegistry(), Callbacks{}) + co.state.Store(int32(StateRunning)) + co.setCommitTarget(newTestSyncTarget(100)) + co.AddBlockOperation(newMockBlock(101), OpAccept) + + require.NoError(t, co.ProcessQueuedBlockOperations(t.Context())) + require.Equal(t, StateExecutingBatch, co.CurrentState()) + }) + + t.Run("returns error on block operation failure", func(t *testing.T) { + co := NewCoordinator(NewSyncerRegistry(), Callbacks{}) + co.state.Store(int32(StateRunning)) + co.setCommitTarget(newTestSyncTarget(100)) + + failBlock := newMockBlock(101) + failBlock.acceptErr = errors.New("accept failed") + co.AddBlockOperation(failBlock, OpAccept) + + err := co.ProcessQueuedBlockOperations(t.Context()) + require.ErrorIs(t, err, errBatchOperationFailed) + }) + + t.Run("fails closed when already aborted", func(t *testing.T) { + calledFinalize := false + co := NewCoordinator(NewSyncerRegistry(), Callbacks{ + FinalizeVM: func(context.Context, message.Syncable) error { + calledFinalize = true + return nil + }, + }) + co.state.Store(int32(StateAborted)) + co.setCommitTarget(newTestSyncTarget(100)) + + err := co.ProcessQueuedBlockOperations(t.Context()) + 
require.ErrorIs(t, err, errInvalidState) + require.False(t, calledFinalize) + require.Equal(t, StateAborted, co.CurrentState()) + }) + + t.Run("requires commit target", func(t *testing.T) { + co := NewCoordinator(NewSyncerRegistry(), Callbacks{}) + co.state.Store(int32(StateRunning)) + + err := co.ProcessQueuedBlockOperations(t.Context()) + require.ErrorIs(t, err, errCommitTargetRequired) + require.Equal(t, StateAborted, co.CurrentState()) + }) +} + +// runCoordinator starts a coordinator and waits for completion. +func runCoordinator(t *testing.T, registry *SyncerRegistry, cbs Callbacks) (*Coordinator, error) { + t.Helper() + + var ( + errDone error + wg sync.WaitGroup + ) + wg.Add(1) + + cbs.OnDone = func(err error) { + errDone = err + wg.Done() + } + + co := NewCoordinator(registry, cbs) + co.Start(t.Context(), newTestSyncTarget(100)) + wg.Wait() + + return co, errDone +} + +func TestCoordinator_PivotCycleBlockReplay(t *testing.T) { + tests := []struct { + name string + initialHeight uint64 + blockLo, blockHi uint64 + pivots []uint64 + wantFinalHeight uint64 + wantReplayedLo uint64 + wantReplayedHi uint64 + }{ + { + name: "single pivot prunes below target", + initialHeight: 500, + blockLo: 490, + blockHi: 510, + pivots: []uint64{505}, + wantFinalHeight: 505, + wantReplayedLo: 506, + wantReplayedHi: 510, + }, + { + name: "two pivots advance commit target", + initialHeight: 100, + blockLo: 100, + blockHi: 200, + pivots: []uint64{150, 180}, + wantFinalHeight: 180, + wantReplayedLo: 181, // commit-target block (180) is pruned before replay (handled by FinalizeVM) + wantReplayedHi: 200, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var started sync.WaitGroup + started.Add(1) + release := make(chan struct{}) + + registry := NewSyncerRegistry() + require.NoError(t, registry.Register(NewBarrierSyncer("syncer", &started, release))) + + done := make(chan error, 1) + var finalizedTarget message.Syncable + co := NewCoordinator(registry, Callbacks{ + FinalizeVM: func(_ context.Context, target message.Syncable) error { + finalizedTarget = target + return nil + }, + OnDone: func(err error) { done <- err }, + }, WithPivotInterval(1)) + + co.Start(t.Context(), newTestSyncTarget(tt.initialHeight)) + started.Wait() + + blocks := enqueueBlockRange(t, co, tt.blockLo, tt.blockHi) + + for i, h := range tt.pivots { + require.NoError(t, co.UpdateSyncTarget(newTestSyncTarget(h))) + require.Equal(t, uint64(i+1), co.targetEpoch.Load()) + } + + close(release) + require.NoError(t, <-done) + + require.NotNil(t, finalizedTarget) + require.Equal(t, tt.wantFinalHeight, finalizedTarget.Height()) + require.Equal(t, StateCompleted, co.CurrentState()) + + if tt.blockLo < tt.wantReplayedLo { + requireBlocksNotReplayed(t, blocks, tt.blockLo, tt.wantReplayedLo-1) + } + requireBlocksReplayed(t, blocks, tt.wantReplayedLo, tt.wantReplayedHi) + }) + } +} + +func TestCoordinator_UpdateTargetFailureAborts(t *testing.T) { + wantErr := errors.New("syncer rejected update") + + var started sync.WaitGroup + started.Add(1) + release := make(chan struct{}) + + registry := NewSyncerRegistry() + // Register a barrier syncer that also fails on UpdateTarget. 
+ syncer := NewBarrierSyncer("failing-update", &started, release) + syncer.updateFn = func(message.Syncable) error { return wantErr } + require.NoError(t, registry.Register(syncer)) + + done := make(chan error, 1) + calledFinalize := false + co := NewCoordinator(registry, Callbacks{ + FinalizeVM: func(context.Context, message.Syncable) error { + calledFinalize = true + return nil + }, + OnDone: func(err error) { done <- err }, + }, WithPivotInterval(1)) + + co.Start(t.Context(), newTestSyncTarget(100)) + started.Wait() + + // Enqueue blocks so we can verify they're NOT replayed after abort. + blocks := enqueueBlockRange(t, co, 100, 110) + + // UpdateSyncTarget should fail and abort the coordinator. + err := co.UpdateSyncTarget(newTestSyncTarget(105)) + require.ErrorIs(t, err, wantErr) + require.Equal(t, StateAborted, co.CurrentState()) + + // The syncer is still blocked on the barrier. Release it so the + // coordinator can finish. The syncer will see context cancellation + // because abort cancels the coordinator context. + close(release) + err = <-done + require.ErrorIs(t, err, wantErr) + + // FinalizeVM should NOT have been called since the coordinator aborted. + require.False(t, calledFinalize, "FinalizeVM should not be called after abort") + + // No blocks should have been replayed. + requireBlocksNotReplayed(t, blocks, 100, 110) + + // Further state transitions should be rejected. + require.Equal(t, StateAborted, co.CurrentState()) + err = co.UpdateSyncTarget(newTestSyncTarget(200)) + require.ErrorIs(t, err, errInvalidState) +} + +// enqueueBlockRange creates mock blocks for the inclusive range [lo, hi] and +// enqueues them as OpAccept on the coordinator. Returns the block map. +func enqueueBlockRange(t *testing.T, co *Coordinator, lo, hi uint64) map[uint64]*mockEthBlockWrapper { + t.Helper() + blocks := make(map[uint64]*mockEthBlockWrapper, hi-lo+1) + for i := lo; i <= hi; i++ { + b := newMockBlock(i) + blocks[i] = b + require.True(t, co.AddBlockOperation(b, OpAccept)) + } + return blocks +} + +// requireBlocksReplayed asserts that every block in [lo, hi] was accepted exactly once. +func requireBlocksReplayed(t *testing.T, blocks map[uint64]*mockEthBlockWrapper, lo, hi uint64) { + t.Helper() + for i := lo; i <= hi; i++ { + require.Equal(t, 1, blocks[i].acceptCount, "block %d should have been replayed", i) + } +} + +// requireBlocksNotReplayed asserts that every block in [lo, hi] was never accepted. 
+func requireBlocksNotReplayed(t *testing.T, blocks map[uint64]*mockEthBlockWrapper, lo, hi uint64) { + t.Helper() + for i := lo; i <= hi; i++ { + require.Equal(t, 0, blocks[i].acceptCount, "block %d should have been pruned", i) + } +} + +func newTestSyncTarget(height uint64) message.Syncable { + var hashBytes [8]byte + binary.BigEndian.PutUint64(hashBytes[:], height) + hash := common.BytesToHash(hashBytes[:]) + + var rootBytes [8]byte + binary.BigEndian.PutUint64(rootBytes[:], height+1) + root := common.BytesToHash(rootBytes[:]) + return newSyncTarget(hash, root, height) +} diff --git a/graft/evm/sync/engine/doubles_test.go b/graft/evm/sync/engine/doubles_test.go index 0eab63a6598f..a404da7993ac 100644 --- a/graft/evm/sync/engine/doubles_test.go +++ b/graft/evm/sync/engine/doubles_test.go @@ -6,31 +6,79 @@ package engine import ( "context" "errors" + "math/big" "sync" "time" + "github.com/ava-labs/avalanchego/graft/evm/message" "github.com/ava-labs/avalanchego/graft/evm/sync/types" + + ethtypes "github.com/ava-labs/libevm/core/types" ) +// mockEthBlockWrapper implements [EthBlockWrapper] for testing. +type mockEthBlockWrapper struct { + ethBlock *ethtypes.Block + acceptErr error + rejectErr error + verifyErr error + + acceptCount int + rejectCount int + verifyCount int +} + +func newMockBlock(height uint64) *mockEthBlockWrapper { + header := ðtypes.Header{Number: new(big.Int).SetUint64(height)} + return &mockEthBlockWrapper{ + ethBlock: ethtypes.NewBlockWithHeader(header), + } +} + +func (m *mockEthBlockWrapper) GetEthBlock() *ethtypes.Block { return m.ethBlock } +func (m *mockEthBlockWrapper) Accept(context.Context) error { + m.acceptCount++ + return m.acceptErr +} + +func (m *mockEthBlockWrapper) Reject(context.Context) error { + m.rejectCount++ + return m.rejectErr +} + +func (m *mockEthBlockWrapper) Verify(context.Context) error { + m.verifyCount++ + return m.verifyErr +} + +var _ EthBlockWrapper = (*mockEthBlockWrapper)(nil) + // FuncSyncer adapts a function to the simple Syncer shape used in tests. It is -// useful for defining small, behavior-driven syncers inline. +// useful for defining small, behavior-driven syncers inline. When updateFn is +// set, it is called on UpdateTarget instead of the default no-op. type FuncSyncer struct { - name string - id string - fn func(ctx context.Context) error + name string + fn func(ctx context.Context) error + updateFn func(message.Syncable) error } -// Sync calls the wrapped function and returns its result. func (f FuncSyncer) Sync(ctx context.Context) error { return f.fn(ctx) } func (f FuncSyncer) Name() string { return f.name } -func (f FuncSyncer) ID() string { return f.id } +func (f FuncSyncer) ID() string { return f.name } + +func (f FuncSyncer) UpdateTarget(target message.Syncable) error { + if f.updateFn != nil { + return f.updateFn(target) + } + return nil +} -var _ types.Syncer = FuncSyncer{} +var _ types.Syncer = (*FuncSyncer)(nil) // NewBarrierSyncer returns a syncer that signals startedWG.Done() when Sync begins, // then blocks until releaseCh is closed (returns nil) or ctx is canceled (returns ctx.Err). 
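The barrier doubles used throughout these tests follow one shape: signal readiness via a WaitGroup, then park on a release channel or the context, whichever fires first. A compact stdlib sketch of that shape in isolation (names illustrative):

	package main

	import (
		"context"
		"fmt"
		"sync"
	)

	// barrier blocks the worker until release is closed, reporting ctx errors.
	func barrier(ctx context.Context, started *sync.WaitGroup, release <-chan struct{}) error {
		started.Done() // tell the test we are running
		select {
		case <-release:
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	func main() {
		var started sync.WaitGroup
		started.Add(1)
		release := make(chan struct{})
		done := make(chan error, 1)

		go func() { done <- barrier(context.Background(), &started, release) }()

		started.Wait()      // worker is definitely parked inside barrier now
		close(release)      // let it finish
		fmt.Println(<-done) // <nil>
	}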
func NewBarrierSyncer(name string, startedWG *sync.WaitGroup, releaseCh <-chan struct{}) FuncSyncer { - return FuncSyncer{name: name, id: name, fn: func(ctx context.Context) error { + return FuncSyncer{name: name, fn: func(ctx context.Context) error { if startedWG != nil { startedWG.Done() } @@ -46,7 +94,7 @@ func NewBarrierSyncer(name string, startedWG *sync.WaitGroup, releaseCh <-chan s // NewErrorSyncer returns a syncer that signals startedWG.Done() when Sync begins, // then blocks until trigger is closed (returns errToReturn) or ctx is canceled (returns ctx.Err). func NewErrorSyncer(name string, startedWG *sync.WaitGroup, trigger <-chan struct{}, errToReturn error) FuncSyncer { - return FuncSyncer{name: name, id: name, fn: func(ctx context.Context) error { + return FuncSyncer{name: name, fn: func(ctx context.Context) error { if startedWG != nil { startedWG.Done() } @@ -62,7 +110,7 @@ func NewErrorSyncer(name string, startedWG *sync.WaitGroup, trigger <-chan struc // NewCancelAwareSyncer returns a syncer that signals startedWG.Done() when Sync begins, // then blocks until ctx is canceled (returns ctx.Err) or timeout elapses (returns timeout error). func NewCancelAwareSyncer(name string, startedWG *sync.WaitGroup, timeout time.Duration) FuncSyncer { - return FuncSyncer{name: name, id: name, fn: func(ctx context.Context) error { + return FuncSyncer{name: name, fn: func(ctx context.Context) error { if startedWG != nil { startedWG.Done() } diff --git a/graft/evm/sync/engine/executor_dynamic.go b/graft/evm/sync/engine/executor_dynamic.go new file mode 100644 index 000000000000..c6a7d6335fc6 --- /dev/null +++ b/graft/evm/sync/engine/executor_dynamic.go @@ -0,0 +1,129 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import ( + "context" + "fmt" + + "github.com/ava-labs/libevm/log" + + "github.com/ava-labs/avalanchego/graft/evm/message" +) + +var _ Executor = (*dynamicExecutor)(nil) + +// dynamicExecutor runs syncers concurrently with block queueing. +// It wraps [Coordinator] to manage the sync lifecycle. +type dynamicExecutor struct { + coordinator *Coordinator +} + +func newDynamicExecutor(registry *SyncerRegistry, acceptor Acceptor, pivotInterval uint64) *dynamicExecutor { + coordinator := NewCoordinator( + registry, + Callbacks{ + FinalizeVM: acceptor.AcceptSync, + DrainAcceptorQueue: acceptor.DrainAcceptorQueue, + OnDone: nil, // Set in Execute to capture completion. + }, + WithPivotInterval(pivotInterval), + ) + return &dynamicExecutor{coordinator: coordinator} +} + +// Execute launches the coordinator and blocks until sync completes or fails. +func (d *dynamicExecutor) Execute(ctx context.Context, summary message.Syncable) error { + done := make(chan error, 1) + + // Wire up OnDone to signal completion. + d.coordinator.callbacks.OnDone = func(err error) { + if err != nil { + log.Error("dynamic state sync completed with error", "err", err) + } else { + log.Info("dynamic state sync completed successfully") + } + done <- err + } + + d.coordinator.Start(ctx, summary) + return <-done +} + +// OnBlockAccepted enqueues the block for deferred processing and updates the sync target. +func (d *dynamicExecutor) OnBlockAccepted(b EthBlockWrapper) (bool, error) { + state := d.coordinator.CurrentState() + if state == StateExecutingBatch { + // During batch replay the block is already being executed directly + // by executeBlockOperations. 
Re-enqueueing here would cause an + // infinite loop (Accept -> OnEngineAccept -> enqueue -> dequeue -> Accept ...). + return false, nil + } + + if !d.enqueue(b, OpAccept) { + log.Warn("OnBlockAccepted: enqueue failed, block not deferred", + "coordinatorState", int32(state)) + return false, nil + } + + if b == nil || b.GetEthBlock() == nil { + return true, nil + } + + ethBlock := b.GetEthBlock() + target := newSyncTarget(ethBlock.Hash(), ethBlock.Root(), ethBlock.NumberU64()) + if err := d.coordinator.UpdateSyncTarget(target); err != nil { + // Block is enqueued but target update failed. + return true, fmt.Errorf("block enqueued but sync target update failed: %w", err) + } + return true, nil +} + +// OnBlockRejected enqueues the block for deferred rejection. +func (d *dynamicExecutor) OnBlockRejected(b EthBlockWrapper) (bool, error) { + if d.coordinator.CurrentState() == StateExecutingBatch { + return false, nil + } + return d.enqueue(b, OpReject), nil +} + +// OnBlockVerified enqueues the block for deferred verification. +func (d *dynamicExecutor) OnBlockVerified(b EthBlockWrapper) (bool, error) { + state := d.coordinator.CurrentState() + if state == StateExecutingBatch { + return false, nil + } + ok := d.enqueue(b, OpVerify) + if !ok { + log.Warn("OnBlockVerified: enqueue failed, block not deferred", + "coordinatorState", int32(state)) + } + return ok, nil +} + +// enqueue adds a block operation to the coordinator's queue. +func (d *dynamicExecutor) enqueue(b EthBlockWrapper, op BlockOperationType) bool { + ok := d.coordinator.AddBlockOperation(b, op) + if !ok { + if b != nil && b.GetEthBlock() != nil { + ethBlock := b.GetEthBlock() + log.Warn("could not enqueue block operation", + "hash", ethBlock.Hash(), + "height", ethBlock.NumberU64(), + "op", op.String(), + ) + } + } + return ok +} + +// CurrentState returns the coordinator's current state. +func (d *dynamicExecutor) CurrentState() State { + return d.coordinator.CurrentState() +} + +// UpdateSyncTarget updates the coordinator's sync target. +func (d *dynamicExecutor) UpdateSyncTarget(target message.Syncable) error { + return d.coordinator.UpdateSyncTarget(target) +} diff --git a/graft/evm/sync/engine/executor_dynamic_test.go b/graft/evm/sync/engine/executor_dynamic_test.go new file mode 100644 index 000000000000..bc7757dd9f64 --- /dev/null +++ b/graft/evm/sync/engine/executor_dynamic_test.go @@ -0,0 +1,195 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package engine + +import ( + "context" + "errors" + "sync" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/ava-labs/avalanchego/graft/evm/message" +) + +type noopAcceptor struct{} + +func (noopAcceptor) AcceptSync(context.Context, message.Syncable) error { return nil } +func (noopAcceptor) DrainAcceptorQueue() {} + +func TestDynamicExecutor_OnBlockAccepted(t *testing.T) { + updateErr := errors.New("update failed") + tests := []struct { + name string + state State + updateErr error + wantErr error + }{ + { + name: "running updates target successfully", + state: StateRunning, + updateErr: nil, + wantErr: nil, + }, + { + name: "running returns target update failure", + state: StateRunning, + updateErr: updateErr, + wantErr: updateErr, + }, + { + name: "executing batch skips target update", + state: StateExecutingBatch, + updateErr: nil, + wantErr: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + updates := 0 + syncer := FuncSyncer{ + name: "target-tracker", + updateFn: func(message.Syncable) error { + updates++ + return tt.updateErr + }, + } + registry := NewSyncerRegistry() + require.NoError(t, registry.Register(syncer)) + + executor := newDynamicExecutor(registry, noopAcceptor{}, 1) + executor.coordinator.state.Store(int32(tt.state)) + + deferred, err := executor.OnBlockAccepted(newMockBlock(100)) + require.ErrorIs(t, err, tt.wantErr) + + if tt.state == StateExecutingBatch { + // During batch replay, blocks are not re-enqueued to avoid + // infinite re-entrancy loops. + require.False(t, deferred) + require.Empty(t, executor.coordinator.queue.dequeueBatch()) + } else { + require.True(t, deferred) + wantUpdates := 0 + if tt.state == StateRunning { + wantUpdates = 1 + } + require.Equal(t, wantUpdates, updates) + + if tt.updateErr != nil { + require.Equal(t, StateAborted, executor.coordinator.CurrentState()) + } else { + require.Equal(t, tt.state, executor.coordinator.CurrentState()) + } + + queued := executor.coordinator.queue.dequeueBatch() + require.Len(t, queued, 1) + require.Equal(t, OpAccept, queued[0].operation) + } + }) + } +} + +func TestDynamicExecutor_FullPivotCycleWithBlockAcceptance(t *testing.T) { + // End-to-end test: blocks arrive via OnBlockAccepted while syncers are + // running, triggering a pivot. After syncers finish, batch replay + // executes the surviving blocks. + var started sync.WaitGroup + started.Add(1) + release := make(chan struct{}) + + registry := NewSyncerRegistry() + require.NoError(t, registry.Register(NewBarrierSyncer("syncer", &started, release))) + + done := make(chan error, 1) + executor := newDynamicExecutor(registry, noopAcceptor{}, 1) + go func() { + done <- executor.Execute(t.Context(), newTestSyncTarget(100)) + }() + + started.Wait() + + // Simulate consensus accepting blocks while sync runs. + // With pivotInterval=1, every block triggers a pivot, so the final + // commit target will be 110 (the last block accepted). + blocks := make(map[uint64]*mockEthBlockWrapper) + for i := uint64(100); i <= 110; i++ { + b := newMockBlock(i) + blocks[i] = b + deferred, err := executor.OnBlockAccepted(b) + require.NoError(t, err) + require.True(t, deferred, "block %d should be deferred", i) + } + + require.Equal(t, uint64(110), executor.coordinator.getCommitTarget().Height()) + + // Release syncers to complete. + close(release) + require.NoError(t, <-done) + + require.Equal(t, StateCompleted, executor.CurrentState()) + + // Each pivot pruned blocks below the new target. 
With pivotInterval=1, + // the commit target advances to 110. All blocks <= 110 are pruned before + // batch replay: blocks < 110 by pivot-time pruning, block 110 (the commit + // target) by the pre-replay removeThroughHeight since FinalizeVM handles it. + requireBlocksNotReplayed(t, blocks, 100, 110) +} + +func TestDynamicExecutor_OnBlockRejectedAndVerified(t *testing.T) { + tests := []struct { + name string + state State + call func(*dynamicExecutor, EthBlockWrapper) (bool, error) + wantOp BlockOperationType + }{ + { + name: "reject is deferred", + state: StateRunning, + call: (*dynamicExecutor).OnBlockRejected, + wantOp: OpReject, + }, + { + name: "verify is deferred", + state: StateRunning, + call: (*dynamicExecutor).OnBlockVerified, + wantOp: OpVerify, + }, + { + name: "reject during batch replay is not deferred", + state: StateExecutingBatch, + call: (*dynamicExecutor).OnBlockRejected, + }, + { + name: "verify during batch replay is not deferred", + state: StateExecutingBatch, + call: (*dynamicExecutor).OnBlockVerified, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + registry := NewSyncerRegistry() + require.NoError(t, registry.Register(FuncSyncer{name: "target-tracker"})) + + executor := newDynamicExecutor(registry, noopAcceptor{}, 1) + executor.coordinator.state.Store(int32(tt.state)) + + deferred, err := tt.call(executor, newMockBlock(100)) + require.NoError(t, err) + + if tt.state == StateExecutingBatch { + require.False(t, deferred) + require.Empty(t, executor.coordinator.queue.dequeueBatch()) + } else { + require.True(t, deferred) + queued := executor.coordinator.queue.dequeueBatch() + require.Len(t, queued, 1) + require.Equal(t, tt.wantOp, queued[0].operation) + } + }) + } +} diff --git a/graft/evm/sync/engine/executor_static.go b/graft/evm/sync/engine/executor_static.go index a9eba560fcda..6a7ecd36afcb 100644 --- a/graft/evm/sync/engine/executor_static.go +++ b/graft/evm/sync/engine/executor_static.go @@ -34,3 +34,12 @@ func (e *staticExecutor) Execute(ctx context.Context, summary message.Syncable) } return e.acceptor.AcceptSync(ctx, summary) } + +// OnBlockAccepted is a no-op in static mode. +func (*staticExecutor) OnBlockAccepted(EthBlockWrapper) (bool, error) { return false, nil } + +// OnBlockRejected is a no-op in static mode. +func (*staticExecutor) OnBlockRejected(EthBlockWrapper) (bool, error) { return false, nil } + +// OnBlockVerified is a no-op in static mode. +func (*staticExecutor) OnBlockVerified(EthBlockWrapper) (bool, error) { return false, nil } diff --git a/graft/evm/sync/engine/pivot_policy.go b/graft/evm/sync/engine/pivot_policy.go new file mode 100644 index 000000000000..2a7022de6006 --- /dev/null +++ b/graft/evm/sync/engine/pivot_policy.go @@ -0,0 +1,62 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import "sync/atomic" + +// defaultPivotInterval is the default number of blocks between sync target updates. +const defaultPivotInterval = uint64(10000) + +// pivotPolicy encapsulates the logic for deciding when to forward +// a new sync target based on a fixed block-height interval. It is +// safe for concurrent use. +// +// The shouldForward -> advance sequence must be externally serialized. +// In coordinator flow this is guaranteed by Coordinator.updateMu. +type pivotPolicy struct { + interval uint64 + // nextHeight is the next height threshold at or beyond which we + // should forward an update. A value of 0 means uninitialized. 
+ nextHeight atomic.Uint64 +} + +// newPivotPolicy creates a new pivot policy with the given interval. +// If interval is 0, defaultPivotInterval is used. +func newPivotPolicy(interval uint64) *pivotPolicy { + if interval == 0 { + interval = defaultPivotInterval + } + return &pivotPolicy{interval: interval} +} + +// shouldForward reports whether a summary at the given height should be +// forwarded, initializing the next threshold on first use. When it returns +// true, callers should follow up with advance(). +func (p *pivotPolicy) shouldForward(height uint64) bool { + if p == nil || p.interval == 0 { + return true + } + next := p.nextHeight.Load() + if next == 0 { + // Round up the initial height to the next multiple of interval. + // Ceil division: ((h + interval - 1) / interval) * interval + init := ((height + p.interval - 1) / p.interval) * p.interval + // Initialize once. If another goroutine wins, read the established value. + if !p.nextHeight.CompareAndSwap(0, init) { + next = p.nextHeight.Load() + } else { + next = init + } + } + return height >= next +} + +// advance moves the next threshold forward by one interval. Call this +// only after shouldForward has returned true and the update was issued. +func (p *pivotPolicy) advance() { + if p == nil || p.interval == 0 { + return + } + p.nextHeight.Add(p.interval) +} diff --git a/graft/evm/sync/engine/pivot_policy_test.go b/graft/evm/sync/engine/pivot_policy_test.go new file mode 100644 index 000000000000..61d4cfc47cf1 --- /dev/null +++ b/graft/evm/sync/engine/pivot_policy_test.go @@ -0,0 +1,27 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestPivotPolicy(t *testing.T) { + // Zero interval uses default. + require.Equal(t, defaultPivotInterval, newPivotPolicy(0).interval) + + // Test throttling behavior. + p := newPivotPolicy(100) + + // First call at 150 initializes threshold to ceil(150/100)*100 = 200. + require.False(t, p.shouldForward(150)) // 150 < 200 + require.False(t, p.shouldForward(199)) // 199 < 200 + require.True(t, p.shouldForward(200)) // 200 >= 200 + p.advance() // threshold becomes 300 + + require.False(t, p.shouldForward(250)) // 250 < 300 + require.True(t, p.shouldForward(300)) // 300 >= 300 +} diff --git a/graft/evm/sync/engine/registry.go b/graft/evm/sync/engine/registry.go index a8d3dcd591f8..49e3f8aa36d2 100644 --- a/graft/evm/sync/engine/registry.go +++ b/graft/evm/sync/engine/registry.go @@ -97,7 +97,7 @@ func (r *SyncerRegistry) StartAsync(ctx context.Context, summary message.Syncabl log.Error("failed syncing", "name", task.name, "summary", summaryBlockHashHex, "height", blockHeight, "err", err) return fmt.Errorf("%s failed: %w", task.name, err) } - log.Info("completed successfully", "name", task.name, "summary", summaryBlockHashHex, "height", blockHeight) + log.Info("syncer goroutine exiting", "name", task.name, "summary", summaryBlockHashHex, "height", blockHeight) return nil }) @@ -106,6 +106,18 @@ func (r *SyncerRegistry) StartAsync(ctx context.Context, summary message.Syncabl return g } +// UpdateSyncTarget updates the sync target for all registered syncers. 
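The threshold arithmetic in pivotPolicy rounds the first observed height up to the next multiple of the interval, then steps by one interval per forwarded update. A short worked sketch of that arithmetic, matching the values exercised in the test above; ceilTo is a hypothetical helper mirroring the ceil division in shouldForward:

	package main

	import "fmt"

	// ceilTo rounds h up to the next multiple of interval.
	func ceilTo(h, interval uint64) uint64 {
		return ((h + interval - 1) / interval) * interval
	}

	func main() {
		const interval = uint64(100)
		next := ceilTo(150, interval) // first threshold: 200
		fmt.Println(next)
		fmt.Println(150 >= next, 199 >= next, 200 >= next) // false false true
		next += interval // advance() after a forwarded update
		fmt.Println(next) // 300
	}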
+func (r *SyncerRegistry) UpdateSyncTarget(newTarget message.Syncable) error { + for _, task := range r.syncers { + if err := task.syncer.UpdateTarget(newTarget); err != nil { + log.Error("failed updating sync target", "name", task.name, "err", err) + return err + } + log.Info("updated sync target", "name", task.name, "new_target", newTarget.GetBlockHash().Hex(), "height", newTarget.Height()) + } + return nil +} + // FinalizeAll iterates over all registered syncers and calls Finalize on those that implement the Finalizer interface. // Errors are logged but not returned to ensure best-effort cleanup of all syncers. func (r *SyncerRegistry) FinalizeAll(summary message.Syncable) { diff --git a/graft/evm/sync/engine/registry_test.go b/graft/evm/sync/engine/registry_test.go index f56ef210c42b..d9d650b4895d 100644 --- a/graft/evm/sync/engine/registry_test.go +++ b/graft/evm/sync/engine/registry_test.go @@ -41,6 +41,9 @@ func (m *mockSyncer) Sync(context.Context) error { func (m *mockSyncer) Name() string { return m.name } func (m *mockSyncer) ID() string { return m.name } +func (*mockSyncer) UpdateTarget(message.Syncable) error { + return nil +} // syncerConfig describes a test syncer setup for RunSyncerTasks table tests. type syncerConfig struct { @@ -58,8 +61,7 @@ func TestSyncerRegistry_Register(t *testing.T) { tests := []struct { name string registrations []*mockSyncer - expectedError error - expectedCount int + wantError error }{ { name: "successful registrations", @@ -67,7 +69,6 @@ func TestSyncerRegistry_Register(t *testing.T) { newMockSyncer("Syncer1", nil), newMockSyncer("Syncer2", nil), }, - expectedCount: 2, }, { name: "duplicate id registration", @@ -75,8 +76,7 @@ func TestSyncerRegistry_Register(t *testing.T) { newMockSyncer("Syncer1", nil), newMockSyncer("Syncer1", nil), }, - expectedError: errSyncerAlreadyRegistered, - expectedCount: 1, + wantError: errSyncerAlreadyRegistered, }, { name: "preserve registration order", @@ -85,7 +85,6 @@ func TestSyncerRegistry_Register(t *testing.T) { newMockSyncer("Syncer2", nil), newMockSyncer("Syncer3", nil), }, - expectedCount: 3, }, } @@ -93,6 +92,7 @@ func TestSyncerRegistry_Register(t *testing.T) { t.Run(tt.name, func(t *testing.T) { registry := NewSyncerRegistry() var errLast error + successfulRegistrations := 0 // Perform registrations. for _, reg := range tt.registrations { @@ -101,16 +101,17 @@ func TestSyncerRegistry_Register(t *testing.T) { errLast = err break } + successfulRegistrations++ } // Check error expectations. - require.ErrorIs(t, errLast, tt.expectedError) + require.ErrorIs(t, errLast, tt.wantError) // Verify registration count. - require.Len(t, registry.syncers, tt.expectedCount) + require.Len(t, registry.syncers, successfulRegistrations) // Verify registration order for successful cases. 
- if tt.expectedError == nil { + if tt.wantError == nil { for i, reg := range tt.registrations { require.Equal(t, reg.name, registry.syncers[i].name) require.Equal(t, reg, registry.syncers[i].syncer) @@ -123,10 +124,10 @@ func TestSyncerRegistry_Register(t *testing.T) { func TestSyncerRegistry_RunSyncerTasks(t *testing.T) { errFoo := errors.New("foo") tests := []struct { - name string - syncers []syncerConfig - expectedError error - assertState func(t *testing.T, mockSyncers []*mockSyncer) + name string + syncers []syncerConfig + wantError error + assertState func(t *testing.T, mockSyncers []*mockSyncer) }{ { name: "successful execution", @@ -145,7 +146,7 @@ func TestSyncerRegistry_RunSyncerTasks(t *testing.T) { {"Syncer1", errFoo}, {"Syncer2", nil}, }, - expectedError: errFoo, + wantError: errFoo, assertState: func(t *testing.T, mockSyncers []*mockSyncer) { // First syncer should be started and waited on (but wait failed). require.True(t, mockSyncers[0].started, "First syncer should have been started") @@ -172,7 +173,7 @@ func TestSyncerRegistry_RunSyncerTasks(t *testing.T) { err := registry.RunSyncerTasks(ctx, newTestClientSummary(t, c)) - require.ErrorIs(t, err, tt.expectedError) + require.ErrorIs(t, err, tt.wantError) // Use custom assertion function for each test case. tt.assertState(t, mockSyncers) diff --git a/graft/evm/sync/engine/server.go b/graft/evm/sync/engine/server.go index 8c13af41e8c2..fb3011cf63b2 100644 --- a/graft/evm/sync/engine/server.go +++ b/graft/evm/sync/engine/server.go @@ -36,6 +36,10 @@ type BlockChain interface { // TrieDB returns the database used for storing the state trie. TrieDB() *triedb.Database + + // DrainAcceptorQueue blocks until all pending accepted blocks have been + // fully processed by the async acceptor. + DrainAcceptorQueue() } // SummaryProvider provides state summaries for blocks. diff --git a/graft/evm/sync/engine/sync_target.go b/graft/evm/sync/engine/sync_target.go new file mode 100644 index 000000000000..c53ff7fec08f --- /dev/null +++ b/graft/evm/sync/engine/sync_target.go @@ -0,0 +1,44 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package engine + +import ( + "context" + + "github.com/ava-labs/libevm/common" + + "github.com/ava-labs/avalanchego/graft/evm/message" + "github.com/ava-labs/avalanchego/ids" + "github.com/ava-labs/avalanchego/snow/engine/snowman/block" +) + +var _ message.Syncable = (*syncTarget)(nil) + +// syncTarget is a minimal implementation of [message.Syncable] used internally +// to advance the coordinator's sync target from engine-accepted blocks. +// +// NOTE: Unlike [message.BlockSyncSummary], this is not serializable and should not +// be used for network communication. Only [message.Syncable.GetBlockHash], +// [message.Syncable.GetBlockRoot], and [message.Syncable.Height] are used in practice. +// The other methods are stubs to satisfy the interface. 
+type syncTarget struct { + hash common.Hash + root common.Hash + height uint64 +} + +func newSyncTarget(hash common.Hash, root common.Hash, height uint64) message.Syncable { + return &syncTarget{hash: hash, root: root, height: height} +} + +func (s *syncTarget) GetBlockHash() common.Hash { return s.hash } +func (s *syncTarget) GetBlockRoot() common.Hash { return s.root } + +func (s *syncTarget) ID() ids.ID { return ids.ID(s.hash) } +func (s *syncTarget) Height() uint64 { return s.height } +func (s *syncTarget) Bytes() []byte { return s.hash.Bytes() } +func (*syncTarget) Accept(context.Context) (block.StateSyncMode, error) { + // When used internally to advance targets, we always handle dynamically. + return block.StateSyncDynamic, nil +} diff --git a/graft/evm/sync/evmstate/BUILD.bazel b/graft/evm/sync/evmstate/BUILD.bazel index 3d6b63bea745..95cd21ef4eb7 100644 --- a/graft/evm/sync/evmstate/BUILD.bazel +++ b/graft/evm/sync/evmstate/BUILD.bazel @@ -5,7 +5,8 @@ go_library( name = "evmstate", srcs = [ "firewood_syncer.go", - "state_syncer.go", + "hashdb_dynamic_syncer.go", + "hashdb_syncer.go", "sync_helpers.go", "trie_queue.go", "trie_segments.go", @@ -51,7 +52,8 @@ graft_go_test( name = "evmstate_test", srcs = [ "firewood_syncer_test.go", - "sync_test.go", + "hashdb_dynamic_syncer_test.go", + "hashdb_syncer_test.go", "trie_sync_stats_test.go", ], embed = [":evmstate"], diff --git a/graft/evm/sync/evmstate/firewood_syncer.go b/graft/evm/sync/evmstate/firewood_syncer.go index e6e757f15207..19f5ec9755bd 100644 --- a/graft/evm/sync/evmstate/firewood_syncer.go +++ b/graft/evm/sync/evmstate/firewood_syncer.go @@ -5,14 +5,13 @@ package evmstate import ( "context" - "fmt" "sync" "github.com/ava-labs/firewood-go-ethhash/ffi" "github.com/ava-labs/libevm/common" "github.com/ava-labs/avalanchego/database/merkle/firewood/syncer" - "github.com/ava-labs/avalanchego/graft/evm/sync/code" + "github.com/ava-labs/avalanchego/graft/evm/message" "github.com/ava-labs/avalanchego/graft/evm/sync/types" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/network/p2p" @@ -26,14 +25,16 @@ var ( ) type FirewoodSyncer struct { - s *merklesync.Syncer[*syncer.RangeProof, struct{}] - cancel context.CancelFunc - codeQueue *code.Queue - // finalizeOnce is initialized in the constructor to make Finalize idempotent. - finalizeOnce func() error + s *merklesync.Syncer[*syncer.RangeProof, struct{}] + cancel context.CancelFunc + + // finalizeCodeQueue guards the single call to codeQueue.Finalize(). + // Both Sync() (on success) and Finalize() (best-effort cleanup) go through + // this to avoid double-finalize errors since Queue.Finalize is not idempotent. 
+ finalizeCodeQueue func() error } -func NewFirewoodSyncer(config syncer.Config, db *ffi.Database, target common.Hash, codeQueue *code.Queue, rpClient, cpClient *p2p.Client) (*FirewoodSyncer, error) { +func NewFirewoodSyncer(config syncer.Config, db *ffi.Database, target common.Hash, codeQueue types.CodeRequestQueue, rpClient, cpClient *p2p.Client) (*FirewoodSyncer, error) { s, err := syncer.NewEVM( config, db, @@ -46,36 +47,29 @@ func NewFirewoodSyncer(config syncer.Config, db *ffi.Database, target common.Has return nil, err } f := &FirewoodSyncer{ - s: s, - cancel: func() {}, // overwritten in Sync - codeQueue: codeQueue, + s: s, + cancel: func() {}, // overwritten in Sync + finalizeCodeQueue: sync.OnceValue(codeQueue.Finalize), } - f.finalizeOnce = sync.OnceValue(f.finish) return f, nil } +// Sync runs the firewood state syncer to completion or until the context is +// cancelled. On successful completion it finalizes the code queue so the code +// syncer can exit. func (f *FirewoodSyncer) Sync(ctx context.Context) error { ctx, f.cancel = context.WithCancel(ctx) if err := f.s.Sync(ctx); err != nil { return err } - - return f.Finalize() + return f.finalizeCodeQueue() } +// Finalize performs best-effort cleanup: cancels the sync context and finalizes +// the code queue. It is idempotent and safe to call multiple times. func (f *FirewoodSyncer) Finalize() error { - return f.finalizeOnce() -} - -// finish performs the finalization logic for the FirewoodSyncer inside a [sync.Once]. -// This is linked to the [sync.Once] in the constructor, and should not be called directly. -func (f *FirewoodSyncer) finish() error { - // Ensure the syncer stops work and the code queue closes on exit. f.cancel() - if err := f.codeQueue.Finalize(); err != nil { - return fmt.Errorf("finalizing code queue: %w", err) - } - return nil + return f.finalizeCodeQueue() } func (*FirewoodSyncer) ID() string { @@ -85,3 +79,10 @@ func (*FirewoodSyncer) ID() string { func (*FirewoodSyncer) Name() string { return "Firewood EVM State Syncer" } + +// UpdateTarget forwards the new target root to the underlying merkle syncer, +// which re-prioritizes completed work items for re-sync against the new root. +// It is thread-safe, non-blocking, and safe to call while Sync is running. +func (f *FirewoodSyncer) UpdateTarget(target message.Syncable) error { + return f.s.UpdateSyncTarget(ids.ID(target.GetBlockRoot())) +} diff --git a/graft/evm/sync/evmstate/hashdb_dynamic_syncer.go b/graft/evm/sync/evmstate/hashdb_dynamic_syncer.go new file mode 100644 index 000000000000..b27ec08ae231 --- /dev/null +++ b/graft/evm/sync/evmstate/hashdb_dynamic_syncer.go @@ -0,0 +1,155 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package evmstate + +import ( + "context" + "fmt" + + "github.com/ava-labs/libevm/common" + "github.com/ava-labs/libevm/ethdb" + "github.com/ava-labs/libevm/log" + "github.com/ava-labs/libevm/trie" + "github.com/ava-labs/libevm/triedb" + "golang.org/x/sync/errgroup" + + "github.com/ava-labs/avalanchego/graft/evm/core/state/snapshot" + "github.com/ava-labs/avalanchego/graft/evm/message" + "github.com/ava-labs/avalanchego/graft/evm/sync/client" + "github.com/ava-labs/avalanchego/graft/evm/sync/code" + "github.com/ava-labs/avalanchego/graft/evm/sync/types" + "github.com/ava-labs/avalanchego/vms/evm/sync/customrawdb" +) + +const hashDBDynamicSyncerName = "HashDB EVM State Syncer (dynamic)" + +var _ types.PivotSession = (*hashDBPivotSession)(nil) + +// hashDBPivotSession implements types.PivotSession for EVM state sync with HashDB. +// It owns the code queue and code syncer, running both alongside the state +// syncer inside Run. On pivot, all three are shut down and rebuilt. +type hashDBPivotSession struct { + inner *HashDBSyncer + codeQueue *code.Queue + codeSyncer *code.Syncer + + // Retained for rebuilding on pivot. + syncClient client.Client + db ethdb.Database + leafsRequestSize uint16 + leafsRequestType message.LeafsRequestType + baseOpts []HashDBSyncerOption // excludes per-pivot incremental options +} + +func (s *hashDBPivotSession) Run(ctx context.Context) error { + eg, egCtx := errgroup.WithContext(ctx) + eg.Go(func() error { + log.Info("code syncer started", "root", s.inner.root) + err := s.codeSyncer.Sync(egCtx) + log.Info("code syncer finished", "root", s.inner.root, "err", err) + return err + }) + eg.Go(func() error { + log.Info("state syncer started", "root", s.inner.root) + err := s.inner.Sync(egCtx) + log.Info("state syncer finished", "root", s.inner.root, "err", err) + return err + }) + return eg.Wait() +} + +func (s *hashDBPivotSession) ShouldPivot(newRoot common.Hash) bool { + return newRoot != s.inner.root +} + +func (s *hashDBPivotSession) Rebuild(newRoot common.Hash, _ uint64) (types.PivotSession, error) { + log.Info("state syncer pivoting to new root", "oldRoot", s.inner.root, "newRoot", newRoot) + + if err := s.inner.Finalize(); err != nil { + log.Error("failed to flush in-progress batches during pivot", "err", err) + } + s.codeQueue.Shutdown() + <-snapshot.WipeSnapshot(s.db, false) + // Clear stale main trie segments from the prior interrupted session. + // The account trie is always re-synced from scratch. + customrawdb.ClearSyncSegments(s.db, newRoot) + + trieDB := triedb.NewDatabase(s.db, nil) + var skipped, registered uint64 + incrementalOpts := []HashDBSyncerOption{ + WithPreserveSegments(), + WithStorageTrieFilter(func(_ ethdb.Database, accountHash common.Hash, storageRoot common.Hash) bool { + total := skipped + registered + _, err := trie.New(trie.StorageTrieID(newRoot, storageRoot, accountHash), trieDB) + if err == nil { + skipped++ + } else { + registered++ + } + if total == 0 || (total+1)%50000 == 0 { + log.Info("storage trie filter", "skipped", skipped, "registered", registered, "total", total+1) + } + return err == nil + }), + } + return newHashDBPivotSession(s.syncClient, s.db, newRoot, s.leafsRequestSize, s.leafsRequestType, s.baseOpts, incrementalOpts) +} + +func (*hashDBPivotSession) OnSessionComplete() error { + // Code queue finalization is handled by the inner syncer's + // onMainTrieFinished callback (via WithFinalizeCodeQueue). + return nil +} + +// Finalize flushes the inner syncer's in-progress work. 
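Run launches the code syncer and state syncer on a shared errgroup, so the first failure cancels the sibling through the derived context. A minimal sketch of that fan-out pattern using golang.org/x/sync/errgroup; the error text is illustrative:

	package main

	import (
		"context"
		"errors"
		"fmt"

		"golang.org/x/sync/errgroup"
	)

	func main() {
		eg, egCtx := errgroup.WithContext(context.Background())

		eg.Go(func() error {
			return errors.New("state syncer failed") // first error wins
		})
		eg.Go(func() error {
			<-egCtx.Done() // sibling observes cancellation promptly
			return egCtx.Err()
		})

		// Wait returns the first non-nil error from any goroutine.
		fmt.Println(eg.Wait()) // state syncer failed
	}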
+func (s *hashDBPivotSession) Finalize() error { + return s.inner.Finalize() +} + +func newHashDBPivotSession(syncClient client.Client, db ethdb.Database, root common.Hash, leafsRequestSize uint16, leafsRequestType message.LeafsRequestType, baseOpts []HashDBSyncerOption, extraOpts []HashDBSyncerOption) (*hashDBPivotSession, error) { + codeQueue, err := code.NewQueue(db) + if err != nil { + return nil, fmt.Errorf("failed to create code queue: %w", err) + } + codeSyncer, err := code.NewSyncer(syncClient, db, codeQueue.CodeHashes()) + if err != nil { + return nil, fmt.Errorf("failed to create code syncer: %w", err) + } + + allOpts := make([]HashDBSyncerOption, 0, len(baseOpts)+len(extraOpts)+1) + allOpts = append(allOpts, baseOpts...) + allOpts = append(allOpts, extraOpts...) + allOpts = append(allOpts, WithFinalizeCodeQueue(codeQueue.Finalize)) + inner, err := NewHashDBSyncer(syncClient, db, root, codeQueue, leafsRequestSize, leafsRequestType, allOpts...) + if err != nil { + return nil, fmt.Errorf("failed to create state syncer for root %s: %w", root, err) + } + return &hashDBPivotSession{ + inner: inner, + codeQueue: codeQueue, + codeSyncer: codeSyncer, + syncClient: syncClient, + db: db, + leafsRequestSize: leafsRequestSize, + leafsRequestType: leafsRequestType, + baseOpts: baseOpts, + }, nil +} + +// NewHashDBDynamicSyncer creates a state syncer that supports pivoting to a new +// root mid-sync via UpdateTarget. The returned DynamicSyncer internally manages +// a code queue and code syncer per session. +func NewHashDBDynamicSyncer(syncClient client.Client, db ethdb.Database, root common.Hash, leafsRequestSize uint16, leafsRequestType message.LeafsRequestType, opts ...HashDBSyncerOption) (*types.DynamicSyncer, error) { + session, err := newHashDBPivotSession(syncClient, db, root, leafsRequestSize, leafsRequestType, opts, nil) + if err != nil { + return nil, err + } + return types.NewDynamicSyncer( + hashDBDynamicSyncerName, + StateSyncerID, + session, + root, + 0, + ), nil +} diff --git a/graft/evm/sync/evmstate/hashdb_dynamic_syncer_test.go b/graft/evm/sync/evmstate/hashdb_dynamic_syncer_test.go new file mode 100644 index 000000000000..d9a7ed731be9 --- /dev/null +++ b/graft/evm/sync/evmstate/hashdb_dynamic_syncer_test.go @@ -0,0 +1,248 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package evmstate + +import ( + "context" + "math/rand" + "sync/atomic" + "testing" + + "github.com/ava-labs/libevm/common" + "github.com/ava-labs/libevm/core/rawdb" + "github.com/ava-labs/libevm/core/state" + "github.com/ava-labs/libevm/core/types" + "github.com/ava-labs/libevm/ethdb" + "github.com/stretchr/testify/require" + + "github.com/ava-labs/avalanchego/codec" + "github.com/ava-labs/avalanchego/graft/evm/message" + "github.com/ava-labs/avalanchego/graft/evm/message/messagetest" + "github.com/ava-labs/avalanchego/graft/evm/sync/client" + "github.com/ava-labs/avalanchego/graft/evm/sync/code" + "github.com/ava-labs/avalanchego/graft/evm/sync/handlers" + "github.com/ava-labs/avalanchego/graft/evm/sync/synctest" + + handlerstats "github.com/ava-labs/avalanchego/graft/evm/sync/handlers/stats" + synctypes "github.com/ava-labs/avalanchego/graft/evm/sync/types" +) + +type dynamicSyncTestEnv struct { + mockClient *client.TestClient + clientEthDB ethdb.Database + clientDB state.Database + serverDB state.Database +} + +func newDynamicSyncTestEnv(t *testing.T, serverDB state.Database, c codec.Manager) *dynamicSyncTestEnv { + t.Helper() + clientEthDB := rawdb.NewMemoryDatabase() + + leafsHandler := handlers.NewLeafsRequestHandler(serverDB.TrieDB(), message.StateTrieKeyLength, nil, c, handlerstats.NewNoopHandlerStats()) + codeHandler := handlers.NewCodeRequestHandler(serverDB.DiskDB(), c, handlerstats.NewNoopHandlerStats()) + mockClient := client.NewTestClient(c, leafsHandler, codeHandler, nil) + + return &dynamicSyncTestEnv{ + mockClient: mockClient, + clientEthDB: clientEthDB, + clientDB: state.NewDatabase(clientEthDB), + serverDB: serverDB, + } +} + +// runSync creates a dynamic syncer for root, runs it to completion, +// verifies DB consistency, and returns the syncer. 
+func (e *dynamicSyncTestEnv) runSync(t *testing.T, root common.Hash, leafReqType message.LeafsRequestType) *synctypes.DynamicSyncer { + t.Helper() + stateSyncer, err := NewHashDBDynamicSyncer( + e.mockClient, e.clientEthDB, root, + testRequestSize, leafReqType, WithBatchSize(1000), + ) + require.NoError(t, err) + + require.NoError(t, stateSyncer.Sync(t.Context())) + synctest.AssertDBConsistency(t, root, e.clientDB, e.serverDB) + + return stateSyncer +} + +func TestDynamicSync_CompletesWithoutPivot(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 250, nil) + + env := newDynamicSyncTestEnv(t, serverDB, c) + env.runSync(t, root, leafReqType) + }) +} + +func TestDynamicSync_WithCodeAndStorage(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 500) + + env := newDynamicSyncTestEnv(t, serverDB, c) + env.runSync(t, root, leafReqType) + }) +} + +func TestDynamicSync_PivotMidSync(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root1, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 500) + root2, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, root1, 200) + require.NotEqual(t, root1, root2) + + env := newDynamicSyncTestEnv(t, serverDB, c) + + stateSyncer, err := NewHashDBDynamicSyncer( + env.mockClient, env.clientEthDB, root1, + testRequestSize, leafReqType, WithBatchSize(1000), + ) + require.NoError(t, err) + + var pivotTriggered atomic.Bool + env.mockClient.GetLeafsIntercept = func(_ message.LeafsRequest, resp message.LeafsResponse) (message.LeafsResponse, error) { + if pivotTriggered.CompareAndSwap(false, true) { + _ = stateSyncer.UpdateTarget(&synctest.SyncTarget{BlockRoot: root2, BlockHeight: 200}) + } + return resp, nil + } + + require.NoError(t, stateSyncer.Sync(t.Context())) + require.True(t, pivotTriggered.Load(), "pivot should have been triggered") + + synctest.AssertDBConsistency(t, root2, env.clientDB, serverDB) + }) +} + +func TestDynamicSync_UpdateTarget_StaleIgnored(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 50, nil) + + env := newDynamicSyncTestEnv(t, serverDB, c) + ds := env.runSync(t, root, leafReqType) + + require.NoError(t, ds.UpdateTarget(&synctest.SyncTarget{ + BlockRoot: common.HexToHash("0xdead"), + BlockHeight: 0, + })) + }) +} + +func TestDynamicSync_UpdateTarget_SameRootDifferentHeight(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 50, nil) + + env := newDynamicSyncTestEnv(t, serverDB, c) + ds := env.runSync(t, 
root, leafReqType) + + prevHeight := ds.TargetHeight() + newHeight := prevHeight + 100 + require.NoError(t, ds.UpdateTarget(&synctest.SyncTarget{ + BlockRoot: ds.DesiredRoot(), + BlockHeight: newHeight, + })) + require.Equal(t, newHeight, ds.TargetHeight()) + }) +} + +func TestDynamicSync_UpdateTarget_StaticNoop(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 50, nil) + clientEthDB := rawdb.NewMemoryDatabase() + + leafsHandler := handlers.NewLeafsRequestHandler(serverDB.TrieDB(), message.StateTrieKeyLength, nil, c, handlerstats.NewNoopHandlerStats()) + codeHandler := handlers.NewCodeRequestHandler(serverDB.DiskDB(), c, handlerstats.NewNoopHandlerStats()) + mockClient := client.NewTestClient(c, leafsHandler, codeHandler, nil) + + codeQueue, err := code.NewQueue(clientEthDB) + require.NoError(t, err) + + stateSyncer, err := NewHashDBSyncer(mockClient, clientEthDB, root, codeQueue, testRequestSize, leafReqType, WithFinalizeCodeQueue(codeQueue.Finalize)) + require.NoError(t, err) + + require.NoError(t, stateSyncer.UpdateTarget(&synctest.SyncTarget{ + BlockRoot: common.HexToHash("0xbeef"), + BlockHeight: 999, + })) + }) +} + +func TestDynamicSync_ContextCancellation(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 2000) + + env := newDynamicSyncTestEnv(t, serverDB, c) + + stateSyncer, err := NewHashDBDynamicSyncer( + env.mockClient, env.clientEthDB, root, + testRequestSize, leafReqType, + ) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(t.Context()) + t.Cleanup(cancel) + + env.mockClient.GetLeafsIntercept = func(_ message.LeafsRequest, resp message.LeafsResponse) (message.LeafsResponse, error) { + cancel() + return resp, nil + } + + err = stateSyncer.Sync(ctx) + require.ErrorIs(t, err, context.Canceled) + }) +} + +// TestDynamicSync_CodeSyncedAfterPivot verifies that contract code is +// correctly synced after a pivot, even though the code syncer is restarted. 
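+// The pivot is injected from the leafs intercept below: the first response
+// flips an atomic flag and calls UpdateTarget with root2, so the remainder of
+// the sync, including the code download, runs against the new target.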
+func TestDynamicSync_CodeSyncedAfterPivot(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root1, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 100) + root2, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, root1, 50) + + env := newDynamicSyncTestEnv(t, serverDB, c) + + stateSyncer, err := NewHashDBDynamicSyncer( + env.mockClient, env.clientEthDB, root1, + testRequestSize, leafReqType, WithBatchSize(1000), + ) + require.NoError(t, err) + + var pivotTriggered atomic.Bool + env.mockClient.GetLeafsIntercept = func(_ message.LeafsRequest, resp message.LeafsResponse) (message.LeafsResponse, error) { + if pivotTriggered.CompareAndSwap(false, true) { + _ = stateSyncer.UpdateTarget(&synctest.SyncTarget{BlockRoot: root2, BlockHeight: 200}) + } + return resp, nil + } + + require.NoError(t, stateSyncer.Sync(t.Context())) + + // AssertDBConsistency verifies trie nodes, account state, storage, + // and code are all consistent between client and server for root2. + synctest.AssertDBConsistency(t, root2, env.clientDB, serverDB) + }) +} diff --git a/graft/evm/sync/evmstate/state_syncer.go b/graft/evm/sync/evmstate/hashdb_syncer.go similarity index 58% rename from graft/evm/sync/evmstate/state_syncer.go rename to graft/evm/sync/evmstate/hashdb_syncer.go index 932f38fc7e1e..e7da6fb4876b 100644 --- a/graft/evm/sync/evmstate/state_syncer.go +++ b/graft/evm/sync/evmstate/hashdb_syncer.go @@ -20,7 +20,6 @@ import ( "github.com/ava-labs/avalanchego/graft/evm/core/state/snapshot" "github.com/ava-labs/avalanchego/graft/evm/message" "github.com/ava-labs/avalanchego/graft/evm/sync/client" - "github.com/ava-labs/avalanchego/graft/evm/sync/code" "github.com/ava-labs/avalanchego/graft/evm/sync/leaf" "github.com/ava-labs/avalanchego/graft/evm/sync/types" ) @@ -30,26 +29,28 @@ const ( numStorageTrieSegments = 4 numMainTrieSegments = 8 defaultNumWorkers = 8 + + // StateSyncerID is the stable identifier for the EVM state syncer. + StateSyncerID = "state_evm_state_sync" ) var ( - _ types.Syncer = (*stateSync)(nil) + _ types.Syncer = (*HashDBSyncer)(nil) errCodeRequestQueueRequired = errors.New("code request queue is required") errLeafsRequestSizeRequired = errors.New("leafs request size must be > 0") ) -// stateSync keeps the state of the entire state sync operation. -type stateSync struct { - db ethdb.Database // database we are syncing - root common.Hash // root of the EVM state we are syncing to - trieDB *triedb.Database // trieDB on top of db we are syncing. used to restore any existing tries. - snapshot snapshot.SnapshotIterable // used to access the database we are syncing as a snapshot. - batchSize uint // write batches when they reach this size - leafsRequestType message.LeafsRequestType // type of leafs request to use (coreth or subnet-evm wire format) - segments chan leaf.SyncTask // channel of tasks to sync - syncer *leaf.CallbackSyncer // performs the sync, looping over each task's range and invoking specified callbacks - codeQueue *code.Queue // queue that manages the asynchronous download and batching of code hashes - trieQueue *trieQueue // manages a persistent list of storage tries we need to sync and any segments that are created for them +// HashDBSyncer keeps the state of a single-root state sync session. 
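+// It syncs exactly one target root per session; retargeting mid-sync is the
+// job of the dynamic wrapper, so UpdateTarget on this type is a no-op.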
+type HashDBSyncer struct { + db ethdb.Database // database we are syncing + root common.Hash // root of the EVM state we are syncing to + trieDB *triedb.Database // trieDB on top of db we are syncing. used to restore any existing tries. + snapshot snapshot.SnapshotIterable // used to access the database we are syncing as a snapshot. + batchSize uint // write batches when they reach this size + segments chan leaf.SyncTask // channel of tasks to sync + syncer *leaf.CallbackSyncer // performs the sync, looping over each task's range and invoking specified callbacks + codeQueue types.CodeRequestQueue // queue that manages the asynchronous download and batching of code hashes + trieQueue *trieQueue // manages a persistent list of storage tries we need to sync and any segments that are created for them // track the main account trie specifically to commit its root at the end of the operation mainTrie *trieToSync @@ -67,78 +68,109 @@ type stateSync struct { // syncCompleted is set to true when the sync completes successfully. // This provides an explicit success signal for Finalize(). syncCompleted atomic.Bool + + // finalizeCodeQueue is called when the main trie completes. + // No-op by default - set via WithFinalizeCodeQueue. + finalizeCodeQueue func() error + + preserveSegments bool // keep segment markers across root changes + storageTrieFilter StorageTrieFilter // skip unchanged storage tries on pivot } -// SyncerOption configures the state syncer via functional options. -type SyncerOption = options.Option[stateSync] +// HashDBSyncerOption configures the state syncer via functional options. +type HashDBSyncerOption = options.Option[HashDBSyncer] // WithBatchSize sets the database batch size for writes. -func WithBatchSize(n uint) SyncerOption { - return options.Func[stateSync](func(s *stateSync) { +func WithBatchSize(n uint) HashDBSyncerOption { + return options.Func[HashDBSyncer](func(s *HashDBSyncer) { if n > 0 { s.batchSize = n } }) } -func NewSyncer(client client.Client, db ethdb.Database, root common.Hash, codeQueue *code.Queue, leafsRequestSize uint16, leafsRequestType message.LeafsRequestType, opts ...SyncerOption) (types.Syncer, error) { +// WithFinalizeCodeQueue sets the callback invoked when the main trie completes. +// Static callers use this to finalize the code queue. If not set, it defaults +// to a no-op (used by the dynamic wrapper which manages code queue lifecycle). +func WithFinalizeCodeQueue(fn func() error) HashDBSyncerOption { + return options.Func[HashDBSyncer](func(s *HashDBSyncer) { + s.finalizeCodeQueue = fn + }) +} + +// WithPreserveSegments keeps segment markers across root changes so +// unchanged storage tries resume from their prior sync position. +func WithPreserveSegments() HashDBSyncerOption { + return options.Func[HashDBSyncer](func(s *HashDBSyncer) { + s.preserveSegments = true + }) +} + +// StorageTrieFilter returns true to skip syncing a storage trie. +type StorageTrieFilter func(db ethdb.Database, accountHash common.Hash, storageRoot common.Hash) bool + +// WithStorageTrieFilter sets a filter to skip unchanged storage tries. +func WithStorageTrieFilter(fn StorageTrieFilter) HashDBSyncerOption { + return options.Func[HashDBSyncer](func(s *HashDBSyncer) { + s.storageTrieFilter = fn + }) +} + +// NewHashDBSyncer creates a single-session state syncer for the given root. 
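+// It returns an error if leafsRequestSize is zero or codeQueue is nil. A
+// typical static construction, mirroring the tests in this change, looks like:
+//
+//	queue, err := code.NewQueue(db) // code-hash queue consumed by a code.Syncer
+//	...
+//	s, err := NewHashDBSyncer(client, db, root, queue,
+//		leafsRequestSize, leafsRequestType,
+//		WithFinalizeCodeQueue(queue.Finalize))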
+func NewHashDBSyncer(syncClient client.Client, db ethdb.Database, root common.Hash, codeQueue types.CodeRequestQueue, leafsRequestSize uint16, leafsRequestType message.LeafsRequestType, opts ...HashDBSyncerOption) (*HashDBSyncer, error) {
 	if leafsRequestSize == 0 {
 		return nil, errLeafsRequestSizeRequired
 	}
-
-	// Construct with defaults, then apply options directly to stateSync.
-	ss := &stateSync{
-		db:               db,
-		root:             root,
-		trieDB:           triedb.NewDatabase(db, nil),
-		snapshot:         snapshot.NewDiskLayer(db),
-		stats:            newTrieSyncStats(),
-		triesInProgress:  make(map[common.Hash]*trieToSync),
-		leafsRequestType: leafsRequestType,
+	if codeQueue == nil {
+		return nil, errCodeRequestQueueRequired
+	}
+	ss := &HashDBSyncer{
+		db:                db,
+		root:              root,
+		trieDB:            triedb.NewDatabase(db, nil),
+		snapshot:          snapshot.NewDiskLayer(db),
+		codeQueue:         codeQueue,
+		batchSize:         ethdb.IdealBatchSize,
+		finalizeCodeQueue: func() error { return nil },
+		stats:             newTrieSyncStats(),
+		triesInProgress:   make(map[common.Hash]*trieToSync),
 		// [triesInProgressSem] is used to keep the number of tries syncing
 		// less than or equal to [defaultNumWorkers].
 		triesInProgressSem: make(chan struct{}, defaultNumWorkers),
-		// Each [trieToSync] will have a maximum of [numSegments] segments.
+		// Each [trieToSync] will have a maximum of [numStorageTrieSegments] segments.
 		// We set the capacity of [segments] such that [defaultNumWorkers]
 		// storage tries can sync concurrently.
 		segments:         make(chan leaf.SyncTask, defaultNumWorkers*numStorageTrieSegments),
 		mainTrieDone:     make(chan struct{}),
 		storageTriesDone: make(chan struct{}),
-		batchSize:        ethdb.IdealBatchSize,
 	}
 
 	// Apply functional options.
 	options.ApplyTo(ss, opts...)
 
-	ss.syncer = leaf.NewCallbackSyncer(client, ss.segments, &leaf.SyncerConfig{
+	ss.syncer = leaf.NewCallbackSyncer(syncClient, ss.segments, &leaf.SyncerConfig{
 		RequestSize:      leafsRequestSize,
 		NumWorkers:       defaultNumWorkers,
-		LeafsRequestType: ss.leafsRequestType,
+		LeafsRequestType: leafsRequestType,
 	})
 
-	if codeQueue == nil {
-		return nil, errCodeRequestQueueRequired
-	}
-
-	ss.codeQueue = codeQueue
 	ss.trieQueue = NewTrieQueue(db)
-	if err := ss.trieQueue.clearIfRootDoesNotMatch(ss.root); err != nil {
+	if err := ss.trieQueue.clearIfRootDoesNotMatch(root, ss.preserveSegments); err != nil {
 		return nil, err
 	}
 
-	var err error
 	// create a trieToSync for the main trie and mark it as in progress.
-	ss.mainTrie, err = NewTrieToSync(ss, ss.root, common.Hash{}, NewMainTrieTask(ss))
+	var err error
+	ss.mainTrie, err = NewTrieToSync(ss, root, common.Hash{}, NewMainTrieTask(ss))
 	if err != nil {
 		return nil, err
 	}
-	ss.addTrieInProgress(ss.root, ss.mainTrie)
+	ss.addTrieInProgress(root, ss.mainTrie)
 
 	// Use context.Background() for initialization since we don't have a sync context yet.
-	// This is safe because startSyncing is called before Sync() starts.
+	// This is safe because startSyncing only enqueues segments.
 	if err := ss.mainTrie.startSyncing(context.Background()); err != nil {
 		return nil, err
 	}
@@ -146,17 +178,18 @@ func NewSyncer(client client.Client, db ethdb.Database, root common.Hash, codeQu
 }
 
 // Name returns the human-readable name for this sync task.
-func (*stateSync) Name() string {
-	return "EVM State Syncer"
+func (*HashDBSyncer) Name() string {
+	return "HashDB EVM State Syncer (static)"
 }
 
 // ID returns the stable identifier for this sync task.
-func (*stateSync) ID() string { - return "state_evm_state_sync" +func (*HashDBSyncer) ID() string { + return StateSyncerID } -func (t *stateSync) Sync(ctx context.Context) error { - // Start the leaf syncer and storage trie producer. +// Sync runs the single-session sync to completion. +func (t *HashDBSyncer) Sync(ctx context.Context) error { + log.Info("HashDBSyncer.Sync starting", "root", t.root, "hasFilter", t.storageTrieFilter != nil, "preserveSegments", t.preserveSegments) eg, egCtx := errgroup.WithContext(ctx) eg.Go(func() error { @@ -166,18 +199,18 @@ func (t *stateSync) Sync(ctx context.Context) error { return t.onSyncComplete() }) - // Note: code fetcher should already be initialized. eg.Go(func() error { return t.storageTrieProducer(egCtx) }) - // The errgroup wait will take care of returning the first error that occurs, or returning - // nil if syncing finish without an error. return eg.Wait() } +// UpdateTarget is a no-op for the static syncer. +func (*HashDBSyncer) UpdateTarget(message.Syncable) error { return nil } + // onStorageTrieFinished is called after a storage trie finishes syncing. -func (t *stateSync) onStorageTrieFinished(root common.Hash) error { +func (t *HashDBSyncer) onStorageTrieFinished(root common.Hash) error { <-t.triesInProgressSem // allow another trie to start (release the semaphore) // mark the storage trie as done in trieQueue if err := t.trieQueue.StorageTrieDone(root); err != nil { @@ -200,8 +233,8 @@ func (t *stateSync) onStorageTrieFinished(root common.Hash) error { } // onMainTrieFinished is called after the main trie finishes syncing. -func (t *stateSync) onMainTrieFinished() error { - if err := t.codeQueue.Finalize(); err != nil { +func (t *HashDBSyncer) onMainTrieFinished() error { + if err := t.finalizeCodeQueue(); err != nil { return err } @@ -210,6 +243,7 @@ func (t *stateSync) onMainTrieFinished() error { if err != nil { return err } + log.Info("main trie finished, starting storage tries", "numStorageTries", numStorageTries, "root", t.root) t.stats.setTriesRemaining(numStorageTries) // mark the main trie done @@ -222,7 +256,7 @@ func (t *stateSync) onMainTrieFinished() error { // all storage tries have completed syncing. We persist // [mainTrie]'s batch last to avoid persisting the state // root before all storage tries are done syncing. -func (t *stateSync) onSyncComplete() error { +func (t *HashDBSyncer) onSyncComplete() error { if err := t.mainTrie.batch.Write(); err != nil { return err } @@ -235,7 +269,7 @@ func (t *stateSync) onSyncComplete() error { // with their corresponding accounts to the segments channel. // returns nil if all storage tries were iterated and an // error if one occurred or the context expired. -func (t *stateSync) storageTrieProducer(ctx context.Context) error { +func (t *HashDBSyncer) storageTrieProducer(ctx context.Context) error { // Wait for main trie to finish to ensure when this thread terminates // there are no more storage tries to sync select { @@ -293,7 +327,7 @@ func (t *stateSync) storageTrieProducer(ctx context.Context) error { } // addTrieInProgress tracks the root as being currently synced. 
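+// Together with removeTrieInProgress it maintains the set of in-flight tries
+// whose batches Finalize flushes if the sync stops before completion.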
-func (t *stateSync) addTrieInProgress(root common.Hash, trie *trieToSync) { +func (t *HashDBSyncer) addTrieInProgress(root common.Hash, trie *trieToSync) { t.lock.Lock() defer t.lock.Unlock() @@ -302,7 +336,7 @@ func (t *stateSync) addTrieInProgress(root common.Hash, trie *trieToSync) { // removeTrieInProgress removes root from the set of tracked tries in progress // and returns the number of tries in progress after the removal. -func (t *stateSync) removeTrieInProgress(root common.Hash) (int, error) { +func (t *HashDBSyncer) removeTrieInProgress(root common.Hash) (int, error) { t.lock.Lock() defer t.lock.Unlock() @@ -315,7 +349,7 @@ func (t *stateSync) removeTrieInProgress(root common.Hash) (int, error) { } // Finalize flushes in-progress trie batches to disk to preserve progress on failure. -func (t *stateSync) Finalize() error { +func (t *HashDBSyncer) Finalize() error { if t.syncCompleted.Load() { return nil } diff --git a/graft/evm/sync/evmstate/hashdb_syncer_test.go b/graft/evm/sync/evmstate/hashdb_syncer_test.go new file mode 100644 index 000000000000..f354846c1846 --- /dev/null +++ b/graft/evm/sync/evmstate/hashdb_syncer_test.go @@ -0,0 +1,480 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package evmstate + +import ( + "context" + "errors" + "math/rand" + "sync/atomic" + "testing" + + "github.com/ava-labs/libevm/common" + "github.com/ava-labs/libevm/core/rawdb" + "github.com/ava-labs/libevm/core/state" + "github.com/ava-labs/libevm/core/types" + "github.com/ava-labs/libevm/crypto" + "github.com/ava-labs/libevm/ethdb" + "github.com/ava-labs/libevm/rlp" + "github.com/ava-labs/libevm/trie" + "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" + + "github.com/ava-labs/avalanchego/codec" + "github.com/ava-labs/avalanchego/graft/evm/core/state/snapshot" + "github.com/ava-labs/avalanchego/graft/evm/message" + "github.com/ava-labs/avalanchego/graft/evm/message/messagetest" + "github.com/ava-labs/avalanchego/graft/evm/sync/client" + "github.com/ava-labs/avalanchego/graft/evm/sync/code" + "github.com/ava-labs/avalanchego/graft/evm/sync/handlers" + "github.com/ava-labs/avalanchego/graft/evm/sync/synctest" + "github.com/ava-labs/avalanchego/vms/evm/sync/customrawdb" + + handlerstats "github.com/ava-labs/avalanchego/graft/evm/sync/handlers/stats" +) + +const testRequestSize = 1024 + +var errInterrupted = errors.New("interrupted sync") + +// syncTestConfig holds optional configuration for testSyncWithConfig. +type syncTestConfig struct { + ctx context.Context + wantError error + leafsIntercept func(message.LeafsRequest, message.LeafsResponse) (message.LeafsResponse, error) + codeIntercept func([]common.Hash, [][]byte) ([][]byte, error) +} + +// testSync runs a full static sync and asserts DB consistency on success. +func testSync(t *testing.T, clientDB, serverDB state.Database, root common.Hash, c codec.Manager, leafReqType message.LeafsRequestType) { + t.Helper() + testSyncWithConfig(t, clientDB, serverDB, root, c, leafReqType, nil) +} + +// testSyncWithConfig runs a static sync with optional intercepts, context, and +// expected error. If cfg is nil, defaults are used. 
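+// For example, to assert that a sync fails with a specific client error:
+//
+//	testSyncWithConfig(t, clientDB, serverDB, root, c, leafReqType, &syncTestConfig{
+//		wantError: clientErr,
+//		leafsIntercept: func(message.LeafsRequest, message.LeafsResponse) (message.LeafsResponse, error) {
+//			return message.LeafsResponse{}, clientErr
+//		},
+//	})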
+func testSyncWithConfig(t *testing.T, clientDB, serverDB state.Database, root common.Hash, c codec.Manager, leafReqType message.LeafsRequestType, cfg *syncTestConfig) { + t.Helper() + ctx := t.Context() + var wantError error + if cfg != nil { + if cfg.ctx != nil { + ctx = cfg.ctx + } + wantError = cfg.wantError + } + + clientEthDB, ok := clientDB.DiskDB().(ethdb.Database) + require.Truef(t, ok, "%T is not an ethdb.Database", clientDB.DiskDB()) + + leafsHandler := handlers.NewLeafsRequestHandler(serverDB.TrieDB(), message.StateTrieKeyLength, nil, c, handlerstats.NewNoopHandlerStats()) + codeHandler := handlers.NewCodeRequestHandler(serverDB.DiskDB(), c, handlerstats.NewNoopHandlerStats()) + mockClient := client.NewTestClient(c, leafsHandler, codeHandler, nil) + if cfg != nil { + mockClient.GetLeafsIntercept = cfg.leafsIntercept + mockClient.GetCodeIntercept = cfg.codeIntercept + } + + queue, err := code.NewQueue(clientEthDB) + require.NoError(t, err, "failed to create code queue") + + codeSyncer, err := code.NewSyncer(mockClient, clientEthDB, queue.CodeHashes()) + require.NoError(t, err, "failed to create code syncer") + + stateSyncer, err := NewHashDBSyncer( + mockClient, clientEthDB, root, queue, + testRequestSize, leafReqType, + WithFinalizeCodeQueue(queue.Finalize), + WithBatchSize(1000), // Use a lower batch size in order to get test coverage of batches being written early. + ) + require.NoError(t, err, "failed to create state syncer") + + eg, egCtx := errgroup.WithContext(ctx) + eg.Go(func() error { return codeSyncer.Sync(egCtx) }) + eg.Go(func() error { return stateSyncer.Sync(egCtx) }) + + err = eg.Wait() + require.ErrorIs(t, err, wantError, "unexpected error during sync") + + if wantError != nil { + return + } + + synctest.AssertDBConsistency(t, root, clientDB, serverDB) +} + +func TestSync(t *testing.T) { + t.Run("accounts", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 250, nil) + testSync(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType) + }) + }) + + t.Run("accounts with code", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 250, func(t *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { + if index%3 == 0 { + codeBytes := make([]byte, 256) + _, err := r.Read(codeBytes) + require.NoError(t, err) + codeHash := crypto.Keccak256Hash(codeBytes) + rawdb.WriteCode(serverDB.DiskDB(), codeHash, codeBytes) + account.CodeHash = codeHash[:] + } + return account + }) + testSync(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType) + }) + }) + + t.Run("accounts with code and storage", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 250) + testSync(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType) + }) + }) + + 
t.Run("accounts with storage", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 250, func(t *testing.T, i int, addr common.Address, account types.StateAccount, storageTr state.Trie) types.StateAccount { + if i%5 == 0 { + synctest.FillStorageForAccount(t, r, 16, addr, storageTr) + } + return account + }) + testSync(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType) + }) + }) + + t.Run("accounts with overlapping storage", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, common.Hash{}, 250, 3) + testSync(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType) + }) + }) + + t.Run("failed to fetch leafs", func(t *testing.T) { + t.Parallel() + clientErr := errors.New("dummy client error") + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 10, nil) + testSyncWithConfig(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType, &syncTestConfig{ + wantError: clientErr, + leafsIntercept: func(_ message.LeafsRequest, _ message.LeafsResponse) (message.LeafsResponse, error) { + return message.LeafsResponse{}, clientErr + }, + }) + }) + }) + + t.Run("failed to fetch code", func(t *testing.T) { + t.Parallel() + clientErr := errors.New("dummy client error") + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 10) + testSyncWithConfig(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType, &syncTestConfig{ + wantError: clientErr, + codeIntercept: func(_ []common.Hash, _ [][]byte) ([][]byte, error) { + return nil, clientErr + }, + }) + }) + }) +} + +func TestCancelSync(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 2000) + + ctx, cancel := context.WithCancel(t.Context()) + t.Cleanup(cancel) + + testSyncWithConfig(t, state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root, c, leafReqType, &syncTestConfig{ + ctx: ctx, + wantError: context.Canceled, + leafsIntercept: func(_ message.LeafsRequest, lr message.LeafsResponse) (message.LeafsResponse, error) { + cancel() + return lr, nil + }, + }) + }) +} + +// interruptLeafsIntercept provides the parameters to the getLeafsIntercept +// function which returns [errInterrupted] after passing through [numRequests] +// leafs requests for [root]. 
+type interruptLeafsIntercept struct {
+	numRequests    atomic.Uint32
+	interruptAfter uint32
+	root           common.Hash
+}
+
+// getLeafsIntercept can be passed to the test client and returns an unmodified
+// response for the first [interruptAfter] requests for leafs from [root].
+// After that, all requests for leafs from [root] return [errInterrupted].
+func (i *interruptLeafsIntercept) getLeafsIntercept(request message.LeafsRequest, response message.LeafsResponse) (message.LeafsResponse, error) {
+	if request.RootHash() == i.root {
+		if numRequests := i.numRequests.Add(1); numRequests > i.interruptAfter {
+			return message.LeafsResponse{}, errInterrupted
+		}
+	}
+	return response, nil
+}
+
+func TestResumeSyncAccountsTrieInterrupted(t *testing.T) {
+	t.Parallel()
+	messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) {
+		r := rand.New(rand.NewSource(1))
+		serverDB := state.NewDatabase(rawdb.NewMemoryDatabase())
+		root, _ := synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, common.Hash{}, 2000, 3)
+		clientDB := state.NewDatabase(rawdb.NewMemoryDatabase())
+		intercept := &interruptLeafsIntercept{root: root, interruptAfter: 1}
+
+		testSyncWithConfig(t, clientDB, serverDB, root, c, leafReqType, &syncTestConfig{
+			wantError:      errInterrupted,
+			leafsIntercept: intercept.getLeafsIntercept,
+		})
+
+		require.GreaterOrEqual(t, intercept.numRequests.Load(), uint32(2))
+
+		testSync(t, clientDB, serverDB, root, c, leafReqType)
+	})
+}
+
+func TestResumeSyncLargeStorageTrieInterrupted(t *testing.T) {
+	t.Parallel()
+	messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) {
+		r := rand.New(rand.NewSource(1))
+		serverDB := state.NewDatabase(rawdb.NewMemoryDatabase())
+
+		largeStorageRoot, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength)
+		root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 2000, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount {
+			if index == 10 {
+				account.Root = largeStorageRoot
+			}
+			return account
+		})
+		clientDB := state.NewDatabase(rawdb.NewMemoryDatabase())
+		intercept := &interruptLeafsIntercept{root: largeStorageRoot, interruptAfter: 1}
+
+		testSyncWithConfig(t, clientDB, serverDB, root, c, leafReqType, &syncTestConfig{
+			wantError:      errInterrupted,
+			leafsIntercept: intercept.getLeafsIntercept,
+		})
+
+		testSync(t, clientDB, serverDB, root, c, leafReqType)
+	})
+}
+
+func TestResumeSyncToNewRootAfterLargeStorageTrieInterrupted(t *testing.T) {
+	t.Parallel()
+	messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) {
+		r := rand.New(rand.NewSource(1))
+		serverDB := state.NewDatabase(rawdb.NewMemoryDatabase())
+
+		largeStorageRoot1, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength)
+		largeStorageRoot2, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength)
+		root1, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 2000, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount {
+			if index == 10 {
+				account.Root = largeStorageRoot1
+			}
+			return account
+		})
+		root2, _ := synctest.FillAccounts(t, r, serverDB, root1, 100, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount {
+			if index == 20 {
+				account.Root = largeStorageRoot2
+			}
+			return account
+		})
+		clientDB :=
state.NewDatabase(rawdb.NewMemoryDatabase()) + intercept := &interruptLeafsIntercept{root: largeStorageRoot1, interruptAfter: 1} + + testSyncWithConfig(t, clientDB, serverDB, root1, c, leafReqType, &syncTestConfig{ + wantError: errInterrupted, + leafsIntercept: intercept.getLeafsIntercept, + }) + + <-snapshot.WipeSnapshot(clientDB.DiskDB(), false) + + testSync(t, clientDB, serverDB, root2, c, leafReqType) + }) +} + +func TestResumeSyncLargeStorageTrieWithConsecutiveDuplicatesInterrupted(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + + largeStorageRoot, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 100, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { + // Set the root for 2 successive accounts + if index == 10 || index == 11 { + account.Root = largeStorageRoot + } + return account + }) + clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + intercept := &interruptLeafsIntercept{root: largeStorageRoot, interruptAfter: 1} + + testSyncWithConfig(t, clientDB, serverDB, root, c, leafReqType, &syncTestConfig{ + wantError: errInterrupted, + leafsIntercept: intercept.getLeafsIntercept, + }) + + testSync(t, clientDB, serverDB, root, c, leafReqType) + }) +} + +func TestResumeSyncLargeStorageTrieWithSpreadOutDuplicatesInterrupted(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + r := rand.New(rand.NewSource(1)) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + + largeStorageRoot, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength) + root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 100, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { + if index == 10 || index == 90 { + account.Root = largeStorageRoot + } + return account + }) + clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + intercept := &interruptLeafsIntercept{root: largeStorageRoot, interruptAfter: 1} + + testSyncWithConfig(t, clientDB, serverDB, root, c, leafReqType, &syncTestConfig{ + wantError: errInterrupted, + leafsIntercept: intercept.getLeafsIntercept, + }) + + testSync(t, clientDB, serverDB, root, c, leafReqType) + }) +} + +func TestResyncNewRootAfterDeletes(t *testing.T) { + t.Run("delete code", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + testSyncerSyncsToNewRoot(t, deleteAllCode, c, leafReqType) + }) + }) + t.Run("delete intermediate storage nodes", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + testSyncerSyncsToNewRoot(t, corruptStorageTries, c, leafReqType) + }) + }) + t.Run("delete intermediate account trie nodes", func(t *testing.T) { + t.Parallel() + messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { + testSyncerSyncsToNewRoot(t, corruptAccountTrie, c, leafReqType) + }) + }) +} + +func testSyncerSyncsToNewRoot(t *testing.T, deleteBetweenSyncs func(*testing.T, common.Hash, state.Database), c codec.Manager, leafReqType message.LeafsRequestType) { + r := 
rand.New(rand.NewSource(1)) + clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) + + root1, _ := synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, common.Hash{}, 1000, 3) + root2, _ := synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, root1, 1000, 3) + + // Sync to root1. + testSync(t, clientDB, serverDB, root1, c, leafReqType) + + // Wipe snapshot and corrupt state between syncs. + <-snapshot.WipeSnapshot(clientDB.DiskDB(), false) + deleteBetweenSyncs(t, root1, clientDB) + + // Re-sync to root2. + testSync(t, clientDB, serverDB, root2, c, leafReqType) +} + +// deleteAllCode removes all code and code-to-fetch markers from the client DB. +func deleteAllCode(t *testing.T, _ common.Hash, clientDB state.Database) { + it := clientDB.DiskDB().NewIterator(rawdb.CodePrefix, nil) + defer it.Release() + for it.Next() { + if len(it.Key()) != len(rawdb.CodePrefix)+common.HashLength { + continue + } + require.NoError(t, clientDB.DiskDB().Delete(it.Key()), "failed to delete code hash %x", it.Key()[len(rawdb.CodePrefix):]) + } + require.NoError(t, it.Error(), "error iterating over code hashes") + + codeToFetchIt := customrawdb.NewCodeToFetchIterator(clientDB.DiskDB()) + defer codeToFetchIt.Release() + for codeToFetchIt.Next() { + codeHash := common.BytesToHash(codeToFetchIt.Key()[len(customrawdb.CodeToFetchPrefix):]) + require.NoError(t, customrawdb.DeleteCodeToFetch(clientDB.DiskDB(), codeHash), "failed to delete code-to-fetch marker for hash %x", codeHash) + } + require.NoError(t, codeToFetchIt.Error(), "error iterating over code-to-fetch markers") +} + +// corruptStorageTries deletes intermediate nodes from every other storage trie. +func corruptStorageTries(t *testing.T, root common.Hash, clientDB state.Database) { + clientTrieDB := clientDB.TrieDB() + tr, err := trie.New(trie.TrieID(root), clientTrieDB) + require.NoError(t, err, "failed to create trie for root %s", root) + nodeIt, err := tr.NodeIterator(nil) + require.NoError(t, err, "failed to create node iterator for root %s", root) + it := trie.NewIterator(nodeIt) + accountsWithStorage := 0 + + // keep track of storage tries we delete trie nodes from + // so we don't try to do it again if another account has + // the same storage root. + corruptedStorageRoots := make(map[common.Hash]struct{}) + for it.Next() { + var acc types.StateAccount + require.NoError(t, rlp.DecodeBytes(it.Value, &acc), "failed to decode account at key %x", it.Key) + if acc.Root == types.EmptyRootHash { + continue + } + if _, found := corruptedStorageRoots[acc.Root]; found { + continue + } + accountsWithStorage++ + if accountsWithStorage%2 != 0 { + continue + } + corruptedStorageRoots[acc.Root] = struct{}{} + tr, err := trie.New(trie.TrieID(acc.Root), clientTrieDB) + require.NoError(t, err, "failed to create trie for root %s", acc.Root) + synctest.CorruptTrie(t, clientDB.DiskDB(), tr, 2) + } + require.NoError(t, it.Err, "error iterating over trie nodes") +} + +// corruptAccountTrie deletes intermediate nodes from the account trie. 
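+// TestResyncNewRootAfterDeletes uses it to verify that a second sync repairs
+// state that is still referenced on disk but no longer complete.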
+func corruptAccountTrie(t *testing.T, root common.Hash, clientDB state.Database) { + clientTrieDB := clientDB.TrieDB() + tr, err := trie.New(trie.TrieID(root), clientTrieDB) + require.NoError(t, err, "failed to create trie for root %s", root) + synctest.CorruptTrie(t, clientDB.DiskDB(), tr, 5) +} diff --git a/graft/evm/sync/evmstate/sync_test.go b/graft/evm/sync/evmstate/sync_test.go deleted file mode 100644 index 90d511bc3367..000000000000 --- a/graft/evm/sync/evmstate/sync_test.go +++ /dev/null @@ -1,605 +0,0 @@ -// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. - -package evmstate - -import ( - "bytes" - "context" - "errors" - "math/rand" - "sync/atomic" - "testing" - - "github.com/ava-labs/libevm/common" - "github.com/ava-labs/libevm/core/rawdb" - "github.com/ava-labs/libevm/core/state" - "github.com/ava-labs/libevm/core/types" - "github.com/ava-labs/libevm/crypto" - "github.com/ava-labs/libevm/ethdb" - "github.com/ava-labs/libevm/rlp" - "github.com/ava-labs/libevm/trie" - "github.com/stretchr/testify/require" - "golang.org/x/sync/errgroup" - - "github.com/ava-labs/avalanchego/codec" - "github.com/ava-labs/avalanchego/graft/evm/core/state/snapshot" - "github.com/ava-labs/avalanchego/graft/evm/message" - "github.com/ava-labs/avalanchego/graft/evm/message/messagetest" - "github.com/ava-labs/avalanchego/graft/evm/sync/client" - "github.com/ava-labs/avalanchego/graft/evm/sync/code" - "github.com/ava-labs/avalanchego/graft/evm/sync/handlers" - "github.com/ava-labs/avalanchego/graft/evm/sync/synctest" - "github.com/ava-labs/avalanchego/vms/evm/sync/customrawdb" - - handlerstats "github.com/ava-labs/avalanchego/graft/evm/sync/handlers/stats" -) - -const testRequestSize = 1024 - -var errInterrupted = errors.New("interrupted sync") - -type syncTest struct { - ctx context.Context - prepareForTest func(t *testing.T, r *rand.Rand) (clientDB state.Database, serverDB state.Database, syncRoot common.Hash) - expectedError error - GetLeafsIntercept func(message.LeafsRequest, message.LeafsResponse) (message.LeafsResponse, error) - GetCodeIntercept func([]common.Hash, [][]byte) ([][]byte, error) -} - -func testSync(t *testing.T, test syncTest, c codec.Manager, leafReqType message.LeafsRequestType) { - t.Helper() - ctx := t.Context() - if test.ctx != nil { - ctx = test.ctx - } - r := rand.New(rand.NewSource(1)) - clientDB, serverDB, root := test.prepareForTest(t, r) - clientEthDB, ok := clientDB.DiskDB().(ethdb.Database) - require.Truef(t, ok, "%T is not an ethdb.Database", clientDB.DiskDB()) - - leafsRequestHandler := handlers.NewLeafsRequestHandler(serverDB.TrieDB(), message.StateTrieKeyLength, nil, c, handlerstats.NewNoopHandlerStats()) - codeRequestHandler := handlers.NewCodeRequestHandler(serverDB.DiskDB(), c, handlerstats.NewNoopHandlerStats()) - mockClient := client.NewTestClient(c, leafsRequestHandler, codeRequestHandler, nil) - // Set intercept functions for the mock client - mockClient.GetLeafsIntercept = test.GetLeafsIntercept - mockClient.GetCodeIntercept = test.GetCodeIntercept - - // Create the code fetcher. - fetcher, err := code.NewQueue(clientEthDB) - require.NoError(t, err, "failed to create code fetcher") - - // Create the consumer code syncer. - codeSyncer, err := code.NewSyncer(mockClient, clientEthDB, fetcher.CodeHashes()) - require.NoError(t, err, "failed to create code syncer") - - // Create the state syncer. 
- stateSyncer, err := NewSyncer( - mockClient, - clientEthDB, - root, - fetcher, - testRequestSize, - leafReqType, - WithBatchSize(1000), // Use a lower batch size in order to get test coverage of batches being written early. - ) - require.NoError(t, err, "failed to create state syncer") - - // Run both syncers concurrently and wait for the first error. - eg, egCtx := errgroup.WithContext(ctx) - eg.Go(func() error { return codeSyncer.Sync(egCtx) }) - eg.Go(func() error { return stateSyncer.Sync(egCtx) }) - - err = eg.Wait() - require.ErrorIs(t, err, test.expectedError, "unexpected error during sync") - - // Only assert database consistency if the sync was expected to succeed. - if test.expectedError != nil { - return - } - - assertDBConsistency(t, root, clientDB, serverDB) -} - -// testSyncResumes tests a series of syncTests work as expected, invoking a callback function after each -// successive step. -func testSyncResumes(t *testing.T, steps []syncTest, stepCallback func(), c codec.Manager, leafReqType message.LeafsRequestType) { - for _, test := range steps { - testSync(t, test, c, leafReqType) - stepCallback() - } -} - -func TestSimpleSyncCases(t *testing.T) { - var ( - numAccounts = 250 - numAccountsSmall = 10 - clientErr = errors.New("dummy client error") - ) - tests := map[string]syncTest{ - "accounts": { - prepareForTest: func(t *testing.T, r *rand.Rand) (state.Database, state.Database, common.Hash) { - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, numAccounts, nil) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - }, - "accounts with code": { - prepareForTest: func(t *testing.T, r *rand.Rand) (state.Database, state.Database, common.Hash) { - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, numAccounts, func(t *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { - if index%3 == 0 { - codeBytes := make([]byte, 256) - _, err := r.Read(codeBytes) - require.NoError(t, err, "error reading random code bytes") - - codeHash := crypto.Keccak256Hash(codeBytes) - rawdb.WriteCode(serverDB.DiskDB(), codeHash, codeBytes) - account.CodeHash = codeHash[:] - } - return account - }) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - }, - "accounts with code and storage": { - prepareForTest: func(t *testing.T, r *rand.Rand) (state.Database, state.Database, common.Hash) { - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, numAccounts) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - }, - "accounts with storage": { - prepareForTest: func(t *testing.T, r *rand.Rand) (state.Database, state.Database, common.Hash) { - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, numAccounts, func(t *testing.T, i int, addr common.Address, account types.StateAccount, storageTr state.Trie) types.StateAccount { - if i%5 == 0 { - synctest.FillStorageForAccount(t, r, 16, addr, storageTr) - } - return account - }) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - }, - "accounts with overlapping storage": { - prepareForTest: func(t *testing.T, r *rand.Rand) (state.Database, state.Database, common.Hash) { - serverDB := 
state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, common.Hash{}, numAccounts, 3) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - }, - "failed to fetch leafs": { - prepareForTest: func(t *testing.T, r *rand.Rand) (state.Database, state.Database, common.Hash) { - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, numAccountsSmall, nil) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - GetLeafsIntercept: func(_ message.LeafsRequest, _ message.LeafsResponse) (message.LeafsResponse, error) { - return message.LeafsResponse{}, clientErr - }, - expectedError: clientErr, - }, - "failed to fetch code": { - prepareForTest: func(t *testing.T, r *rand.Rand) (state.Database, state.Database, common.Hash) { - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, numAccountsSmall) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - GetCodeIntercept: func(_ []common.Hash, _ [][]byte) ([][]byte, error) { - return nil, clientErr - }, - expectedError: clientErr, - }, - } - for name, test := range tests { - t.Run(name, func(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - testSync(t, test, c, leafReqType) - }) - }) - } -} - -func TestCancelSync(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - r := rand.New(rand.NewSource(1)) - ctx, cancel := context.WithCancel(t.Context()) - t.Cleanup(cancel) - - testSync(t, syncTest{ - ctx: ctx, - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - // Create trie with 2000 accounts (more than one leaf request) - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccountsWithStorageAndCode(t, r, serverDB, types.EmptyRootHash, 2000) - return state.NewDatabase(rawdb.NewMemoryDatabase()), serverDB, root - }, - expectedError: context.Canceled, - GetLeafsIntercept: func(_ message.LeafsRequest, lr message.LeafsResponse) (message.LeafsResponse, error) { - cancel() - return lr, nil - }, - }, c, leafReqType) - }) -} - -// interruptLeafsIntercept provides the parameters to the getLeafsIntercept -// function which returns [errInterrupted] after passing through [numRequests] -// leafs requests for [root]. -type interruptLeafsIntercept struct { - numRequests atomic.Uint32 - interruptAfter uint32 - root common.Hash -} - -// getLeafsIntercept can be passed to testClient and returns an unmodified -// response for the first [numRequest] requests for leafs from [root]. -// After that, all requests for leafs from [root] return [errInterrupted]. 
-func (i *interruptLeafsIntercept) getLeafsIntercept(request message.LeafsRequest, response message.LeafsResponse) (message.LeafsResponse, error) { - if request.RootHash() == i.root { - if numRequests := i.numRequests.Add(1); numRequests > i.interruptAfter { - return message.LeafsResponse{}, errInterrupted - } - } - return response, nil -} - -func TestResumeSyncAccountsTrieInterrupted(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - r := rand.New(rand.NewSource(1)) - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - root, _ := synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, common.Hash{}, 2000, 3) - clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - intercept := &interruptLeafsIntercept{ - root: root, - interruptAfter: 1, - } - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - expectedError: errInterrupted, - GetLeafsIntercept: intercept.getLeafsIntercept, - }, c, leafReqType) - - require.GreaterOrEqual(t, intercept.numRequests.Load(), uint32(2)) - - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - }, c, leafReqType) - }) -} - -func TestResumeSyncLargeStorageTrieInterrupted(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - r := rand.New(rand.NewSource(1)) - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - - largeStorageRoot, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength) - root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 2000, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { - // Set the root for a single account - if index == 10 { - account.Root = largeStorageRoot - } - return account - }) - clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - intercept := &interruptLeafsIntercept{ - root: largeStorageRoot, - interruptAfter: 1, - } - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - expectedError: errInterrupted, - GetLeafsIntercept: intercept.getLeafsIntercept, - }, c, leafReqType) - - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - }, c, leafReqType) - }) -} - -func TestResumeSyncToNewRootAfterLargeStorageTrieInterrupted(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - r := rand.New(rand.NewSource(1)) - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - - largeStorageRoot1, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength) - largeStorageRoot2, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength) - root1, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 2000, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { - // Set the root for a single account - if index == 10 { - account.Root = largeStorageRoot1 - } - return account - }) - root2, _ := synctest.FillAccounts(t, r, serverDB, root1, 100, func(_ *testing.T, index int, _ 
common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { - if index == 20 { - account.Root = largeStorageRoot2 - } - return account - }) - clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - intercept := &interruptLeafsIntercept{ - root: largeStorageRoot1, - interruptAfter: 1, - } - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root1 - }, - expectedError: errInterrupted, - GetLeafsIntercept: intercept.getLeafsIntercept, - }, c, leafReqType) - - <-snapshot.WipeSnapshot(clientDB.DiskDB(), false) - - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root2 - }, - }, c, leafReqType) - }) -} - -func TestResumeSyncLargeStorageTrieWithConsecutiveDuplicatesInterrupted(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - r := rand.New(rand.NewSource(1)) - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - - largeStorageRoot, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength) - root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 100, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { - // Set the root for 2 successive accounts - if index == 10 || index == 11 { - account.Root = largeStorageRoot - } - return account - }) - clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - intercept := &interruptLeafsIntercept{ - root: largeStorageRoot, - interruptAfter: 1, - } - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - expectedError: errInterrupted, - GetLeafsIntercept: intercept.getLeafsIntercept, - }, c, leafReqType) - - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - }, c, leafReqType) - }) -} - -func TestResumeSyncLargeStorageTrieWithSpreadOutDuplicatesInterrupted(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - r := rand.New(rand.NewSource(1)) - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - - largeStorageRoot, _, _ := synctest.GenerateIndependentTrie(t, r, serverDB.TrieDB(), 2000, common.HashLength) - root, _ := synctest.FillAccounts(t, r, serverDB, common.Hash{}, 100, func(_ *testing.T, index int, _ common.Address, account types.StateAccount, _ state.Trie) types.StateAccount { - if index == 10 || index == 90 { - account.Root = largeStorageRoot - } - return account - }) - clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - intercept := &interruptLeafsIntercept{ - root: largeStorageRoot, - interruptAfter: 1, - } - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - expectedError: errInterrupted, - GetLeafsIntercept: intercept.getLeafsIntercept, - }, c, leafReqType) - - testSync(t, syncTest{ - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root - }, - }, c, leafReqType) - }) -} - -func TestResyncNewRootAfterDeletes(t *testing.T) { - for name, test := range map[string]struct { - deleteBetweenSyncs 
func(*testing.T, common.Hash, state.Database) - }{ - "delete code": { - deleteBetweenSyncs: func(t *testing.T, _ common.Hash, clientDB state.Database) { - // delete code - it := clientDB.DiskDB().NewIterator(rawdb.CodePrefix, nil) - defer it.Release() - for it.Next() { - if len(it.Key()) != len(rawdb.CodePrefix)+common.HashLength { - continue - } - require.NoError(t, clientDB.DiskDB().Delete(it.Key()), "failed to delete code hash %x", it.Key()[len(rawdb.CodePrefix):]) - } - require.NoError(t, it.Error(), "error iterating over code hashes") - - // delete code-to-fetch markers to avoid syncer trying to fetch old code - codeToFetchIt := customrawdb.NewCodeToFetchIterator(clientDB.DiskDB()) - defer codeToFetchIt.Release() - for codeToFetchIt.Next() { - codeHash := common.BytesToHash(codeToFetchIt.Key()[len(customrawdb.CodeToFetchPrefix):]) - require.NoError(t, customrawdb.DeleteCodeToFetch(clientDB.DiskDB(), codeHash), "failed to delete code-to-fetch marker for hash %x", codeHash) - } - require.NoError(t, codeToFetchIt.Error(), "error iterating over code-to-fetch markers") - }, - }, - "delete intermediate storage nodes": { - deleteBetweenSyncs: func(t *testing.T, root common.Hash, clientDB state.Database) { - clientTrieDB := clientDB.TrieDB() - tr, err := trie.New(trie.TrieID(root), clientTrieDB) - require.NoError(t, err, "failed to create trie for root %s", root) - nodeIt, err := tr.NodeIterator(nil) - require.NoError(t, err, "failed to create node iterator for root %s", root) - it := trie.NewIterator(nodeIt) - accountsWithStorage := 0 - - // keep track of storage tries we delete trie nodes from - // so we don't try to do it again if another account has - // the same storage root. - corruptedStorageRoots := make(map[common.Hash]struct{}) - for it.Next() { - var acc types.StateAccount - require.NoError(t, rlp.DecodeBytes(it.Value, &acc), "failed to decode account at key %x", it.Key) - if acc.Root == types.EmptyRootHash { - continue - } - if _, found := corruptedStorageRoots[acc.Root]; found { - // avoid trying to delete nodes from a trie we have already deleted nodes from - continue - } - accountsWithStorage++ - if accountsWithStorage%2 != 0 { - continue - } - corruptedStorageRoots[acc.Root] = struct{}{} - tr, err := trie.New(trie.TrieID(acc.Root), clientTrieDB) - require.NoError(t, err, "failed to create trie for root %s", acc.Root) - synctest.CorruptTrie(t, clientDB.DiskDB(), tr, 2) - } - require.NoError(t, it.Err, "error iterating over trie nodes") - }, - }, - "delete intermediate account trie nodes": { - deleteBetweenSyncs: func(t *testing.T, root common.Hash, clientDB state.Database) { - clientTrieDB := clientDB.TrieDB() - tr, err := trie.New(trie.TrieID(root), clientTrieDB) - require.NoError(t, err, "failed to create trie for root %s", root) - synctest.CorruptTrie(t, clientDB.DiskDB(), tr, 5) - }, - }, - } { - t.Run(name, func(t *testing.T) { - t.Parallel() - messagetest.ForEachCodec(t, func(c codec.Manager, leafReqType message.LeafsRequestType) { - testSyncerSyncsToNewRoot(t, test.deleteBetweenSyncs, c, leafReqType) - }) - }) - } -} - -func testSyncerSyncsToNewRoot(t *testing.T, deleteBetweenSyncs func(*testing.T, common.Hash, state.Database), c codec.Manager, leafReqType message.LeafsRequestType) { - r := rand.New(rand.NewSource(1)) - clientDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - serverDB := state.NewDatabase(rawdb.NewMemoryDatabase()) - - root1, _ := synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, common.Hash{}, 1000, 3) - root2, _ := 
synctest.FillAccountsWithOverlappingStorage(t, r, serverDB, root1, 1000, 3) - - called := false - - testSyncResumes(t, []syncTest{ - { - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root1 - }, - }, - { - prepareForTest: func(*testing.T, *rand.Rand) (state.Database, state.Database, common.Hash) { - return clientDB, serverDB, root2 - }, - }, - }, func() { - // Only perform the delete stage once - if called { - return - } - called = true - // delete snapshot first since this is not the responsibility of the EVM State Syncer - <-snapshot.WipeSnapshot(clientDB.DiskDB(), false) - - deleteBetweenSyncs(t, root1, clientDB) - }, c, leafReqType) -} - -// assertDBConsistency checks [serverTrieDB] and [clientTrieDB] have the same EVM state trie at [root], -// and that [clientTrieDB.DiskDB] has corresponding account & snapshot values. -// Also verifies any code referenced by the EVM state is present in [clientTrieDB] and the hash is correct. -func assertDBConsistency(t testing.TB, root common.Hash, clientDB, serverDB state.Database) { - numSnapshotAccounts := 0 - accountIt := customrawdb.NewAccountSnapshotsIterator(clientDB.DiskDB()) - defer accountIt.Release() - for accountIt.Next() { - if !bytes.HasPrefix(accountIt.Key(), rawdb.SnapshotAccountPrefix) || len(accountIt.Key()) != len(rawdb.SnapshotAccountPrefix)+common.HashLength { - continue - } - numSnapshotAccounts++ - } - require.NoError(t, accountIt.Error(), "error iterating over account snapshots") - trieAccountLeaves := 0 - - synctest.AssertTrieConsistency(t, root, serverDB.TrieDB(), clientDB.TrieDB(), func(key, val []byte) error { - trieAccountLeaves++ - accHash := common.BytesToHash(key) - var acc types.StateAccount - if err := rlp.DecodeBytes(val, &acc); err != nil { - return err - } - // check snapshot consistency - snapshotVal := rawdb.ReadAccountSnapshot(clientDB.DiskDB(), accHash) - expectedSnapshotVal := types.SlimAccountRLP(acc) - require.Equal(t, expectedSnapshotVal, snapshotVal) - - // check code consistency - if !bytes.Equal(acc.CodeHash, types.EmptyCodeHash[:]) { - codeHash := common.BytesToHash(acc.CodeHash) - code := rawdb.ReadCode(clientDB.DiskDB(), codeHash) - actualHash := crypto.Keccak256Hash(code) - require.NotEmpty(t, code) - require.Equal(t, codeHash, actualHash) - } - if acc.Root == types.EmptyRootHash { - return nil - } - - storageIt := rawdb.IterateStorageSnapshots(clientDB.DiskDB(), accHash) - defer storageIt.Release() - - snapshotStorageKeysCount := 0 - for storageIt.Next() { - snapshotStorageKeysCount++ - } - - storageTrieLeavesCount := 0 - - // check storage trie and storage snapshot consistency - synctest.AssertTrieConsistency(t, acc.Root, serverDB.TrieDB(), clientDB.TrieDB(), func(key, val []byte) error { - storageTrieLeavesCount++ - snapshotVal := rawdb.ReadStorageSnapshot(clientDB.DiskDB(), accHash, common.BytesToHash(key)) - require.Equal(t, val, snapshotVal) - return nil - }) - - require.Equal(t, storageTrieLeavesCount, snapshotStorageKeysCount) - return nil - }) - - // Check that the number of accounts in the snapshot matches the number of leaves in the accounts trie - require.Equal(t, trieAccountLeaves, numSnapshotAccounts) -} diff --git a/graft/evm/sync/evmstate/trie_queue.go b/graft/evm/sync/evmstate/trie_queue.go index b1d63c5e0008..14563f504c16 100644 --- a/graft/evm/sync/evmstate/trie_queue.go +++ b/graft/evm/sync/evmstate/trie_queue.go @@ -27,9 +27,10 @@ func NewTrieQueue(db ethdb.Database) *trieQueue { } } -// 
clearIfRootDoesNotMatch clears progress and segment markers if -// the persisted root does not match the root we are syncing to. -func (t *trieQueue) clearIfRootDoesNotMatch(root common.Hash) error { +// clearIfRootDoesNotMatch clears sync progress when the root changes. +// When preserveSegments is true, segment markers are kept so unchanged +// storage tries can resume. +func (t *trieQueue) clearIfRootDoesNotMatch(root common.Hash, preserveSegments bool) error { persistedRoot, err := customrawdb.ReadSyncRoot(t.db) // If no sync root exists, treat it as empty hash (no previous sync). switch { @@ -39,14 +40,22 @@ func (t *trieQueue) clearIfRootDoesNotMatch(root common.Hash) error { return err } - if persistedRoot != (common.Hash{}) && persistedRoot != root { - // if not resuming, clear all progress markers - if err := customrawdb.ClearAllSyncStorageTries(t.db); err != nil { - return err - } - if err := customrawdb.ClearAllSyncSegments(t.db); err != nil { - return err - } + if persistedRoot == (common.Hash{}) || persistedRoot == root { + return customrawdb.WriteSyncRoot(t.db, root) + } + if err := customrawdb.ClearAllSyncStorageTries(t.db); err != nil { + return err + } + switch { + case preserveSegments: + // Only clear the old main trie segments. Storage trie segments + // are keyed by their own root and stay valid for unchanged tries. + err = customrawdb.ClearSyncSegments(t.db, persistedRoot) + default: + err = customrawdb.ClearAllSyncSegments(t.db) + } + if err != nil { + return err } return customrawdb.WriteSyncRoot(t.db, root) diff --git a/graft/evm/sync/evmstate/trie_segments.go b/graft/evm/sync/evmstate/trie_segments.go index b7b06e63fe02..7e2f079882e6 100644 --- a/graft/evm/sync/evmstate/trie_segments.go +++ b/graft/evm/sync/evmstate/trie_segments.go @@ -54,7 +54,7 @@ type trieToSync struct { // We keep a pointer to the overall sync operation, // used to add segments to the work queue and to // update the eta. - sync *stateSync + sync *HashDBSyncer // task implements the syncTask interface with methods // containing logic specific to the main trie or storage @@ -64,7 +64,7 @@ type trieToSync struct { } // NewTrieToSync initializes a trieToSync and restores any previously started segments. -func NewTrieToSync(sync *stateSync, root common.Hash, account common.Hash, syncTask syncTask) (*trieToSync, error) { +func NewTrieToSync(sync *HashDBSyncer, root common.Hash, account common.Hash, syncTask syncTask) (*trieToSync, error) { batch := sync.db.NewBatch() // TODO: migrate state sync to use database schemes. 
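	// The writeFn below persists every fetched node with the legacy hash
	// scheme. A path-scheme variant would differ only in the scheme argument
	// (a sketch under the assumption that libevm exports rawdb.PathScheme
	// like upstream go-ethereum; not part of this change):
	//
	//	writeFn := func(path []byte, hash common.Hash, blob []byte) {
	//		rawdb.WriteTrieNode(batch, account, path, hash, blob, rawdb.PathScheme)
	//	}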
writeFn := func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(batch, account, path, hash, blob, rawdb.HashScheme) diff --git a/graft/evm/sync/evmstate/trie_sync_tasks.go b/graft/evm/sync/evmstate/trie_sync_tasks.go index dacd29c53b24..7dfc51ce9c69 100644 --- a/graft/evm/sync/evmstate/trie_sync_tasks.go +++ b/graft/evm/sync/evmstate/trie_sync_tasks.go @@ -37,10 +37,10 @@ type syncTask interface { } type mainTrieTask struct { - sync *stateSync + sync *HashDBSyncer } -func NewMainTrieTask(sync *stateSync) syncTask { +func NewMainTrieTask(sync *HashDBSyncer) syncTask { return &mainTrieTask{ sync: sync, } @@ -76,8 +76,15 @@ func (m *mainTrieTask) OnLeafs(ctx context.Context, db ethdb.KeyValueWriter, key // check if this account has storage root that we need to fetch if acc.Root != (common.Hash{}) && acc.Root != types.EmptyRootHash { - if err := m.sync.trieQueue.RegisterStorageTrie(acc.Root, accountHash); err != nil { - return err + hasFilter := m.sync.storageTrieFilter != nil + skip := hasFilter && m.sync.storageTrieFilter(m.sync.db, accountHash, acc.Root) + if i == 0 { + log.Info("OnLeafs first account with storage", "hasFilter", hasFilter, "skip", skip, "accountHash", accountHash, "storageRoot", acc.Root) + } + if !skip { + if err := m.sync.trieQueue.RegisterStorageTrie(acc.Root, accountHash); err != nil { + return err + } } } @@ -93,12 +100,12 @@ func (m *mainTrieTask) OnLeafs(ctx context.Context, db ethdb.KeyValueWriter, key } type storageTrieTask struct { - sync *stateSync + sync *HashDBSyncer root common.Hash accounts []common.Hash } -func NewStorageTrieTask(sync *stateSync, root common.Hash, accounts []common.Hash) syncTask { +func NewStorageTrieTask(sync *HashDBSyncer, root common.Hash, accounts []common.Hash) syncTask { return &storageTrieTask{ sync: sync, root: root, diff --git a/graft/evm/sync/handlers/block_request_test.go b/graft/evm/sync/handlers/block_request_test.go index a79c2d754eb6..1d24e435ee8c 100644 --- a/graft/evm/sync/handlers/block_request_test.go +++ b/graft/evm/sync/handlers/block_request_test.go @@ -31,7 +31,7 @@ type blockRequestTest struct { startBlockHeight uint64 requestedParents uint16 - expectedBlocks int + wantBlocks int expectNilResponse bool requireResponse func(t testing.TB, stats *statstest.TestHandlerStats, b []byte) } @@ -79,7 +79,7 @@ func executeBlockRequestTest(t testing.TB, test blockRequestTest, blocks []*type var response message.BlockResponse _, err = c.Unmarshal(responseBytes, &response) require.NoError(t, err) - require.Len(t, response.Blocks, test.expectedBlocks) + require.Len(t, response.Blocks, test.wantBlocks) for _, blockBytes := range response.Blocks { block := new(types.Block) @@ -101,19 +101,19 @@ func TestBlockRequestHandler(t *testing.T) { name: "handler_returns_blocks_as_requested", startBlockIndex: 64, requestedParents: 32, - expectedBlocks: 32, + wantBlocks: 32, }, { name: "handler_caps_blocks_parent_limit", startBlockIndex: 95, requestedParents: 96, - expectedBlocks: 64, + wantBlocks: 64, }, { name: "handler_handles_genesis", startBlockIndex: 0, requestedParents: 64, - expectedBlocks: 1, + wantBlocks: 1, }, { name: "handler_unknown_block", @@ -158,19 +158,19 @@ func TestBlockRequestHandlerLargeBlocks(t *testing.T) { name: "handler_returns_blocks_as_requested", startBlockIndex: 64, requestedParents: 10, - expectedBlocks: 10, + wantBlocks: 10, }, { name: "handler_caps_blocks_size_limit", startBlockIndex: 64, requestedParents: 16, - expectedBlocks: 15, + wantBlocks: 15, }, { name: 
"handler_caps_blocks_size_limit_on_first_block", startBlockIndex: 32, requestedParents: 10, - expectedBlocks: 1, + wantBlocks: 1, }, } for _, test := range tests { diff --git a/graft/evm/sync/handlers/code_request_test.go b/graft/evm/sync/handlers/code_request_test.go index 14b31af3743a..fcc75d419955 100644 --- a/graft/evm/sync/handlers/code_request_test.go +++ b/graft/evm/sync/handlers/code_request_test.go @@ -40,11 +40,11 @@ func TestCodeRequestHandler(t *testing.T) { testHandlerStats := &statstest.TestHandlerStats{} tests := map[string]struct { - setup func() (request message.CodeRequest, expectedCodeResponse [][]byte) + setup func() (request message.CodeRequest, wantCodeResponse [][]byte) verifyStats func(t *testing.T) }{ "normal": { - setup: func() (request message.CodeRequest, expectedCodeResponse [][]byte) { + setup: func() (request message.CodeRequest, wantCodeResponse [][]byte) { return message.CodeRequest{ Hashes: []common.Hash{codeHash}, }, [][]byte{codeBytes} @@ -55,7 +55,7 @@ func TestCodeRequestHandler(t *testing.T) { }, }, "duplicate hashes": { - setup: func() (request message.CodeRequest, expectedCodeResponse [][]byte) { + setup: func() (request message.CodeRequest, wantCodeResponse [][]byte) { return message.CodeRequest{ Hashes: []common.Hash{codeHash, codeHash}, }, nil @@ -65,7 +65,7 @@ func TestCodeRequestHandler(t *testing.T) { }, }, "too many hashes": { - setup: func() (request message.CodeRequest, expectedCodeResponse [][]byte) { + setup: func() (request message.CodeRequest, wantCodeResponse [][]byte) { return message.CodeRequest{ Hashes: []common.Hash{{1}, {2}, {3}, {4}, {5}, {6}}, }, nil @@ -75,7 +75,7 @@ func TestCodeRequestHandler(t *testing.T) { }, }, "max size code handled": { - setup: func() (request message.CodeRequest, expectedCodeResponse [][]byte) { + setup: func() (request message.CodeRequest, wantCodeResponse [][]byte) { return message.CodeRequest{ Hashes: []common.Hash{maxSizeCodeHash}, }, [][]byte{maxSizeCodeBytes} @@ -92,20 +92,20 @@ func TestCodeRequestHandler(t *testing.T) { for name, test := range tests { testHandlerStats.Reset() t.Run(name, func(t *testing.T) { - request, expectedResponse := test.setup() + request, wantResponse := test.setup() responseBytes, err := codeRequestHandler.OnCodeRequest(t.Context(), ids.GenerateTestNodeID(), 1, request) require.NoError(t, err) // If the expected response is empty, require that the handler returns an empty response and return early. 
- if len(expectedResponse) == 0 { + if len(wantResponse) == 0 { require.Empty(t, responseBytes, "expected response to be empty") return } var response message.CodeResponse _, err = c.Unmarshal(responseBytes, &response) require.NoError(t, err) - require.Len(t, response.Data, len(expectedResponse)) - for i, code := range expectedResponse { + require.Len(t, response.Data, len(wantResponse)) + for i, code := range wantResponse { require.Equal(t, code, response.Data[i], "code bytes mismatch at index %d", i) } test.verifyStats(t) diff --git a/graft/evm/sync/synctest/BUILD.bazel b/graft/evm/sync/synctest/BUILD.bazel index f184ee4be146..0f224f46ba0d 100644 --- a/graft/evm/sync/synctest/BUILD.bazel +++ b/graft/evm/sync/synctest/BUILD.bazel @@ -4,13 +4,18 @@ go_library( name = "synctest", srcs = [ "blocks.go", + "sync_target.go", "trie.go", ], importpath = "github.com/ava-labs/avalanchego/graft/evm/sync/synctest", visibility = ["//visibility:public"], deps = [ + "//graft/evm/message", "//graft/evm/utils/utilstest", + "//ids", + "//snow/engine/snowman/block", "//utils/wrappers", + "//vms/evm/sync/customrawdb", "@com_github_ava_labs_libevm//common", "@com_github_ava_labs_libevm//core/rawdb", "@com_github_ava_labs_libevm//core/state", @@ -19,6 +24,7 @@ go_library( "@com_github_ava_labs_libevm//ethdb", "@com_github_ava_labs_libevm//libevm/stateconf", "@com_github_ava_labs_libevm//params", + "@com_github_ava_labs_libevm//rlp", "@com_github_ava_labs_libevm//trie", "@com_github_ava_labs_libevm//trie/trienode", "@com_github_ava_labs_libevm//triedb", diff --git a/graft/evm/sync/synctest/pivot_session.go b/graft/evm/sync/synctest/pivot_session.go new file mode 100644 index 000000000000..76e0cfe30318 --- /dev/null +++ b/graft/evm/sync/synctest/pivot_session.go @@ -0,0 +1,25 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package synctest + +import ( + "context" + + "github.com/ava-labs/libevm/common" + + "github.com/ava-labs/avalanchego/graft/evm/sync/types" +) + +var _ types.PivotSession = (*PivotSession)(nil) + +// PivotSession is a no-op types.PivotSession for testing DynamicSyncer +// without a real sync backend. +type PivotSession struct{} + +func (*PivotSession) Run(context.Context) error { return nil } +func (*PivotSession) Rebuild(common.Hash, uint64) (types.PivotSession, error) { + return &PivotSession{}, nil +} +func (*PivotSession) ShouldPivot(common.Hash) bool { return true } +func (*PivotSession) OnSessionComplete() error { return nil } diff --git a/graft/evm/sync/synctest/sync_target.go b/graft/evm/sync/synctest/sync_target.go new file mode 100644 index 000000000000..7f9d6a0081e4 --- /dev/null +++ b/graft/evm/sync/synctest/sync_target.go @@ -0,0 +1,31 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package synctest + +import ( + "context" + + "github.com/ava-labs/libevm/common" + + "github.com/ava-labs/avalanchego/graft/evm/message" + "github.com/ava-labs/avalanchego/ids" + + snowblock "github.com/ava-labs/avalanchego/snow/engine/snowman/block" +) + +var _ message.Syncable = (*SyncTarget)(nil) + +// SyncTarget is a minimal message.Syncable implementation for tests. 
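+//
+// A minimal usage sketch (the syncer variable is illustrative; any
+// types.Syncer with an UpdateTarget method is driven the same way):
+//
+//	target := &SyncTarget{BlockHash: hash, BlockRoot: root, BlockHeight: 7}
+//	require.NoError(t, syncer.UpdateTarget(target))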
+type SyncTarget struct { + BlockHash common.Hash + BlockRoot common.Hash + BlockHeight uint64 +} + +func (s *SyncTarget) GetBlockHash() common.Hash { return s.BlockHash } +func (s *SyncTarget) GetBlockRoot() common.Hash { return s.BlockRoot } +func (s *SyncTarget) ID() ids.ID { return ids.ID(s.BlockHash) } +func (s *SyncTarget) Height() uint64 { return s.BlockHeight } +func (s *SyncTarget) Bytes() []byte { return s.BlockHash.Bytes() } +func (*SyncTarget) Accept(context.Context) (snowblock.StateSyncMode, error) { return 0, nil } diff --git a/graft/evm/sync/synctest/trie.go b/graft/evm/sync/synctest/trie.go index afbe86f2cd28..6e3e3111f5ea 100644 --- a/graft/evm/sync/synctest/trie.go +++ b/graft/evm/sync/synctest/trie.go @@ -4,6 +4,7 @@ package synctest import ( + "bytes" "encoding/binary" "math/rand" "testing" @@ -15,6 +16,7 @@ import ( "github.com/ava-labs/libevm/crypto" "github.com/ava-labs/libevm/ethdb" "github.com/ava-labs/libevm/libevm/stateconf" + "github.com/ava-labs/libevm/rlp" "github.com/ava-labs/libevm/trie" "github.com/ava-labs/libevm/trie/trienode" "github.com/ava-labs/libevm/triedb" @@ -23,6 +25,7 @@ import ( "github.com/ava-labs/avalanchego/graft/evm/utils/utilstest" "github.com/ava-labs/avalanchego/utils/wrappers" + "github.com/ava-labs/avalanchego/vms/evm/sync/customrawdb" ) // FillAccountsWithOverlappingStorage adds [numAccounts] randomly generated accounts to the secure trie at [root] @@ -107,6 +110,7 @@ func FillIndependentTrie(t *testing.T, r *rand.Rand, start, numKeys int, keySize // // This is only safe for HashDB or PathDB, since Firewood doesn't store trie nodes individually. func AssertTrieConsistency(t testing.TB, root common.Hash, a, b *triedb.Database, onLeaf func(key, val []byte) error) { + t.Helper() trieA, err := trie.New(trie.TrieID(root), a) require.NoError(t, err) trieB, err := trie.New(trie.TrieID(root), b) @@ -155,6 +159,102 @@ func CorruptTrie(t *testing.T, diskdb ethdb.Batcher, tr *trie.Trie, n int) { require.NoError(t, batch.Write()) } +// AssertDBConsistency checks that clientDB and serverDB have the same EVM state trie at root, +// that clientDB has matching account and storage snapshots, and that all referenced code is +// present with correct hashes. +// +// This is only safe for HashDB, since it relies on trie node iteration. +func AssertDBConsistency(t testing.TB, root common.Hash, clientDB, serverDB state.Database) { + t.Helper() + numSnapshotAccounts := countSnapshotAccounts(t, clientDB) + trieAccountLeaves := 0 + + assertAccountLeaf := func(key, val []byte) error { + trieAccountLeaves++ + accHash := common.BytesToHash(key) + acc, err := decodeAndCheckAccountSnapshot(t, clientDB, accHash, val) + if err != nil { + return err + } + checkCodeConsistency(t, clientDB, acc) + checkStorageConsistency(t, clientDB, serverDB, accHash, acc) + return nil + } + AssertTrieConsistency(t, root, serverDB.TrieDB(), clientDB.TrieDB(), assertAccountLeaf) + + require.Equal(t, trieAccountLeaves, numSnapshotAccounts, "snapshot account count must match trie leaf count") +} + +// countSnapshotAccounts returns the number of account snapshots in the database. 
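+// The prefix and length checks mirror the assertDBConsistency logic this
+// helper was extracted from, defensively skipping any key that is not
+// exactly a prefixed account hash.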
+func countSnapshotAccounts(t testing.TB, db state.Database) int { + t.Helper() + count := 0 + it := customrawdb.NewAccountSnapshotsIterator(db.DiskDB()) + defer it.Release() + for it.Next() { + if !bytes.HasPrefix(it.Key(), rawdb.SnapshotAccountPrefix) || len(it.Key()) != len(rawdb.SnapshotAccountPrefix)+common.HashLength { + continue + } + count++ + } + require.NoError(t, it.Error(), "error iterating over account snapshots") + return count +} + +// decodeAndCheckAccountSnapshot decodes an RLP-encoded account and verifies +// the client DB has a matching account snapshot entry. +func decodeAndCheckAccountSnapshot(t testing.TB, clientDB state.Database, accHash common.Hash, val []byte) (types.StateAccount, error) { + t.Helper() + var acc types.StateAccount + if err := rlp.DecodeBytes(val, &acc); err != nil { + return acc, err + } + snapshotVal := rawdb.ReadAccountSnapshot(clientDB.DiskDB(), accHash) + wantSnapshotVal := types.SlimAccountRLP(acc) + require.Equal(t, wantSnapshotVal, snapshotVal) + return acc, nil +} + +// checkCodeConsistency verifies that any code referenced by the account is +// present in the client DB and has the correct hash. +func checkCodeConsistency(t testing.TB, clientDB state.Database, acc types.StateAccount) { + t.Helper() + if bytes.Equal(acc.CodeHash, types.EmptyCodeHash[:]) { + return + } + codeHash := common.BytesToHash(acc.CodeHash) + code := rawdb.ReadCode(clientDB.DiskDB(), codeHash) + require.NotEmpty(t, code) + require.Equal(t, codeHash, crypto.Keccak256Hash(code)) +} + +// checkStorageConsistency verifies that the account's storage trie matches +// between client and server, and that storage snapshot entries match trie leaves. +func checkStorageConsistency(t testing.TB, clientDB, serverDB state.Database, accHash common.Hash, acc types.StateAccount) { + t.Helper() + if acc.Root == types.EmptyRootHash { + return + } + + snapshotStorageKeysCount := 0 + storageIt := rawdb.IterateStorageSnapshots(clientDB.DiskDB(), accHash) + defer storageIt.Release() + for storageIt.Next() { + snapshotStorageKeysCount++ + } + + storageTrieLeavesCount := 0 + assertStorageLeaf := func(key, val []byte) error { + storageTrieLeavesCount++ + snapshotVal := rawdb.ReadStorageSnapshot(clientDB.DiskDB(), accHash, common.BytesToHash(key)) + require.Equal(t, val, snapshotVal) + return nil + } + AssertTrieConsistency(t, acc.Root, serverDB.TrieDB(), clientDB.TrieDB(), assertStorageLeaf) + + require.Equal(t, storageTrieLeavesCount, snapshotStorageKeysCount) +} + // FillAccounts adds [numAccounts] randomly generated accounts to the secure trie at [root] and commits it to [trieDB]. // [onAccount] is called if non-nil so the caller can modify the account before it is stored in the trie. // If the trie in the callback is used (i.e. tr.Hash() doesn't return the empty root), the account's storage root will be updated to match. diff --git a/graft/evm/sync/types/BUILD.bazel b/graft/evm/sync/types/BUILD.bazel index 1e6c01fc0dda..984e74621f6c 100644 --- a/graft/evm/sync/types/BUILD.bazel +++ b/graft/evm/sync/types/BUILD.bazel @@ -8,5 +8,6 @@ go_library( deps = [ "//database/versiondb", "//graft/evm/message", + "@com_github_ava_labs_libevm//common", ], ) diff --git a/graft/evm/sync/types/dynamic_syncer.go b/graft/evm/sync/types/dynamic_syncer.go new file mode 100644 index 000000000000..ac9977dd2b47 --- /dev/null +++ b/graft/evm/sync/types/dynamic_syncer.go @@ -0,0 +1,143 @@ +// Copyright (C) 2019, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package types + +import ( + "context" + "errors" + "sync" + + "github.com/ava-labs/libevm/common" + + "github.com/ava-labs/avalanchego/graft/evm/message" +) + +var ( + _ Syncer = (*DynamicSyncer)(nil) + _ Finalizer = (*DynamicSyncer)(nil) + + errPivotRequested = errors.New("pivot requested") +) + +// DynamicSyncer implements the session-restart pattern shared by all dynamic +// syncers. It owns target tracking, session cancellation, and the pivot loop, +// delegating session-specific behavior to the PivotSession. +type DynamicSyncer struct { + session PivotSession + name string + id string + + mu sync.Mutex + desiredRoot common.Hash + desiredHeight uint64 + sessionCancel context.CancelCauseFunc +} + +func NewDynamicSyncer( + name, id string, + session PivotSession, + initialRoot common.Hash, + initialHeight uint64, +) *DynamicSyncer { + return &DynamicSyncer{ + session: session, + name: name, + id: id, + desiredRoot: initialRoot, + desiredHeight: initialHeight, + } +} + +func (d *DynamicSyncer) Name() string { return d.name } +func (d *DynamicSyncer) ID() string { return d.id } + +// Sync runs the session-restart loop. Each iteration delegates to the +// PivotSession. When UpdateTarget triggers a pivot, the current session is +// cancelled and rebuilt for the new target. +func (d *DynamicSyncer) Sync(ctx context.Context) error { + for { + sessionCtx, sessionCancel := context.WithCancelCause(ctx) + d.setSessionCancel(sessionCancel) + + err := d.session.Run(sessionCtx) + + d.setSessionCancel(nil) + sessionCancel(nil) + + if err == nil { + return d.session.OnSessionComplete() + } + if !errors.Is(context.Cause(sessionCtx), errPivotRequested) { + return err + } + + newRoot, newHeight := d.getDesiredTarget() + newSession, err := d.session.Rebuild(newRoot, newHeight) + if err != nil { + return err + } + d.session = newSession + } +} + +// UpdateTarget records a newer sync target. If the session's ShouldPivot +// returns true for the new root, the active session is cancelled so the +// loop restarts with a fresh session. Thread-safe and non-blocking. +func (d *DynamicSyncer) UpdateTarget(newTarget message.Syncable) error { + d.mu.Lock() + defer d.mu.Unlock() + + newHeight := newTarget.Height() + if newHeight <= d.desiredHeight { + return nil + } + + newRoot := newTarget.GetBlockRoot() + if !d.session.ShouldPivot(newRoot) { + d.desiredHeight = newHeight + return nil + } + + d.desiredRoot = newRoot + d.desiredHeight = newHeight + + if d.sessionCancel != nil { + d.sessionCancel(errPivotRequested) + } + return nil +} + +// TargetHeight returns the latest desired height. +func (d *DynamicSyncer) TargetHeight() uint64 { + d.mu.Lock() + defer d.mu.Unlock() + return d.desiredHeight +} + +// Finalize delegates to the current session if it implements Finalizer. +func (d *DynamicSyncer) Finalize() error { + if f, ok := d.session.(Finalizer); ok { + return f.Finalize() + } + return nil +} + +// DesiredRoot returns the current desired root (exposed for testing). 
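+//
+// A rough test sketch (illustrative only; code in this package cannot
+// import synctest without a cycle, since synctest depends on types):
+//
+//	d := NewDynamicSyncer("evm state", "state_evm_state_sync", &synctest.PivotSession{}, root1, 1)
+//	go func() { _ = d.Sync(ctx) }()
+//	_ = d.UpdateTarget(&synctest.SyncTarget{BlockRoot: root2, BlockHeight: 2})
+//	// DesiredRoot reports root2 once the update is recorded.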
+func (d *DynamicSyncer) DesiredRoot() common.Hash { + d.mu.Lock() + defer d.mu.Unlock() + return d.desiredRoot +} + +func (d *DynamicSyncer) setSessionCancel(cancel context.CancelCauseFunc) { + d.mu.Lock() + defer d.mu.Unlock() + d.sessionCancel = cancel +} + +func (d *DynamicSyncer) getDesiredTarget() (common.Hash, uint64) { + d.mu.Lock() + defer d.mu.Unlock() + return d.desiredRoot, d.desiredHeight +} diff --git a/graft/evm/sync/types/types.go b/graft/evm/sync/types/types.go index e0680bb973ad..629d789c21a8 100644 --- a/graft/evm/sync/types/types.go +++ b/graft/evm/sync/types/types.go @@ -6,49 +6,62 @@ package types import ( "context" + "github.com/ava-labs/libevm/common" + "github.com/ava-labs/avalanchego/database/versiondb" "github.com/ava-labs/avalanchego/graft/evm/message" ) // Syncer is the common interface for all sync operations. -// This provides a unified interface for atomic state sync and state trie sync. type Syncer interface { - // Sync completes the full sync operation, returning any errors encountered. - // The sync will respect context cancellation. + // Sync runs the full sync, respecting context cancellation. Sync(ctx context.Context) error - - // Name returns a human-readable name for this syncer implementation. + // UpdateTarget updates the sync target mid-sync. Static syncers may no-op. + UpdateTarget(newTarget message.Syncable) error + // Name returns a human-readable name for logging. Name() string - - // ID returns a stable, machine-oriented identifier (e.g., "state_block_sync", "state_code_sync", - // "state_evm_state_sync", "state_atomic_sync"). Implementations should ensure this is unique and - // stable across renames for logging/metrics/deduplication. + // ID returns a stable machine-oriented identifier for metrics and dedup. ID() string } -// Finalizer provides a mechanism to perform cleanup operations after a sync operation. -// This is useful for handling inflight requests, flushing to disk, or other cleanup tasks. +// Finalizer flushes in-progress work (inflight requests, disk writes, etc.). type Finalizer interface { - // Finalize performs any necessary cleanup operations. Finalize() error } -// LeafClient is the interface for fetching leaves from the network. -// This is defined here to avoid circular dependencies with the leaf package. +// PivotSession represents one sync session inside a DynamicSyncer. When the +// target changes, the current session is cancelled and Rebuild creates a +// fresh session for the new target. +type PivotSession interface { + // Run syncs to completion or until ctx is cancelled. + Run(ctx context.Context) error + // Rebuild cleans up the current session and returns a new one for the + // given root and height. + Rebuild(newRoot common.Hash, newHeight uint64) (PivotSession, error) + // ShouldPivot reports whether newRoot requires restarting. Returning + // false lets the loop bump the height without restarting. + ShouldPivot(newRoot common.Hash) bool + // OnSessionComplete is called once when sync finishes successfully. + OnSessionComplete() error +} + +// CodeRequestQueue enqueues code hashes for the code syncer to fetch. +type CodeRequestQueue interface { + AddCode(context.Context, []common.Hash) error + // Finalize closes the queue, signalling the code syncer to exit. + Finalize() error +} + +// LeafClient fetches leaves from the network. Responses include verified +// range proofs. Defined here to avoid circular deps with the leaf package. 
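+//
+// Each call is one verified range fetch, roughly (response field names are
+// an assumption borrowed from coreth's message package):
+//
+//	resp, err := client.GetLeafs(ctx, req)
+//	// resp.Keys/resp.Vals may be persisted directly; the proof was checked.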
type LeafClient interface { - // GetLeafs synchronously sends the given request, returning a parsed LeafsResponse or error. - // Note: this verifies the response including the range proofs. GetLeafs(ctx context.Context, request message.LeafsRequest) (message.LeafsResponse, error) } -// Extender is an interface that allows for extending the state sync process. +// Extender hooks into the state sync lifecycle for VM-specific work +// (e.g., atomic trie sync in coreth). type Extender interface { - // CreateSyncer creates a syncer instance for the given client, database, and summary. CreateSyncer(client LeafClient, verDB *versiondb.Database, summary message.Syncable) (Syncer, error) - - // OnFinishBeforeCommit is called before committing the sync results. OnFinishBeforeCommit(lastAcceptedHeight uint64, summary message.Syncable) error - - // OnFinishAfterCommit is called after committing the sync results. OnFinishAfterCommit(summaryHeight uint64) error } diff --git a/graft/subnet-evm/core/blockchain.go b/graft/subnet-evm/core/blockchain.go index 3253fbfbfe71..c665b0dd8a58 100644 --- a/graft/subnet-evm/core/blockchain.go +++ b/graft/subnet-evm/core/blockchain.go @@ -2202,6 +2202,12 @@ func (bc *BlockChain) ResetToStateSyncedBlock(block *types.Block) error { bc.chainmu.Lock() defer bc.chainmu.Unlock() + // Persist the block to the raw DB. During dynamic sync the commit-target + // block may not have been fetched by the block syncer if the target + // advanced beyond its initial fetch window. + rawdb.WriteBlock(bc.db, block) + rawdb.WriteCanonicalHash(bc.db, block.Hash(), block.NumberU64()) + // Update head block and snapshot pointers on disk batch := bc.db.NewBatch() if err := bc.batchBlockAcceptedIndices(batch, block); err != nil { diff --git a/graft/subnet-evm/plugin/evm/config/config.go b/graft/subnet-evm/plugin/evm/config/config.go index 2ddfbdc27ba8..cbebe0622140 100644 --- a/graft/subnet-evm/plugin/evm/config/config.go +++ b/graft/subnet-evm/plugin/evm/config/config.go @@ -130,7 +130,9 @@ type Config struct { // Sync settings StateSyncEnabled bool `json:"state-sync-enabled"` - StateSyncSkipResume bool `json:"state-sync-skip-resume"` // Forces state sync to use the highest available summary block + StateSyncSkipResume bool `json:"state-sync-skip-resume"` // Forces state sync to use the highest available summary block + StateSyncDynamicEnabled bool `json:"state-sync-dynamic-enabled"` // Enables dynamic state sync orchestration. + StateSyncPivotInterval uint64 `json:"state-sync-pivot-interval"` // Block interval for forwarding dynamic sync target updates. StateSyncServerTrieCache int `json:"state-sync-server-trie-cache"` StateSyncIDs string `json:"state-sync-ids"` StateSyncCommitInterval uint64 `json:"state-sync-commit-interval"` diff --git a/graft/subnet-evm/plugin/evm/config/config.md b/graft/subnet-evm/plugin/evm/config/config.md index 1bd4dd6ec54e..856a602190b0 100644 --- a/graft/subnet-evm/plugin/evm/config/config.md +++ b/graft/subnet-evm/plugin/evm/config/config.md @@ -233,6 +233,8 @@ Configuration is provided as a JSON object. 
All fields are optional unless other |--------|------|-------------|---------| | `state-sync-enabled` | bool | Enable state sync | `false` | | `state-sync-skip-resume` | bool | Force state sync to use highest available summary block | `false` | +| `state-sync-dynamic-enabled` | bool | Enable dynamic state sync orchestration (deferred block operations + pivot updates) | `false` | +| `state-sync-pivot-interval` | uint64 | Number of blocks between dynamic sync target updates (used when dynamic sync is enabled) | `10000` | | `state-sync-ids` | string | Comma-separated list of state sync IDs | - | | `state-sync-commit-interval` | uint64 | Commit interval for state sync (blocks) | `16384` | | `state-sync-min-blocks` | uint64 | Minimum blocks ahead required for state sync | `300000` | diff --git a/graft/subnet-evm/plugin/evm/config/default_config.go b/graft/subnet-evm/plugin/evm/config/default_config.go index f0b69f5f176c..24471328033e 100644 --- a/graft/subnet-evm/plugin/evm/config/default_config.go +++ b/graft/subnet-evm/plugin/evm/config/default_config.go @@ -11,7 +11,10 @@ import ( "github.com/ava-labs/avalanchego/database/pebbledb" ) -const defaultCommitInterval = 4096 +const ( + defaultCommitInterval = 4096 + defaultStateSyncPivotInterval = 10_000 +) func NewDefaultConfig() Config { return Config{ @@ -77,7 +80,11 @@ func NewDefaultConfig() Config { StateSyncMinBlocks: 300_000, // the number of key/values to ask peers for per request StateSyncRequestSize: 1024, - StateHistory: uint64(32), + // Dynamic state sync is opt-in. + StateSyncDynamicEnabled: false, + // Number of blocks between dynamic sync target updates. + StateSyncPivotInterval: defaultStateSyncPivotInterval, + StateHistory: uint64(32), // Estimated block count in 24 hours with 2s block accept period HistoricalProofQueryWindow: uint64(24 * time.Hour / (2 * time.Second)), // Mempool settings diff --git a/graft/subnet-evm/plugin/evm/syncervm_test.go b/graft/subnet-evm/plugin/evm/syncervm_test.go index e532b08b0b59..33c07985a612 100644 --- a/graft/subnet-evm/plugin/evm/syncervm_test.go +++ b/graft/subnet-evm/plugin/evm/syncervm_test.go @@ -65,17 +65,34 @@ func TestSkipStateSync(t *testing.T) { } } -func TestStateSyncFromScratch(t *testing.T) { - test := syncTest{ - syncableInterval: 256, - stateSyncMinBlocks: 50, // must be less than [syncableInterval] to perform sync - syncMode: block.StateSyncStatic, +func TestStateSyncFromScratchModes(t *testing.T) { + tests := []struct { + name string + syncMode block.StateSyncMode + dynamicStateSyncEnabled bool + stateSyncPivotInterval uint64 + }{ + {"static", block.StateSyncStatic, false, 0}, + {"dynamic", block.StateSyncDynamic, true, 1}, } - for _, scheme := range schemes { - t.Run(scheme, func(t *testing.T) { - test.stateScheme = scheme - vmSetup := createSyncServerAndClientVMs(t, test, engine.BlocksToFetch) - testSyncerVM(t, vmSetup, test) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + rand.Seed(1) + test := syncTest{ + syncableInterval: 256, + stateSyncMinBlocks: 50, // must be less than [syncableInterval] to perform sync + syncMode: test.syncMode, + dynamicStateSyncEnabled: test.dynamicStateSyncEnabled, + stateSyncPivotInterval: test.stateSyncPivotInterval, + } + for _, scheme := range schemes { + t.Run(scheme, func(t *testing.T) { + test.stateScheme = scheme + vmSetup := createSyncServerAndClientVMs(t, test, engine.BlocksToFetch) + testSyncerVM(t, vmSetup, test) + }) + } }) } } @@ -101,8 +118,6 @@ func TestStateSyncToggleEnabledToDisabled(t *testing.T) { // 
TODO(#4702): flaky test - should be fixed with state sync refactor t.Skip("Flaky test - tracked in #4702") - rand.New(rand.NewSource(1)) - var lock sync.Mutex reqCount := 0 test := syncTest{ @@ -122,7 +137,7 @@ func TestStateSyncToggleEnabledToDisabled(t *testing.T) { require.NoError(t, syncerVM.AppResponse(t.Context(), nodeID, requestID, response)) } }, - expectedErr: context.Canceled, + wantErr: context.Canceled, } vmSetup := createSyncServerAndClientVMs(t, test, engine.BlocksToFetch) @@ -131,7 +146,7 @@ func TestStateSyncToggleEnabledToDisabled(t *testing.T) { test.syncMode = block.StateSyncStatic test.responseIntercept = nil - test.expectedErr = nil + test.wantErr = nil var atomicErr utils.Atomic[error] syncDisabledVM := &VM{} @@ -262,13 +277,128 @@ func TestVMShutdownWhileSyncing(t *testing.T) { require.NoError(t, syncerVM.AppResponse(t.Context(), nodeID, requestID, response)) } }, - expectedErr: context.Canceled, + wantErr: context.Canceled, } vmSetup = createSyncServerAndClientVMs(t, test, engine.BlocksToFetch) // Perform sync resulting in early termination. testSyncerVM(t, vmSetup, test) } +// TestDynamicSyncWithBlockInjection verifies that blocks injected during +// dynamic state sync trigger coordinator pivots and that blocks above the +// commit target are batch-replayed after sync completes. +func TestDynamicSyncWithBlockInjection(t *testing.T) { + const ( + syncableInterval = 256 + extraBlockCount = 12 + ) + + for _, scheme := range schemes { + t.Run(scheme, func(t *testing.T) { + var extraBlockBytes [][]byte + var ( + mu sync.Mutex + injected bool + ) + + txGenFn := func(vm *VM) func(int, *core.BlockGen) { + return func(_ int, gen *core.BlockGen) { + br := predicate.BlockResults{} + b, err := br.Bytes() + require.NoError(t, err) + gen.AppendExtra(b) + + tx := types.NewTransaction(gen.TxNonce(testEthAddrs[0]), testEthAddrs[1], common.Big1, ethparams.TxGas, big.NewInt(testMinGasPrice), nil) + signedTx, err := types.SignTx(tx, types.NewEIP155Signer(vm.chainConfig.ChainID), testKeys[0].ToECDSA()) + require.NoError(t, err) + gen.AddTx(signedTx) + } + } + + // Server VM. + serverConfigJSON := fmt.Sprintf(`"commit-interval": %d, "state-sync-commit-interval": %d, "state-history": %d`, + syncableInterval, syncableInterval, syncableInterval, + ) + serverVM := newVM(t, testVMConfig{ + genesisJSON: toGenesisJSON(paramstest.ForkToChainConfig[upgradetest.Latest]), + configJSON: getConfig(scheme, serverConfigJSON), + }) + t.Cleanup(func() { require.NoError(t, serverVM.vm.Shutdown(t.Context())) }) + + generateAndAcceptBlocks(t, serverVM.vm, syncableInterval, txGenFn(serverVM.vm), nil) + + generateAndAcceptBlocks(t, serverVM.vm, extraBlockCount, txGenFn(serverVM.vm), + func(blk *types.Block) { + b, err := rlp.EncodeToBytes(blk) + require.NoError(t, err) + extraBlockBytes = append(extraBlockBytes, b) + }, + ) + serverHeight := serverVM.vm.LastAcceptedBlockInternal().Height() + + // Syncer VM. 
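+			// Mirror the server's commit interval so the parsed summary is
+			// acceptable, and opt into dynamic sync so the injected blocks
+			// can trigger target updates mid-sync.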
+ syncerConfigJSON := fmt.Sprintf( + `"state-sync-enabled":true, "state-sync-min-blocks": %d, "tx-lookup-limit": %d, "state-sync-commit-interval": %d, "state-sync-dynamic-enabled": true, "state-sync-pivot-interval": %d`, + 50, 4, syncableInterval, 1000, + ) + syncerVM := newVM(t, testVMConfig{ + genesisJSON: toGenesisJSON(paramstest.ForkToChainConfig[upgradetest.Latest]), + configJSON: getConfig(scheme, syncerConfigJSON), + isSyncing: true, + }) + t.Cleanup(func() { require.NoError(t, syncerVM.vm.Shutdown(t.Context())) }) + + require.NoError(t, syncerVM.vm.SetState(t.Context(), snow.StateSyncing)) + + deadline, _ := t.Deadline() + serverVM.appSender.SendAppResponseF = func(ctx context.Context, nodeID ids.NodeID, requestID uint32, response []byte) error { + go func() { + mu.Lock() + if !injected { + injected = true + for _, blkBytes := range extraBlockBytes { + blk, err := syncerVM.vm.ParseBlock(t.Context(), blkBytes) + require.NoError(t, err) + require.NoError(t, blk.Verify(t.Context())) + require.NoError(t, blk.Accept(t.Context())) + } + } + mu.Unlock() + require.NoError(t, syncerVM.vm.AppResponse(ctx, nodeID, requestID, response)) + }() + return nil + } + require.NoError(t, syncerVM.vm.Connected(t.Context(), serverVM.vm.ctx.NodeID, client.StateSyncVersion)) + syncerVM.appSender.SendAppRequestF = func(ctx context.Context, nodeSet set.Set[ids.NodeID], requestID uint32, request []byte) error { + nodeID, hasItem := nodeSet.Pop() + require.True(t, hasItem) + require.NoError(t, serverVM.vm.AppRequest(ctx, nodeID, requestID, deadline, request)) + return nil + } + + summary, err := serverVM.vm.GetLastStateSummary(t.Context()) + require.NoError(t, err) + parsedSummary, err := syncerVM.vm.ParseStateSummary(t.Context(), summary.Bytes()) + require.NoError(t, err) + + syncMode, err := parsedSummary.Accept(t.Context()) + require.NoError(t, err) + require.Equal(t, block.StateSyncDynamic, syncMode) + + msg, err := syncerVM.vm.WaitForEvent(t.Context()) + require.NoError(t, err) + require.Equal(t, commonEng.StateSyncDone, msg) + require.NoError(t, syncerVM.vm.Client.Error()) + + require.NoError(t, syncerVM.vm.SetState(t.Context(), snow.Bootstrapping)) + require.Equal(t, serverHeight, syncerVM.vm.blockChain.LastAcceptedBlock().NumberU64(), "syncer height mismatch after block injection") + require.True(t, syncerVM.vm.blockChain.HasState(syncerVM.vm.blockChain.LastAcceptedBlock().Root()), "state unavailable for last accepted block") + + generateAndAcceptBlocks(t, syncerVM.vm, 5, txGenFn(syncerVM.vm), nil) + }) + } +} + func createSyncServerAndClientVMs(t *testing.T, test syncTest, numBlocks int) *syncVMSetup { require := require.New(t) // configure [serverVM] @@ -332,8 +462,8 @@ func createSyncServerAndClientVMs(t *testing.T, test syncTest, numBlocks int) *s // initialise [syncerVM] with blank genesis state // Match the server's state-sync-commit-interval so parsed summaries are acceptable. 
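	// The two dynamic-sync fields are zero-valued for the static tests and
	// only take effect in the dynamic variants.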
stateSyncEnabledJSON := fmt.Sprintf( - `"state-sync-enabled":true, "state-sync-min-blocks": %d, "tx-lookup-limit": %d, "state-sync-commit-interval": %d`, - test.stateSyncMinBlocks, 4, test.syncableInterval, + `"state-sync-enabled":true, "state-sync-min-blocks": %d, "tx-lookup-limit": %d, "state-sync-commit-interval": %d, "state-sync-dynamic-enabled": %t, "state-sync-pivot-interval": %d`, + test.stateSyncMinBlocks, 4, test.syncableInterval, test.dynamicStateSyncEnabled, test.stateSyncPivotInterval, ) syncerVM := newVM(t, testVMConfig{ genesisJSON: toGenesisJSON(paramstest.ForkToChainConfig[upgradetest.Latest]), @@ -422,12 +552,14 @@ func (vm *shutdownOnceVM) Shutdown(ctx context.Context) error { // syncTest contains both the actual VMs as well as the parameters with the expected output. type syncTest struct { - responseIntercept func(vm *VM, nodeID ids.NodeID, requestID uint32, response []byte) - stateSyncMinBlocks uint64 - syncableInterval uint64 - syncMode block.StateSyncMode - stateScheme string - expectedErr error + responseIntercept func(vm *VM, nodeID ids.NodeID, requestID uint32, response []byte) + stateSyncMinBlocks uint64 + syncableInterval uint64 + syncMode block.StateSyncMode + dynamicStateSyncEnabled bool + stateSyncPivotInterval uint64 + stateScheme string + wantErr error } func testSyncerVM(t *testing.T, vmSetup *syncVMSetup, test syncTest) { @@ -460,8 +592,8 @@ func testSyncerVM(t *testing.T, vmSetup *syncVMSetup, test syncTest) { // If the test is expected to error, assert the correct error is returned and finish the test. err = syncerVM.Client.Error() - if test.expectedErr != nil { - require.ErrorIs(err, test.expectedErr) + if test.wantErr != nil { + require.ErrorIs(err, test.wantErr) // Note we re-open the database here to avoid a closed error when the test is for a shutdown VM. chaindb := database.New(prefixdb.NewNested(ethDBPrefix, syncerVM.versiondb)) requireSyncPerformedHeight(t, chaindb, 0) @@ -475,8 +607,8 @@ func testSyncerVM(t *testing.T, vmSetup *syncVMSetup, test syncTest) { require.Equal(serverVM.LastAcceptedBlock().Height(), syncerVM.LastAcceptedBlock().Height(), "block height mismatch between syncer and server") require.Equal(serverVM.LastAcceptedBlock().ID(), syncerVM.LastAcceptedBlock().ID(), "blockID mismatch between syncer and server") require.True(syncerVM.blockChain.HasState(syncerVM.blockChain.LastAcceptedBlock().Root()), "unavailable state for last accepted block") - expectedHeight := retrievedSummary.Height() - requireSyncPerformedHeight(t, syncerVM.chaindb, expectedHeight) + wantHeight := retrievedSummary.Height() + requireSyncPerformedHeight(t, syncerVM.chaindb, wantHeight) lastNumber := syncerVM.blockChain.LastAcceptedBlock().NumberU64() // check the last block is indexed @@ -623,9 +755,9 @@ func generateAndAcceptBlocks(t *testing.T, vm *VM, numBlocks int, gen func(int, // requireSyncPerformedHeight iterates over all heights the VM has synced to and // verifies it matches [expected]. 
-func requireSyncPerformedHeight(t *testing.T, db ethdb.Iteratee, expected uint64) { +func requireSyncPerformedHeight(t *testing.T, db ethdb.Iteratee, want uint64) { t.Helper() latest, err := customrawdb.GetLatestSyncPerformed(db) require.NoError(t, err) - require.Equal(t, expected, latest, "sync performed height mismatch") + require.Equal(t, want, latest, "sync performed height mismatch") } diff --git a/graft/subnet-evm/plugin/evm/vm.go b/graft/subnet-evm/plugin/evm/vm.go index 0f454c5012e2..6d3ce4224cdd 100644 --- a/graft/subnet-evm/plugin/evm/vm.go +++ b/graft/subnet-evm/plugin/evm/vm.go @@ -724,18 +724,20 @@ func (vm *VM) initializeStateSync(lastAcceptedHeight uint64) error { BlockParser: vm, }, ), - Enabled: vm.config.StateSyncEnabled, - SkipResume: vm.config.StateSyncSkipResume, - MinBlocks: vm.config.StateSyncMinBlocks, - RequestSize: vm.config.StateSyncRequestSize, - LastAcceptedHeight: lastAcceptedHeight, // TODO clean up how this is passed around - ChainDB: vm.chaindb, - VerDB: vm.versiondb, - MetadataDB: vm.metadataDB, - Acceptor: vm, - SyncSummaryProvider: vm.extensionConfig.SyncSummaryProvider, - Extender: nil, - LeafsRequestType: message.SubnetEVMLeafsRequestType, + Enabled: vm.config.StateSyncEnabled, + SkipResume: vm.config.StateSyncSkipResume, + DynamicStateSyncEnabled: vm.config.StateSyncDynamicEnabled, + PivotInterval: vm.config.StateSyncPivotInterval, + MinBlocks: vm.config.StateSyncMinBlocks, + RequestSize: vm.config.StateSyncRequestSize, + LastAcceptedHeight: lastAcceptedHeight, // TODO clean up how this is passed around + ChainDB: vm.chaindb, + VerDB: vm.versiondb, + MetadataDB: vm.metadataDB, + Acceptor: vm, + SyncSummaryProvider: vm.extensionConfig.SyncSummaryProvider, + Extender: nil, + LeafsRequestType: message.SubnetEVMLeafsRequestType, }) // If StateSync is disabled, clear any ongoing summary so that we will not attempt to resume diff --git a/graft/subnet-evm/plugin/evm/wrapped_block.go b/graft/subnet-evm/plugin/evm/wrapped_block.go index e793a407d4bc..33ee4d525acd 100644 --- a/graft/subnet-evm/plugin/evm/wrapped_block.go +++ b/graft/subnet-evm/plugin/evm/wrapped_block.go @@ -83,6 +83,18 @@ func (b *wrappedBlock) ID() ids.ID { return b.id } // Accept implements the snowman.Block interface func (b *wrappedBlock) Accept(context.Context) error { + // Notify sync client that engine accepted a block. + // If the block was enqueued for deferred processing, skip immediate execution. + if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineAccept(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block accept: %w", err) + } + if deferred { + return nil + } + } + vm := b.vm // Although returning an error from Accept is considered fatal, it is good @@ -149,6 +161,18 @@ func (b *wrappedBlock) handlePrecompileAccept(rules extras.Rules) error { // Reject implements the snowman.Block interface func (b *wrappedBlock) Reject(context.Context) error { + // Notify sync client that engine rejected a block. + // If the block was enqueued for deferred processing, skip immediate execution. 
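+	// (While dynamic sync is active the client reports the block as
+	// deferred; it is queued and replayed once sync completes.)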
+ if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineReject(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block reject: %w", err) + } + if deferred { + return nil + } + } + blkID := b.ID() log.Debug("rejecting block", "hash", blkID.Hex(), @@ -171,6 +195,18 @@ func (b *wrappedBlock) Timestamp() time.Time { return time.Unix(int64(b.ethBlock // Verify implements the snowman.Block interface func (b *wrappedBlock) Verify(context.Context) error { + // Notify sync client that engine verified a block. + // If the block was enqueued for deferred processing, skip immediate execution. + if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineVerify(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block verify: %w", err) + } + if deferred { + return nil + } + } + return b.verify(&precompileconfig.PredicateContext{ SnowCtx: b.vm.ctx, ProposerVMBlockCtx: nil, @@ -203,6 +239,18 @@ func (b *wrappedBlock) ShouldVerifyWithContext(context.Context) (bool, error) { // VerifyWithContext implements the block.WithVerifyContext interface func (b *wrappedBlock) VerifyWithContext(_ context.Context, proposerVMBlockCtx *block.Context) error { + // Notify sync client that engine verified a block. + // If the block was enqueued for deferred processing, skip immediate execution. + if client := b.vm.SyncerClient(); client != nil { + deferred, err := client.OnEngineVerify(b) + if err != nil { + return fmt.Errorf("could not notify sync client of block verify: %w", err) + } + if deferred { + return nil + } + } + return b.verify(&precompileconfig.PredicateContext{ SnowCtx: b.vm.ctx, ProposerVMBlockCtx: proposerVMBlockCtx, diff --git a/snow/engine/snowman/bootstrap/bootstrapper.go b/snow/engine/snowman/bootstrap/bootstrapper.go index 4e5bcfbf1fea..080500c5f405 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper.go +++ b/snow/engine/snowman/bootstrap/bootstrapper.go @@ -736,8 +736,15 @@ func (b *Bootstrapper) tryStartExecuting(ctx context.Context) error { // If the subnet hasn't finished bootstrapping, this chain should remain // syncing. - if !b.Config.BootstrapTracker.IsBootstrapped() { - log("waiting for the remaining chains in this subnet to finish syncing") + // + // During dynamic state sync the inner VM is still at genesis and + // cannot handle NormalOp. Wait for sync to complete. + if !b.Config.BootstrapTracker.IsBootstrapped() || b.Ctx.StateSyncing.Get() { + if b.Ctx.StateSyncing.Get() { + log("waiting for dynamic state sync to complete") + } else { + log("waiting for the remaining chains in this subnet to finish syncing") + } // Restart bootstrapping after [bootstrappingDelay] to keep up to date // on the latest tip. 
b.awaitingTimeout = true @@ -765,7 +772,7 @@ func (b *Bootstrapper) Timeout() error { } b.awaitingTimeout = false - if !b.Config.BootstrapTracker.IsBootstrapped() { + if !b.Config.BootstrapTracker.IsBootstrapped() || b.Ctx.StateSyncing.Get() { return b.restartBootstrapping(context.TODO()) } return b.onFinished(context.TODO(), b.requestID) @@ -779,7 +786,7 @@ func (b *Bootstrapper) restartBootstrapping(ctx context.Context) error { return b.startBootstrapping(ctx) } -func (b *Bootstrapper) Notify(_ context.Context, msg common.Message) error { +func (b *Bootstrapper) Notify(ctx context.Context, msg common.Message) error { if msg != common.StateSyncDone { b.Ctx.Log.Info("received an unexpected message from the VM", zap.Stringer("msg", msg), @@ -788,6 +795,15 @@ func (b *Bootstrapper) Notify(_ context.Context, msg common.Message) error { } b.Ctx.StateSyncing.Set(false) + + // Re-issue SetState to refresh the rpcchainvm client-side chain.State + // cache, which still holds the pre-sync last accepted block. Without + // this, the bootstrapper would verify pre-sync blocks against post-sync + // state. + if err := b.VM.SetState(ctx, snow.Bootstrapping); err != nil { + return fmt.Errorf("failed to refresh VM state after state sync: %w", err) + } + return nil } diff --git a/vms/proposervm/vm.go b/vms/proposervm/vm.go index f1653ab25aae..6ceec155bd59 100644 --- a/vms/proposervm/vm.go +++ b/vms/proposervm/vm.go @@ -184,7 +184,7 @@ func (vm *VM) Initialize( return err } - if err := vm.repairAcceptedChainByHeight(ctx); err != nil { + if err := vm.repairAcceptedChainByHeight(ctx, false); err != nil { return fmt.Errorf("failed to repair accepted chain by height: %w", err) } @@ -330,8 +330,9 @@ func (vm *VM) SetState(ctx context.Context, newState snow.State) error { // When finishing StateSyncing, if state sync has failed or was skipped, // repairAcceptedChainByHeight rolls back the chain to the previously last // accepted block. If state sync has completed successfully, this call is a - // no-op. - if err := vm.repairAcceptedChainByHeight(ctx); err != nil { + // no-op. During dynamic state sync, skip the rollback so the consensus + // engine can start at the sync target height. + if err := vm.repairAcceptedChainByHeight(ctx, true); err != nil { return fmt.Errorf("failed to repair accepted chain height: %w", err) } return vm.setLastAcceptedMetadata(ctx) @@ -607,7 +608,7 @@ func (vm *VM) LastAccepted(ctx context.Context) (ids.ID, error) { return lastAccepted, err } -func (vm *VM) repairAcceptedChainByHeight(ctx context.Context) error { +func (vm *VM) repairAcceptedChainByHeight(ctx context.Context, afterStateSyncing bool) error { innerLastAcceptedID, err := vm.ChainVM.LastAccepted(ctx) if err != nil { return fmt.Errorf("failed to get inner last accepted: %w", err) @@ -640,6 +641,17 @@ func (vm *VM) repairAcceptedChainByHeight(ctx context.Context) error { return nil } + // During dynamic state sync the inner VM is still at genesis. + // Preserve the proposervm height so the bootstrapper gets the + // correct lastAccepted. Not applied during Initialize (crash + // recovery) where height 0 means a genuine rollback. + if afterStateSyncing && innerLastAcceptedHeight == 0 { + vm.ctx.Log.Info("preserving proposervm height during dynamic state sync", + zap.Uint64("outerHeight", proLastAcceptedHeight), + ) + return nil + } + vm.ctx.Log.Info("repairing accepted chain by height", zap.Uint64("outerHeight", proLastAcceptedHeight), zap.Uint64("innerHeight", innerLastAcceptedHeight),