From 54873ea3757d3203fdcc3643966358fc95ead810 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 17 Jun 2020 19:31:37 -0400 Subject: [PATCH 01/16] network bridge skeleton --- Cargo.lock | 17 ++++++++ Cargo.toml | 2 + node/network/bridge/Cargo.toml | 18 ++++++++ node/network/bridge/src/lib.rs | 76 ++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 node/network/bridge/Cargo.toml create mode 100644 node/network/bridge/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 6e944dfa9351..8e47953cab75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4252,6 +4252,23 @@ dependencies = [ "wasm-timer", ] +[[package]] +name = "polkadot-network-bridge" +version = "0.1.0" +dependencies = [ + "futures 0.3.5", + "futures-timer 3.0.2", + "log 0.4.8", + "parity-scale-codec", + "polkadot-node-messages", + "polkadot-node-primitives", + "polkadot-overseer", + "polkadot-primitives", + "sc-network", + "sp-runtime", + "streamunordered", +] + [[package]] name = "polkadot-network-test" version = "0.8.10" diff --git a/Cargo.toml b/Cargo.toml index 8a43ec6ec032..254a8749c3f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,8 @@ members = [ "node/primitives", "node/service", + "node/network/bridge", + "parachain/test-parachains", "parachain/test-parachains/adder", "parachain/test-parachains/adder/collator", diff --git a/node/network/bridge/Cargo.toml b/node/network/bridge/Cargo.toml new file mode 100644 index 000000000000..bb502aa8b2e5 --- /dev/null +++ b/node/network/bridge/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "polkadot-network-bridge" +version = "0.1.0" +authors = ["Parity Technologies "] +edition = "2018" + +[dependencies] +futures = "0.3.5" +log = "0.4.8" +futures-timer = "3.0.2" +streamunordered = "0.5.1" +polkadot-primitives = { path = "../../../primitives" } +node-primitives = { package = "polkadot-node-primitives", path = "../../primitives" } +messages = { package = "polkadot-node-messages", path = "../../messages" } +parity-scale-codec = "1.3.0" +overseer = { package = "polkadot-overseer", path = "../../overseer" } +sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs new file mode 100644 index 000000000000..49a4f0e49f89 --- /dev/null +++ b/node/network/bridge/src/lib.rs @@ -0,0 +1,76 @@ +// Copyright 2020 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! The Network Bridge Subsystem - protocol multiplexer for Polkadot. 
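+//!
+//! A hedged sketch of the multiplexing idea (illustrative only, not part of
+//! this patch; `b"avad"` is a made-up protocol ID and `payload` is assumed to
+//! be a `Vec<u8>`): every notification on the wire is a SCALE-encoded
+//! `Message`, and the variant index in the first byte makes demultiplexing by
+//! protocol cheap.
+//!
+//! ```ignore
+//! let bytes = Message::ProtocolMessage(*b"avad", payload).encode();
+//! match Message::decode(&mut &bytes[..]) {
+//!     Ok(Message::ProtocolMessage(protocol, payload)) => { /* route by `protocol` */ }
+//!     Ok(Message::ViewUpdate(view)) => { /* record the peer's new view */ }
+//!     Err(_) => { /* report the peer for a malformed message */ }
+//! }
+//! ```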
+ +use parity_scale_codec::{Encode, Decode}; + +use sc_network::{ + ObservedRole, ReputationChange, PeerId, config::ProtocolId as SubstrateProtocolId, +}; +use sp_runtime::ConsensusEngineId; + +use messages::{NetworkBridgeEvent, NetworkBridgeMessage}; +use overseer::{Subsystem, SubsystemContext, SpawnedSubsystem}; +use node_primitives::{ProtocolId, View}; +use polkadot_primitives::{Block, Hash}; + +use std::collections::HashMap; +use std::sync::Arc; + +const MAX_VIEW_HEADS: usize = 5; + +/// The engine ID of the polkadot network protocol. +pub const POLKADOT_ENGINE_ID: ConsensusEngineId = *b"dot2"; +/// The protocol name. +pub const POLKADOT_PROTOCOL_NAME: &[u8] = b"/polkadot/2"; + +/// Messages received on the network. +#[derive(Encode, Decode)] +pub enum Message { + /// A message from a peer on a specific protocol. + #[codec(index = "1")] + ProtocolMessage(ProtocolId, Vec), + /// A view update from a peer. + #[codec(index = "2")] + ViewUpdate(View), +} + +/// The network bridge subsystem. +pub struct NetworkBridge(Arc>); + +impl NetworkBridge { + /// Create a new network bridge subsystem with underlying network service. + pub fn new(net_service: Arc>) -> Self { + NetworkBridge(net_service) + } +} + +impl Subsystem for NetworkBridge { + fn start(&mut self, ctx: SubsystemContext) -> SpawnedSubsystem { + unimplemented!(); + // TODO [now]: Spawn substrate-network notifications protocol & event stream. + } +} + +struct PeerData { + /// Latest view sent by the peer. + view: View, +} + +struct ProtocolHandler { + peers: HashMap, +} From 5eea3bfe3deb43fc955062bb57bd40cb0c3da8fa Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 17 Jun 2020 19:31:46 -0400 Subject: [PATCH 02/16] move some primitives around and add debug impls --- node/messages/src/lib.rs | 21 +++++++++++++-------- node/primitives/src/lib.rs | 10 ++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/node/messages/src/lib.rs b/node/messages/src/lib.rs index 4d6da67d84a2..9f853c6a68a1 100644 --- a/node/messages/src/lib.rs +++ b/node/messages/src/lib.rs @@ -24,14 +24,14 @@ use futures::channel::{mpsc, oneshot}; -use sc_network::{ObservedRole, ReputationChange, PeerId, config::ProtocolId}; +use sc_network::{ObservedRole, ReputationChange, PeerId}; use polkadot_primitives::{BlockNumber, Hash, Signature}; use polkadot_primitives::parachain::{ AbridgedCandidateReceipt, PoVBlock, ErasureChunk, BackedCandidate, Id as ParaId, SignedAvailabilityBitfield, SigningContext, ValidatorId, ValidationCode, ValidatorIndex, }; use polkadot_node_primitives::{ - MisbehaviorReport, SignedStatement, + MisbehaviorReport, SignedStatement, View, ProtocolId, }; /// Signals sent by an overseer to a subsystem. @@ -90,12 +90,8 @@ pub enum CandidateValidationMessage { ), } -/// Chain heads. -/// -/// Up to `N` (5?) chain heads. -pub struct View(pub Vec); - /// Events from network. +#[derive(Debug)] pub enum NetworkBridgeEvent { /// A peer has connected. PeerConnected(PeerId, ObservedRole), @@ -114,7 +110,8 @@ pub enum NetworkBridgeEvent { } /// Messages received by the network bridge subsystem. -pub enum NetworkBridgeSubsystemMessage { +#[derive(Debug)] +pub enum NetworkBridgeMessage { /// Register an event producer on startup. RegisterEventProducer(ProtocolId, fn(NetworkBridgeEvent) -> AllMessages), @@ -126,6 +123,7 @@ pub enum NetworkBridgeSubsystemMessage { } /// Availability Distribution Message. +#[derive(Debug)] pub enum AvailabilityDistributionMessage { /// Distribute an availability chunk to other validators. 
DistributeChunk(Hash, ErasureChunk), @@ -138,6 +136,7 @@ pub enum AvailabilityDistributionMessage { } /// Bitfield distribution message. +#[derive(Debug)] pub enum BitfieldDistributionMessage { /// Distribute a bitfield via gossip to other validators. DistributeBitfield(Hash, SignedAvailabilityBitfield), @@ -147,6 +146,7 @@ pub enum BitfieldDistributionMessage { } /// Availability store subsystem message. +#[derive(Debug)] pub enum AvailabilityStoreMessage { /// Query a `PoVBlock` from the AV store. QueryPoV(Hash, oneshot::Sender>), @@ -159,6 +159,7 @@ pub enum AvailabilityStoreMessage { } /// A request to the Runtime API subsystem. +#[derive(Debug)] pub enum RuntimeApiRequest { /// Get the current validator set. Validators(oneshot::Sender>), @@ -171,12 +172,14 @@ pub enum RuntimeApiRequest { } /// A message to the Runtime API subsystem. +#[derive(Debug)] pub enum RuntimeApiMessage { /// Make a request of the runtime API against the post-state of the given relay-parent. Request(Hash, RuntimeApiRequest), } /// Statement distribution message. +#[derive(Debug)] pub enum StatementDistributionMessage { /// We have originated a signed statement in the context of /// given relay-parent hash and it should be distributed to other validators. @@ -184,6 +187,7 @@ pub enum StatementDistributionMessage { } /// This data becomes intrinsics or extrinsics which should be included in a future relay chain block. +#[derive(Debug)] pub enum ProvisionableData { /// This bitfield indicates the availability of various candidate blocks. Bitfield(Hash, SignedAvailabilityBitfield), @@ -198,6 +202,7 @@ pub enum ProvisionableData { /// Message to the Provisioner. /// /// In all cases, the Hash is that of the relay parent. +#[derive(Debug)] pub enum ProvisionerMessage { /// This message allows potential block authors to be kept updated with all new authorship data /// as it becomes available. diff --git a/node/primitives/src/lib.rs b/node/primitives/src/lib.rs index 78a87cda39a9..15e99df0b0e8 100644 --- a/node/primitives/src/lib.rs +++ b/node/primitives/src/lib.rs @@ -92,6 +92,7 @@ impl SignedStatement { } /// A misbehaviour report. +#[derive(Debug)] pub enum MisbehaviorReport { /// These validator nodes disagree on this candidate's validity, please figure it out /// @@ -107,3 +108,12 @@ pub enum MisbehaviorReport { /// This peer has seconded more than one parachain candidate for this relay parent head DoubleVote(CandidateReceipt, SignedStatement, SignedStatement), } + +/// A unique identifier for a network protocol. +pub type ProtocolId = [u8; 4]; + +/// A succinct representation of a peer's view. This consists of a bounded amount of chain heads. +/// +/// Up to `N` (5?) chain heads. +#[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)] +pub struct View(pub Vec); From 27456b580247e457f21e0c77e4f5a756cc50d287 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 17 Jun 2020 19:47:11 -0400 Subject: [PATCH 03/16] protocol registration glue & abstract network interface --- node/network/bridge/src/lib.rs | 48 +++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index 49a4f0e49f89..a2a9ee2d61f3 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -17,9 +17,12 @@ //! The Network Bridge Subsystem - protocol multiplexer for Polkadot. 
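+//!
+//! As a hedged aside on the `Network` abstraction introduced in this commit
+//! (illustrative only; `TestNetwork` is hypothetical, and `event_stream` is
+//! assumed to return a `BoxStream<'static, NetworkEvent>`): writing the
+//! subsystem against the trait rather than `sc_network::NetworkService`
+//! directly lets tests substitute a lightweight double.
+//!
+//! ```ignore
+//! #[derive(Clone)]
+//! struct TestNetwork { events: Vec<NetworkEvent> }
+//!
+//! impl Network for TestNetwork {
+//!     fn event_stream(&self) -> BoxStream<'static, NetworkEvent> {
+//!         futures::stream::iter(self.events.clone()).boxed()
+//!     }
+//!     fn report_peer(&self, _: PeerId, _: ReputationChange) {}
+//!     fn write_notification(&self, _: PeerId, _: Vec<u8>) {}
+//! }
+//! ```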
use parity_scale_codec::{Encode, Decode}; +use futures::prelude::*; +use futures::stream::BoxStream; use sc_network::{ ObservedRole, ReputationChange, PeerId, config::ProtocolId as SubstrateProtocolId, + Event as NetworkEvent, }; use sp_runtime::ConsensusEngineId; @@ -49,17 +52,54 @@ pub enum Message { ViewUpdate(View), } +/// Information about the notifications protocol. Should be used during network configuration +/// or shortly after startup to register the protocol with the network service. +pub fn notifications_protocol_info() -> (ConsensusEngineId, std::borrow::Cow<'static, [u8]>) { + (POLKADOT_ENGINE_ID, POLKADOT_PROTOCOL_NAME.into()) +} + +/// An abstraction over networking for the purposes of this subsystem. +pub trait Network: Clone + Send + 'static { + /// Get a stream of all events occurring on the network. This may include events unrelated + /// to the Polkadot protocol - the user of this function should filter only for events related + /// to the [`POLKADOT_ENGINE_ID`](POLKADOT_ENGINE_ID). + fn event_stream(&self) -> BoxStream; + + /// Report a given peer as either beneficial (+) or costly (-) according to the given scalar. + fn report_peer(&self, who: PeerId, cost_benefit: ReputationChange); + + /// Write a notification to a peer on the [`POLKADOT_ENGINE_ID`](POLKADOT_ENGINE_ID) topic. + fn write_notification(&self, who: PeerId, message: Vec); +} + +impl Network for Arc> { + fn event_stream(&self) -> BoxStream { + sc_network::NetworkService::event_stream(self, "polkadot-network-bridge").boxed() + } + + fn report_peer(&self, who: PeerId, cost_benefit: ReputationChange) { + sc_network::NetworkService::report_peer(self, who, cost_benefit) + } + + fn write_notification(&self, who: PeerId, message: Vec) { + sc_network::NetworkService::write_notification(self, who, POLKADOT_ENGINE_ID, message) + } +} + /// The network bridge subsystem. -pub struct NetworkBridge(Arc>); +pub struct NetworkBridge(N); -impl NetworkBridge { +impl NetworkBridge { /// Create a new network bridge subsystem with underlying network service. - pub fn new(net_service: Arc>) -> Self { + /// + /// This assumes that the network service has had the notifications protocol for the network + /// bridge already registered. See [`notifications_protocol_info`](notifications_protocol_info). + pub fn new(net_service: N) -> Self { NetworkBridge(net_service) } } -impl Subsystem for NetworkBridge { +impl Subsystem for NetworkBridge { fn start(&mut self, ctx: SubsystemContext) -> SpawnedSubsystem { unimplemented!(); // TODO [now]: Spawn substrate-network notifications protocol & event stream. From 3bf17fa09c5212d073b0f7678fbe399ffdc4ac68 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 17 Jun 2020 20:49:31 -0400 Subject: [PATCH 04/16] add send_msgs to subsystemctx --- node/overseer/src/lib.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/node/overseer/src/lib.rs b/node/overseer/src/lib.rs index 0d3c9b7b5095..01c5e4685468 100644 --- a/node/overseer/src/lib.rs +++ b/node/overseer/src/lib.rs @@ -65,7 +65,7 @@ use futures::channel::{mpsc, oneshot}; use futures::{ pending, poll, select, future::{BoxFuture, RemoteHandle}, - stream::FuturesUnordered, + stream::{self, FuturesUnordered}, task::{Spawn, SpawnError, SpawnExt}, Future, FutureExt, SinkExt, StreamExt, }; @@ -330,6 +330,16 @@ impl SubsystemContext { Ok(()) } + /// Send multiple direct messages to other `Subsystem`s, routed based on message type. 
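+	///
+	/// A hedged usage sketch (not part of this commit; it mirrors how the
+	/// network bridge later in this series fans an event out to its
+	/// registered event producers):
+	///
+	/// ```ignore
+	/// let msgs: Vec<AllMessages> = event_producers.values()
+	///     .map(|producer| producer(update.clone()))
+	///     .collect();
+	/// ctx.send_msgs(msgs).await?;
+	/// ```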
+ pub async fn send_msgs(&mut self, msgs: impl IntoIterator) + -> SubsystemResult<()> + { + let mut msgs = stream::iter(msgs.into_iter().map(ToOverseer::SubsystemMessage).map(Ok)); + self.tx.send_all(&mut msgs).await?; + + Ok(()) + } + fn new(rx: mpsc::Receiver>, tx: mpsc::Sender) -> Self { Self { rx, From e05abd2017fbad142bc1567100966f385335115c Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Wed, 17 Jun 2020 20:51:41 -0400 Subject: [PATCH 05/16] select logic --- node/network/bridge/src/lib.rs | 50 ++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index a2a9ee2d61f3..ef91e94a0d90 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -26,7 +26,7 @@ use sc_network::{ }; use sp_runtime::ConsensusEngineId; -use messages::{NetworkBridgeEvent, NetworkBridgeMessage}; +use messages::{NetworkBridgeEvent, NetworkBridgeMessage, FromOverseer, OverseerSignal}; use overseer::{Subsystem, SubsystemContext, SpawnedSubsystem}; use node_primitives::{ProtocolId, View}; use polkadot_primitives::{Block, Hash}; @@ -101,8 +101,7 @@ impl NetworkBridge { impl Subsystem for NetworkBridge { fn start(&mut self, ctx: SubsystemContext) -> SpawnedSubsystem { - unimplemented!(); - // TODO [now]: Spawn substrate-network notifications protocol & event stream. + SpawnedSubsystem(run_network(self.0.clone(), ctx).boxed()) } } @@ -111,6 +110,47 @@ struct PeerData { view: View, } -struct ProtocolHandler { - peers: HashMap, +async fn run_network(net: impl Network, mut ctx: SubsystemContext) { + let mut event_stream = net.event_stream().fuse(); + + // TODO [now] + // let peers = HashMap::new(); + // let event_listeners = HashMap::new(); + + loop { + let subsystem_next = ctx.recv().fuse(); + let mut net_event_next = event_stream.next().fuse(); + futures::pin_mut!(subsystem_next); + + futures::select! { + subsystem_msg = subsystem_next => match subsystem_msg { + Ok(FromOverseer::Signal(OverseerSignal::StartWork(relay_parent))) => { + // TODO [now]: update local view and send view update to peers. + } + Ok(FromOverseer::Signal(OverseerSignal::StopWork(relay_parent))) => { + // TODO [now]: update local view and send view update to peers. + } + Ok(FromOverseer::Signal(OverseerSignal::Conclude)) => return, + Ok(FromOverseer::Communication { msg }) => match msg { + NetworkBridgeMessage::RegisterEventProducer(protocol_id, message_producer) => { + // TODO [now]: add event producer. + } + NetworkBridgeMessage::ReportPeer(peer, rep) => { + // TODO [now]: report a peer to network service. + } + NetworkBridgeMessage::SendMessage(peers, protocol, message) => { + // TODO [now]: Send the message to all peers with `write_notification`. + } + }, + Err(e) => { + // TODO [now]: log error. + return; + } + }, + net_event = net_event_next => { + // TODO [now]: Update peer tracker, filter out anything not to do with this + // engine, and transform all updates to be sent to the overseer. 
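+				// Sketch of the intended handling here (implemented in a later
+				// commit in this series): ignore events for engines other than
+				// POLKADOT_ENGINE_ID, decode notifications as `Message`s, report
+				// peers whose messages fail to decode, and forward the resulting
+				// `NetworkBridgeEvent`s to registered event producers.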
+ }, + } + } } From ba005c9ca71d0fc8ab60763d97dfbfbe0b998bfd Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Thu, 18 Jun 2020 12:27:47 -0400 Subject: [PATCH 06/16] transform different events into actions and handle --- node/network/bridge/src/lib.rs | 168 +++++++++++++++++++++++++++------ 1 file changed, 137 insertions(+), 31 deletions(-) diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index ef91e94a0d90..c77abfd66544 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -21,12 +21,14 @@ use futures::prelude::*; use futures::stream::BoxStream; use sc_network::{ - ObservedRole, ReputationChange, PeerId, config::ProtocolId as SubstrateProtocolId, + ObservedRole, ReputationChange, PeerId, Event as NetworkEvent, }; use sp_runtime::ConsensusEngineId; -use messages::{NetworkBridgeEvent, NetworkBridgeMessage, FromOverseer, OverseerSignal}; +use messages::{ + NetworkBridgeEvent, NetworkBridgeMessage, FromOverseer, OverseerSignal, AllMessages, +}; use overseer::{Subsystem, SubsystemContext, SpawnedSubsystem}; use node_primitives::{ProtocolId, View}; use polkadot_primitives::{Block, Hash}; @@ -43,7 +45,7 @@ pub const POLKADOT_PROTOCOL_NAME: &[u8] = b"/polkadot/2"; /// Messages received on the network. #[derive(Encode, Decode)] -pub enum Message { +pub enum WireMessage { /// A message from a peer on a specific protocol. #[codec(index = "1")] ProtocolMessage(ProtocolId, Vec), @@ -110,47 +112,151 @@ struct PeerData { view: View, } +enum Action { + RegisterEventProducer(ProtocolId, fn(NetworkBridgeEvent) -> AllMessages), + SendMessage(Vec, ProtocolId, Vec), + ReportPeer(PeerId, ReputationChange), + StartWork(Hash), + StopWork(Hash), + + PeerConnected(PeerId, ObservedRole), + PeerDisconnected(PeerId), + PeerMalformedMessage(PeerId), + PeerMessages(PeerId, Vec), + PeerViewChange(PeerId, View), + + Abort, +} + +fn action_from_overseer_message( + res: overseer::SubsystemResult>, +) -> Action { + match res { + Ok(FromOverseer::Signal(OverseerSignal::StartWork(relay_parent))) + => Action::StartWork(relay_parent), + Ok(FromOverseer::Signal(OverseerSignal::StopWork(relay_parent))) + => Action::StopWork(relay_parent), + Ok(FromOverseer::Signal(OverseerSignal::Conclude)) => Action::Abort, + Ok(FromOverseer::Communication { msg }) => match msg { + NetworkBridgeMessage::RegisterEventProducer(protocol_id, message_producer) + => Action::RegisterEventProducer(protocol_id, message_producer), + NetworkBridgeMessage::ReportPeer(peer, rep) => Action::ReportPeer(peer, rep), + NetworkBridgeMessage::SendMessage(peers, protocol, message) + => Action::SendMessage(peers, protocol, message), + }, + Err(e) => { + log::warn!("Shutting down Network Bridge due to error {:?}", e); + Action::Abort + } + } +} + +fn action_from_network_message(event: Option) -> Option { + match event { + None => { + log::warn!("Shutting down Network Bridge: underlying event stream concluded"); + Some(Action::Abort) + } + Some(NetworkEvent::Dht(_)) => None, + Some(NetworkEvent::NotificationStreamOpened { remote, engine_id, role }) => { + if engine_id == POLKADOT_ENGINE_ID { + Some(Action::PeerConnected(remote, role)) + } else { + None + } + } + Some(NetworkEvent::NotificationStreamClosed { remote, engine_id }) => { + if engine_id == POLKADOT_ENGINE_ID { + Some(Action::PeerDisconnected(remote)) + } else { + None + } + } + Some(NetworkEvent::NotificationsReceived { remote, messages }) => { + let v: Result, _> = messages.iter() + .filter(|(engine_id, _)| engine_id == &POLKADOT_ENGINE_ID) + 
.map(|(_, msg_bytes)| WireMessage::decode(&mut msg_bytes.as_ref())) + .collect(); + + match v { + Err(_) => Some(Action::PeerMalformedMessage(remote)), + Ok(v) => if v.is_empty() { + None + } else { + Some(Action::PeerMessages(remote, v)) + } + } + } + } +} + async fn run_network(net: impl Network, mut ctx: SubsystemContext) { let mut event_stream = net.event_stream().fuse(); + let mut local_view = Vec::with_capacity(MAX_VIEW_HEADS); - // TODO [now] - // let peers = HashMap::new(); - // let event_listeners = HashMap::new(); + //let mut peers = HashMap::new(); + let mut event_producers = HashMap::new(); loop { let subsystem_next = ctx.recv().fuse(); let mut net_event_next = event_stream.next().fuse(); futures::pin_mut!(subsystem_next); - futures::select! { - subsystem_msg = subsystem_next => match subsystem_msg { - Ok(FromOverseer::Signal(OverseerSignal::StartWork(relay_parent))) => { - // TODO [now]: update local view and send view update to peers. - } - Ok(FromOverseer::Signal(OverseerSignal::StopWork(relay_parent))) => { - // TODO [now]: update local view and send view update to peers. + let action = futures::select! { + subsystem_msg = subsystem_next => Some(action_from_overseer_message(subsystem_msg)), + net_event = net_event_next => action_from_network_message(net_event), + }; + + let action = match action { + None => continue, + Some(a) => a, + }; + + match action { + Action::RegisterEventProducer(protocol_id, event_producer) => { + // insert only if none present. + event_producers.entry(protocol_id).or_insert(event_producer); + } + Action::SendMessage(peers, protocol, message) => { + let message = WireMessage::ProtocolMessage(protocol, message).encode(); + + for peer in peers.iter().skip(1).cloned() { + net.write_notification(peer, message.clone()); } - Ok(FromOverseer::Signal(OverseerSignal::Conclude)) => return, - Ok(FromOverseer::Communication { msg }) => match msg { - NetworkBridgeMessage::RegisterEventProducer(protocol_id, message_producer) => { - // TODO [now]: add event producer. - } - NetworkBridgeMessage::ReportPeer(peer, rep) => { - // TODO [now]: report a peer to network service. - } - NetworkBridgeMessage::SendMessage(peers, protocol, message) => { - // TODO [now]: Send the message to all peers with `write_notification`. - } - }, - Err(e) => { - // TODO [now]: log error. - return; + + if let Some(peer) = peers.first() { + net.write_notification(peer.clone(), message); } + } + Action::ReportPeer(peer, rep) => { + net.report_peer(peer, rep) + } + Action::StartWork(relay_parent) => { + local_view.push(relay_parent); + // TODO [now]: send view change. + } + Action::StopWork(relay_parent) => { + local_view.retain(|h| h != &relay_parent) + // TODO [now]: send view change. + } + + Action::PeerConnected(peer, role) => { + + } + Action::PeerDisconnected(peer) => { + + }, + Action::PeerMalformedMessage(peer) => { + }, - net_event = net_event_next => { - // TODO [now]: Update peer tracker, filter out anything not to do with this - // engine, and transform all updates to be sent to the overseer. 
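+				// Sketch of the intended view-change handling (implemented in
+				// the next commit): rebuild the local view from the most recent
+				// MAX_VIEW_HEADS live heads, and if it changed, send the encoded
+				// `WireMessage::ViewUpdate` to all peers via `write_notification`
+				// and dispatch `NetworkBridgeEvent::OurViewChange` to the event
+				// producers.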
+ Action::PeerMessages(peer, messages) => { + + }, + Action::PeerViewChange(peer, new_view) => { + }, + + Action::Abort => return, } } } From 7572bbab43e8e35b2eb492bcfaa5bb20babe77e9 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Thu, 18 Jun 2020 14:36:46 -0400 Subject: [PATCH 07/16] implement remaining network bridge state machine --- node/messages/src/lib.rs | 2 +- node/network/bridge/src/lib.rs | 182 +++++++++++++++++++++++++++------ 2 files changed, 154 insertions(+), 30 deletions(-) diff --git a/node/messages/src/lib.rs b/node/messages/src/lib.rs index 9f853c6a68a1..79b28f107b3f 100644 --- a/node/messages/src/lib.rs +++ b/node/messages/src/lib.rs @@ -91,7 +91,7 @@ pub enum CandidateValidationMessage { } /// Events from network. -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum NetworkBridgeEvent { /// A peer has connected. PeerConnected(PeerId, ObservedRole), diff --git a/node/network/bridge/src/lib.rs b/node/network/bridge/src/lib.rs index c77abfd66544..9d33a7e47855 100644 --- a/node/network/bridge/src/lib.rs +++ b/node/network/bridge/src/lib.rs @@ -33,7 +33,7 @@ use overseer::{Subsystem, SubsystemContext, SpawnedSubsystem}; use node_primitives::{ProtocolId, View}; use polkadot_primitives::{Block, Hash}; -use std::collections::HashMap; +use std::collections::hash_map::{HashMap, Entry}; use std::sync::Arc; const MAX_VIEW_HEADS: usize = 5; @@ -43,6 +43,13 @@ pub const POLKADOT_ENGINE_ID: ConsensusEngineId = *b"dot2"; /// The protocol name. pub const POLKADOT_PROTOCOL_NAME: &[u8] = b"/polkadot/2"; +const MALFORMED_MESSAGE_COST: ReputationChange + = ReputationChange::new(-500, "Malformed Network-bridge message"); +const UNKNOWN_PROTO_COST: ReputationChange + = ReputationChange::new(-50, "Message sent to unknown protocol"); +const MALFORMED_VIEW_COST: ReputationChange + = ReputationChange::new(-500, "Malformed view"); + /// Messages received on the network. #[derive(Encode, Decode)] pub enum WireMessage { @@ -61,7 +68,7 @@ pub fn notifications_protocol_info() -> (ConsensusEngineId, std::borrow::Cow<'st } /// An abstraction over networking for the purposes of this subsystem. -pub trait Network: Clone + Send + 'static { +pub trait Network: Clone + Send + Sync + 'static { /// Get a stream of all events occurring on the network. This may include events unrelated /// to the Polkadot protocol - the user of this function should filter only for events related /// to the [`POLKADOT_ENGINE_ID`](POLKADOT_ENGINE_ID). @@ -121,9 +128,7 @@ enum Action { PeerConnected(PeerId, ObservedRole), PeerDisconnected(PeerId), - PeerMalformedMessage(PeerId), PeerMessages(PeerId, Vec), - PeerViewChange(PeerId, View), Abort, } @@ -179,7 +184,7 @@ fn action_from_network_message(event: Option) -> Option { .collect(); match v { - Err(_) => Some(Action::PeerMalformedMessage(remote)), + Err(_) => Some(Action::ReportPeer(remote, MALFORMED_MESSAGE_COST)), Ok(v) => if v.is_empty() { None } else { @@ -190,26 +195,61 @@ fn action_from_network_message(event: Option) -> Option { } } +fn construct_view(live_heads: &[Hash]) -> View { + View(live_heads.iter().rev().take(MAX_VIEW_HEADS).cloned().collect()) +} + +async fn dispatch_update_to_all( + update: NetworkBridgeEvent, + event_producers: impl IntoIterator AllMessages>, + ctx: &mut SubsystemContext, +) -> overseer::SubsystemResult<()> { + // collect messages here to avoid the borrow lasting across await boundary. 
+ let messages: Vec<_> = event_producers.into_iter() + .map(|producer| producer(update.clone())) + .collect(); + + ctx.send_msgs(messages).await +} + async fn run_network(net: impl Network, mut ctx: SubsystemContext) { let mut event_stream = net.event_stream().fuse(); - let mut local_view = Vec::with_capacity(MAX_VIEW_HEADS); - //let mut peers = HashMap::new(); + // Most recent heads are at the back. + let mut live_heads = Vec::with_capacity(MAX_VIEW_HEADS); + let mut local_view = View(Vec::new()); + + let mut peers = HashMap::new(); let mut event_producers = HashMap::new(); loop { - let subsystem_next = ctx.recv().fuse(); - let mut net_event_next = event_stream.next().fuse(); - futures::pin_mut!(subsystem_next); - - let action = futures::select! { - subsystem_msg = subsystem_next => Some(action_from_overseer_message(subsystem_msg)), - net_event = net_event_next => action_from_network_message(net_event), + let action = { + let subsystem_next = ctx.recv().fuse(); + let mut net_event_next = event_stream.next().fuse(); + futures::pin_mut!(subsystem_next); + + let action = futures::select! { + subsystem_msg = subsystem_next => Some(action_from_overseer_message(subsystem_msg)), + net_event = net_event_next => action_from_network_message(net_event), + }; + + match action { + Some(a) => a, + None => continue, + } }; - let action = match action { - None => continue, - Some(a) => a, + let update_view = |peers: &HashMap, live_heads, local_view: &mut View| { + let new_view = construct_view(live_heads); + if *local_view == new_view { return None } + *local_view = new_view.clone(); + + let message = WireMessage::ViewUpdate(new_view).encode(); + for peer in peers.keys().cloned() { + net.write_notification(peer, message.clone()) + } + + Some(NetworkBridgeEvent::OurViewChange(local_view.clone())) }; match action { @@ -232,28 +272,112 @@ async fn run_network(net: impl Network, mut ctx: SubsystemContext { - local_view.push(relay_parent); - // TODO [now]: send view change. + live_heads.push(relay_parent); + if let Some(view_update) = update_view(&peers, &live_heads, &mut local_view) { + if let Err(_) = dispatch_update_to_all( + view_update, + event_producers.values(), + &mut ctx, + ).await { + log::warn!("Aborting - Failure to dispatch messages to overseer"); + return + } + } } Action::StopWork(relay_parent) => { - local_view.retain(|h| h != &relay_parent) - // TODO [now]: send view change. 
+ live_heads.retain(|h| h != &relay_parent); + if let Some(view_update) = update_view(&peers, &live_heads, &mut local_view) { + if let Err(_) = dispatch_update_to_all( + view_update, + event_producers.values(), + &mut ctx, + ).await { + log::warn!("Aborting - Failure to dispatch messages to overseer"); + return + } + } } Action::PeerConnected(peer, role) => { - + match peers.entry(peer.clone()) { + Entry::Occupied(_) => continue, + Entry::Vacant(vacant) => { + vacant.insert(PeerData { + view: View(Vec::new()), + }); + + if let Err(_) = dispatch_update_to_all( + NetworkBridgeEvent::PeerConnected(peer, role), + event_producers.values(), + &mut ctx, + ).await { + log::warn!("Aborting - Failure to dispatch messages to overseer"); + return + } + } + } } Action::PeerDisconnected(peer) => { - - }, - Action::PeerMalformedMessage(peer) => { - + if peers.remove(&peer).is_some() { + if let Err(_) = dispatch_update_to_all( + NetworkBridgeEvent::PeerDisconnected(peer), + event_producers.values(), + &mut ctx, + ).await { + log::warn!("Aborting - Failure to dispatch messages to overseer"); + return + } + } }, Action::PeerMessages(peer, messages) => { + let peer_data = match peers.get_mut(&peer) { + None => continue, + Some(d) => d, + }; + + let mut outgoing_messages = Vec::with_capacity(messages.len()); + for message in messages { + match message { + WireMessage::ViewUpdate(new_view) => { + if new_view.0.len() > MAX_VIEW_HEADS { + net.report_peer(peer.clone(), MALFORMED_VIEW_COST); + continue + } + + if new_view == peer_data.view { continue } + peer_data.view = new_view; + + let update = NetworkBridgeEvent::PeerViewChange( + peer.clone(), + peer_data.view.clone(), + ); + + outgoing_messages.extend( + event_producers.values().map(|producer| producer(update.clone())) + ); + } + WireMessage::ProtocolMessage(protocol, message) => { + let message = match event_producers.get(&protocol) { + Some(producer) => Some(producer( + NetworkBridgeEvent::PeerMessage(peer.clone(), message) + )), + None => { + net.report_peer(peer.clone(), UNKNOWN_PROTO_COST); + None + } + }; + + if let Some(message) = message { + outgoing_messages.push(message); + } + } + } + } - }, - Action::PeerViewChange(peer, new_view) => { - + if let Err(_) = ctx.send_msgs(outgoing_messages).await { + log::warn!("Aborting - Failure to dispatch messages to overseer"); + return + } }, Action::Abort => return, From 5c99b3d17c5d74cbc2a86eac6d49aaa2e6bf25d6 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 19 Jun 2020 16:21:22 -0400 Subject: [PATCH 08/16] rejig candidate types a bit --- .../availability/availability-distribution.md | 2 +- .../src/node/backing/pov-distribution.md | 1 + .../node/backing/statement-distribution.md | 2 +- .../src/runtime/inclusion.md | 14 ++-- .../implementors-guide/src/runtime/router.md | 4 +- .../implementors-guide/src/types/backing.md | 6 +- .../implementors-guide/src/types/candidate.md | 83 +++++++++++++------ .../src/types/overseer-protocol.md | 12 ++- 8 files changed, 82 insertions(+), 42 deletions(-) diff --git a/roadmap/implementors-guide/src/node/availability/availability-distribution.md b/roadmap/implementors-guide/src/node/availability/availability-distribution.md index a916b3766bfb..1640eb08e039 100644 --- a/roadmap/implementors-guide/src/node/availability/availability-distribution.md +++ b/roadmap/implementors-guide/src/node/availability/availability-distribution.md @@ -26,7 +26,7 @@ Register on startup an event producer with `NetworkBridge::RegisterEventProduce For each relay-parent in our local view 
update, look at all backed candidates pending availability. Distribute via gossip all erasure chunks for all candidates that we have to peers. -We define an operation `live_candidates(relay_heads) -> Set` which returns a set of candidates a given set of relay chain heads that implies a set of candidates whose availability chunks should be currently gossiped. This is defined as all candidates pending availability in any of those relay-chain heads or any of their last `K` ancestors. We assume that state is not pruned within `K` blocks of the chain-head. +We define an operation `live_candidates(relay_heads) -> Set` which returns a set of [`CommittedCandidateReceipt`s](../../types/candidate.html#committed-candidate-receipt) a given set of relay chain heads that implies a set of candidates whose availability chunks should be currently gossiped. This is defined as all candidates pending availability in any of those relay-chain heads or any of their last `K` ancestors. We assume that state is not pruned within `K` blocks of the chain-head. We will send any erasure-chunks that correspond to candidates in `live_candidates(peer_most_recent_view_update)`. Likewise, we only accept and forward messages pertaining to a candidate in `live_candidates(current_heads)`. Each erasure chunk should be accompanied by a merkle proof that it is committed to by the erasure trie root in the candidate receipt, and this gossip system is responsible for checking such proof. diff --git a/roadmap/implementors-guide/src/node/backing/pov-distribution.md b/roadmap/implementors-guide/src/node/backing/pov-distribution.md index d7290cbfbf98..a1ecd3993872 100644 --- a/roadmap/implementors-guide/src/node/backing/pov-distribution.md +++ b/roadmap/implementors-guide/src/node/backing/pov-distribution.md @@ -11,3 +11,4 @@ Handle requests for PoV block by candidate hash and relay-parent. Implemented as a gossip system, where `PoV`s are not accepted unless we know a `Seconded` message. > TODO: this requires a lot of cross-contamination with statement distribution even if we don't implement this as a gossip system. In a point-to-point implementation, we still have to know _who to ask_, which means tracking who's submitted `Seconded`, `Valid`, or `Invalid` statements - by validator and by peer. One approach is to have the Statement gossip system to just send us this information and then we can separate the systems from the beginning instead of combining them +∂ diff --git a/roadmap/implementors-guide/src/node/backing/statement-distribution.md b/roadmap/implementors-guide/src/node/backing/statement-distribution.md index b77ee7464f8b..5bdf74c495d9 100644 --- a/roadmap/implementors-guide/src/node/backing/statement-distribution.md +++ b/roadmap/implementors-guide/src/node/backing/statement-distribution.md @@ -22,7 +22,7 @@ Implemented as a gossip protocol. Register a network event producer on startup. Statement Distribution is the only backing subsystem which has any notion of peer nodes, who are any full nodes on the network. Validators will also act as peer nodes. -It is responsible for signing statements that we have generated and forwarding them, and for detecting a variety of Validator misbehaviors for reporting to [Misbehavior Arbitration](../utility/misbehavior-arbitration.md). During the Backing stage of the inclusion pipeline, it's the main point of contact with peer nodes, who distribute statements by validators. 
On receiving a signed statement from a peer, assuming the peer receipt state machine is in an appropriate state, it sends the Candidate Receipt to the [Candidate Backing subsystem](candidate-backing.md) to handle the validator's statement. +It is responsible for distributing signed statements that we have generated and forwarding them, and for detecting a variety of Validator misbehaviors for reporting to [Misbehavior Arbitration](../utility/misbehavior-arbitration.md). During the Backing stage of the inclusion pipeline, it's the main point of contact with peer nodes. On receiving a signed statement from a peer, assuming the peer receipt state machine is in an appropriate state, it sends the Candidate Receipt to the [Candidate Backing subsystem](candidate-backing.md) to handle the validator's statement. Track equivocating validators and stop accepting information from them. Forward double-vote proofs to the double-vote reporting system. Establish a data-dependency order: diff --git a/roadmap/implementors-guide/src/runtime/inclusion.md b/roadmap/implementors-guide/src/runtime/inclusion.md index e2d27476967e..29a1aa4e888a 100644 --- a/roadmap/implementors-guide/src/runtime/inclusion.md +++ b/roadmap/implementors-guide/src/runtime/inclusion.md @@ -14,7 +14,7 @@ struct AvailabilityBitfield { struct CandidatePendingAvailability { core: CoreIndex, // availability core - receipt: AbridgedCandidateReceipt, + receipt: CandidateReceipt, availability_votes: Bitfield, // one bit per validator. relay_parent_number: BlockNumber, // number of the relay-parent. backed_in_number: BlockNumber, @@ -28,6 +28,8 @@ Storage Layout: bitfields: map ValidatorIndex => AvailabilityBitfield; /// Candidates pending availability. PendingAvailability: map ParaId => CandidatePendingAvailability; +/// The commitments of candidates pending availability, by ParaId. +PendingAvailabilityCommitments: map ParaId => CandidateCommitments; /// The current validators, by their parachain session keys. Validators: Vec; @@ -36,8 +38,6 @@ Validators: Vec; CurrentSessionIndex: SessionIndex; ``` -> TODO: `CandidateReceipt` and `AbridgedCandidateReceipt` can contain code upgrades which make them very large. the code entries should be split into a different storage map with infrequent access patterns - ## Session Change 1. Clear out all candidates pending availability. @@ -64,15 +64,17 @@ All failed checks should lead to an unrecoverable error making the block invalid 1. check that there is no candidate pending availability for any scheduled `ParaId`. 1. If the core assignment includes a specific collator, ensure the backed candidate is issued by that collator. 1. Ensure that any code upgrade scheduled by the candidate does not happen within `config.validation_upgrade_frequency` of `Paras::last_code_upgrade(para_id, true)`, if any, comparing against the value of `Paras::FutureCodeUpgrades` for the given para ID. - 1. Check the collator's signature on the pov block. + 1. Check the collator's signature on the candidate data. + 1. Transform each [`CommittedCandidateReceipt`](../../types/candidate.html#committed-candidate-receipt) into the corresponding [`CandidateReceipt`](../../types/candidate.html#candidate-receipt), setting the commitments aside. 1. check the backing of the candidate using the signatures and the bitfields, comparing against the validators assigned to the groups, fetched with the `group_validators` lookup. 1. 
check that the upward messages, when combined with the existing queue size, are not exceeding `config.max_upward_queue_count` and `config.watermark_upward_queue_size` parameters. 1. create an entry in the `PendingAvailability` map for each backed candidate with a blank `availability_votes` bitfield. + 1. create a corresponding entry in the `PendingAvailabilityCommitments` with the commitments. 1. Return a `Vec` of all scheduled cores of the list of passed assignments that a candidate was successfully backed for, sorted ascending by CoreIndex. -* `enact_candidate(relay_parent_number: BlockNumber, AbridgedCandidateReceipt)`: +* `enact_candidate(relay_parent_number: BlockNumber, CommittedCandidateReceipt)`: 1. If the receipt contains a code upgrade, Call `Paras::schedule_code_upgrade(para_id, code, relay_parent_number + config.validationl_upgrade_delay)`. > TODO: Note that this is safe as long as we never enact candidates where the relay parent is across a session boundary. In that case, which we should be careful to avoid with contextual execution, the configuration might have changed and the para may de-sync from the host's understanding of it. - 1. call `Router::queue_upward_messages` for each backed candidate. + 1. call `Router::queue_upward_messages` for each backed candidate, using the [`UpwardMessage`s](../../types/messages.html#upward-message) from the [`CandidateCommitments`](../../types/candidate.html#candidate-commitments). 1. Call `Paras::note_new_head` using the `HeadData` from the receipt and `relay_parent_number`. * `collect_pending`: diff --git a/roadmap/implementors-guide/src/runtime/router.md b/roadmap/implementors-guide/src/runtime/router.md index fff8e78920f7..4f7adeaa048f 100644 --- a/roadmap/implementors-guide/src/runtime/router.md +++ b/roadmap/implementors-guide/src/runtime/router.md @@ -27,9 +27,9 @@ No initialization routine runs for this module. ## Routines -* `queue_upward_messages(AbridgedCandidateReceipt)`: +* `queue_upward_messages(ParaId, Vec)`: 1. Updates `NeedsDispatch`, and enqueues upward messages into `RelayDispatchQueue` and modifies the respective entry in `RelayDispatchQueueSize`. -## Finalization +## Finalization 1. Dispatch queued upward messages from `RelayDispatchQueues` in a FIFO order applying the `config.watermark_upward_queue_size` and `config.max_upward_queue_count` limits. diff --git a/roadmap/implementors-guide/src/types/backing.md b/roadmap/implementors-guide/src/types/backing.md index a963d0ee9695..878fb4f28848 100644 --- a/roadmap/implementors-guide/src/types/backing.md +++ b/roadmap/implementors-guide/src/types/backing.md @@ -61,7 +61,7 @@ enum Statement { /// second only 1 candidate; this places an upper bound on the total number of candidates whose validity /// needs to be checked. A validator who seconds more than 1 parachain candidate per relay head is subject /// to slashing. - Seconded(CandidateReceipt), + Seconded(CommittedCandidateReceipt), /// A statement about the validity of a candidate, based on candidate's hash. Valid(Hash), /// A statement about the invalidity of a candidate. @@ -102,11 +102,11 @@ Munging the signed `Statement` into a `CompactStatement` before signing allows t ## Backed Candidate -An [`AbridgedCandidateReceipt`](candidate.md#abridgedcandidatereceipt) along with all data necessary to prove its backing. This is submitted to the relay-chain to process and move along the candidate to the pending-availability stage. 
+A [`CommittedCandidateReceipt`](candidate.md#committed-candidate-receipt) along with all data necessary to prove its backing. This is submitted to the relay-chain to process and move along the candidate to the pending-availability stage.
 
 ```rust
 struct BackedCandidate {
-	candidate: AbridgedCandidateReceipt,
+	candidate: CommittedCandidateReceipt,
 	validity_votes: Vec<ValidityAttestation>,
 	// the indices of validators who signed the candidate within the group. There is no need to include
 	// bit for any validators who are not in the group, so this is more compact.
diff --git a/roadmap/implementors-guide/src/types/candidate.md b/roadmap/implementors-guide/src/types/candidate.md
index ff09365c936a..065e3075f918 100644
--- a/roadmap/implementors-guide/src/types/candidate.md
+++ b/roadmap/implementors-guide/src/types/candidate.md
@@ -4,18 +4,34 @@ Para candidates are some of the most common types, both within the runtime and o
 
 In a way, this entire guide is about these candidates: how they are scheduled, constructed, backed, included, and challenged.
 
-This section will describe the base candidate type, its components, and abridged counterpart.
+This section will describe the base candidate type, its components, and variants that contain extra data.
 
-## CandidateReceipt
+## Candidate Receipt
 
-This is the base receipt type. The `GlobalValidationSchedule` and the `LocalValidationData` are technically redundant with the `inner.relay_parent`, which uniquely describes the a block in the blockchain from whose state these values are derived. The [`AbridgedCandidateReceipt`](#abridgedcandidatereceipt) variant is often used instead for this reason.
+Much info in a [`FullCandidateReceipt`](#full-candidate-receipt) is duplicated from the relay-chain state. When the corresponding relay-chain state is considered widely available, the Candidate Receipt should be favored over the `FullCandidateReceipt`.
 
-However, the full CandidateReceipt type is useful as a means of avoiding the implicit dependency on availability of old blockchain state. In situations such as availability and approval, having the full description of the candidate within a self-contained struct is convenient.
+Examples of situations where the state is readily available include: within the scope of work done by subsystems working on a given relay-parent, or within the logic of the runtime importing a backed candidate.
 
 ```rust
-/// All data pertaining to the execution of a para candidate.
-struct CandidateReceipt {
-	inner: AbridgedCandidateReceipt,
+/// A candidate-receipt.
+struct CandidateReceipt {
+	/// The descriptor of the candidate.
+	descriptor: CandidateDescriptor,
+	/// The hash of the encoded commitments made as a result of candidate execution.
+	commitments_hash: Hash,
+}
+```
+
+## Full Candidate Receipt
+
+This is the full receipt type. The `GlobalValidationSchedule` and the `LocalValidationData` are technically redundant with the `inner.relay_parent`, which uniquely describes a block in the blockchain from whose state these values are derived. The [`CandidateReceipt`](#candidate-receipt) variant is often used instead for this reason.
+
+However, the Full Candidate Receipt type is useful as a means of avoiding the implicit dependency on availability of old blockchain state. In situations such as availability and approval, having the full description of the candidate within a self-contained struct is convenient.
+
+```rust
+/// All data pertaining to the execution of a para candidate.
+struct FullCandidateReceipt { + inner: CandidateReceipt, /// The global validation schedule. global_validation: GlobalValidationSchedule, /// The local validation data. @@ -23,38 +39,49 @@ struct CandidateReceipt { } ``` -## AbridgedCandidateReceipt +## Committed Candidate Receipt -Much info in a [`CandidateReceipt`](#candidatereceipt) is duplicated from the relay-chain state. When the corresponding relay-chain state is considered widely available, the Abridged Candidate Receipt should be favored. +This is a variant of the candidate receipt which includes the commitments of the candidate receipt alongside the descriptor. This should be favored over the [`Candidate Receipt`](#candidate-receipt) in situations where the candidate is not going to be executed but the actual data committed to is important. This is often the case in the backing phase. -Examples of situations where the state is readily available includes within the scope of work done by subsystems working on a given relay-parent, or within the logic of the runtime importing a backed candidate. +The hash of the committed candidate receipt will be the same as the corresponding [`Candidate Receipt`](#candidate-receipt), because it is computed by first hashing the encoding of the commitments to form a plain [`Candidate Receipt`](#candidate-receipt). + +```rust +/// A candidate-receipt with commitments directly included. +struct CommittedCandidateReceipt { + /// The descriptor of the candidate. + descriptor: CandidateDescriptor, + /// The commitments of the candidate receipt. + commitments: CandidateCommitments, +} +``` + +## Candidate Descriptor + +This struct is pure description of the candidate, in a lightweight format. ```rust -/// An abridged candidate-receipt. -struct AbridgedCandidateReceipt { +/// A unique descriptor of the candidate receipt. +struct CandidateDescriptor { /// The ID of the para this is a candidate for. para_id: Id, /// The hash of the relay-chain block this is executed in the context of. relay_parent: Hash, - /// The head-data produced as a result of execution. - head_data: HeadData, /// The collator's sr25519 public key. collator: CollatorId, /// Signature on blake2-256 of components of this receipt: - /// The parachain index, the relay parent, the head data, and the pov_hash. + /// The parachain index, the relay parent, and the pov_hash. signature: CollatorSignature, /// The blake2-256 hash of the pov-block. pov_hash: Hash, - /// Commitments made as a result of validation. - commitments: CandidateCommitments, } ``` + ## GlobalValidationSchedule The global validation schedule comprises of information describing the global environment for para execution, as derived from a particular relay-parent. These are parameters that will apply to all parablocks executed in the context of this relay-parent. -> TODO: message queue watermarks (first upward messages, then XCMP channels) +> TODO: message queue watermarks (first downward messages, then XCMP channels) ```rust /// Extra data that is needed along with the other fields in a `CandidateReceipt` @@ -88,13 +115,13 @@ Para validation happens optimistically before the block is authored, so it is no ```rust /// Extra data that is needed along with the other fields in a `CandidateReceipt` /// to fully validate the candidate. These fields are parachain-specific. -pub struct LocalValidationData { +struct LocalValidationData { /// The parent head-data. - pub parent_head: HeadData, + parent_head: HeadData, /// The balance of the parachain at the moment of validation. 
- pub balance: Balance, + balance: Balance, /// The blake2-256 hash of the validation code used to execute the candidate. - pub validation_code_hash: Hash, + validation_code_hash: Hash, /// Whether the parachain is allowed to upgrade its validation code. /// /// This is `Some` if so, and contains the number of the minimum relay-chain @@ -106,7 +133,7 @@ pub struct LocalValidationData { /// height. This may be equal to the current perceived relay-chain block height, in /// which case the code upgrade should be applied at the end of the signaling /// block. - pub code_upgrade_allowed: Option, + code_upgrade_allowed: Option, } ``` @@ -126,15 +153,17 @@ The execution and validation of parachain or parathread candidates produces a nu /// Commitments made in a `CandidateReceipt`. Many of these are outputs of validation. #[derive(PartialEq, Eq, Clone, Encode, Decode)] #[cfg_attr(feature = "std", derive(Debug, Default))] -pub struct CandidateCommitments { +struct CandidateCommitments { /// Fees paid from the chain to the relay chain validators. - pub fees: Balance, + fees: Balance, /// Messages destined to be interpreted by the Relay chain itself. - pub upward_messages: Vec, + upward_messages: Vec, /// The root of a block's erasure encoding Merkle tree. - pub erasure_root: Hash, + erasure_root: Hash, /// New validation code. - pub new_validation_code: Option, + new_validation_code: Option, + /// The head-data produced as a result of execution. + head_data: HeadData, } ``` diff --git a/roadmap/implementors-guide/src/types/overseer-protocol.md b/roadmap/implementors-guide/src/types/overseer-protocol.md index 6a1bdfa96ed9..fac08c46fd18 100644 --- a/roadmap/implementors-guide/src/types/overseer-protocol.md +++ b/roadmap/implementors-guide/src/types/overseer-protocol.md @@ -170,9 +170,17 @@ If this subsystem chooses to second a parachain block, it dispatches a `Candidat ## PoV Distribution -Messages received by the PoV Distribution subsystem are unspecified and highly tied to gossip. +Messages -> TODO +```rust +enum PoVDistributionMessage { + /// Fetch a PoV from the network. + /// (relay_parent, PoV-hash, Response channel). + FetchPoV(Hash, CandidateDescriptor, ResponseChannel), + /// + DistributePoV(Hash, PoV), +} +``` ## Provisioner Message From 6d3179ba286ff258eebf610fc1f1fbbd0637fa15 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 19 Jun 2020 16:37:07 -0400 Subject: [PATCH 09/16] adjust doc --- roadmap/implementors-guide/src/types/backing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roadmap/implementors-guide/src/types/backing.md b/roadmap/implementors-guide/src/types/backing.md index 878fb4f28848..d1da2e4662ef 100644 --- a/roadmap/implementors-guide/src/types/backing.md +++ b/roadmap/implementors-guide/src/types/backing.md @@ -91,7 +91,7 @@ enum CompactStatement { A statement which has been [cryptographically signed](#signed-wrapper) by a validator. ```rust -/// A signed statement, containing the abridged candidate receipt in the `Seconded` variant. +/// A signed statement, containing the committed candidate receipt in the `Seconded` variant. pub type SignedFullStatement = Signed; /// A signed statement, containing only the hash. 
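The receipt relationship established by the preceding patches can be sketched concretely. This is a hedged illustration, not part of any patch in the series: the `to_plain` name is hypothetical, and `blake2_256` stands in for the hasher the guide references.

```rust
use parity_scale_codec::Encode;

impl CommittedCandidateReceipt {
	/// Convert to the plain `CandidateReceipt`: keep the descriptor and replace
	/// the commitments with the blake2-256 hash of their SCALE encoding.
	fn to_plain(&self) -> CandidateReceipt {
		CandidateReceipt {
			descriptor: self.descriptor.clone(),
			commitments_hash: blake2_256(&self.commitments.encode()).into(),
		}
	}
}
```

Since the commitments enter the plain receipt only through `commitments_hash`, the hash of a `CommittedCandidateReceipt` is defined as the hash of the plain receipt derived this way, which is why the two forms can be used interchangeably as candidate identifiers.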
From 12c3a4237f1d0a1241c6c74e4f56c4d056f76229 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 19 Jun 2020 18:19:12 -0400 Subject: [PATCH 10/16] describe basic PoV distribution --- .../src/node/backing/candidate-backing.md | 2 +- .../src/node/backing/pov-distribution.md | 115 +++++++++++++++++- .../node/backing/statement-distribution.md | 2 +- .../src/types/overseer-protocol.md | 18 +-- 4 files changed, 124 insertions(+), 13 deletions(-) diff --git a/roadmap/implementors-guide/src/node/backing/candidate-backing.md b/roadmap/implementors-guide/src/node/backing/candidate-backing.md index c57680e6356c..1490bb074ccb 100644 --- a/roadmap/implementors-guide/src/node/backing/candidate-backing.md +++ b/roadmap/implementors-guide/src/node/backing/candidate-backing.md @@ -12,7 +12,7 @@ Once a sufficient quorum has agreed that a candidate is valid, this subsystem no The [Candidate Selection subsystem](candidate-selection.md) is the primary source of non-overseer messages into this subsystem. That subsystem generates appropriate [`CandidateBackingMessage`s](../../types/overseer-protocol.md#candidate-backing-message), and passes them to this subsystem. -This subsystem validates the candidates and generates an appropriate [`Statement`](../../types/backing.md#statement-type). All `Statement`s are then passed on to the [Statement Distribution subsystem](statement-distribution.md) to be gossiped to peers. When this subsystem decides that a candidate is invalid, and it was recommended to us to second by our own Candidate Selection subsystem, a message is sent to the Candidate Selection subsystem with the candidate's hash so that the collator which recommended it can be penalized. +This subsystem validates the candidates and generates an appropriate [`SignedStatement`](../../types/backing.md#signed-statement-type). All `SignedStatement`s are then passed on to the [Statement Distribution subsystem](statement-distribution.md) to be gossiped to peers. All [Proofs of Validity](../../types/availability.md#proof-of-validity) should be distributed via the [PoV Distribution](pov-distribution.md) subsystem. When this subsystem decides that a candidate is invalid, and it was recommended to us to second by our own Candidate Selection subsystem, a message is sent to the Candidate Selection subsystem with the candidate's hash so that the collator which recommended it can be penalized. ## Functionality diff --git a/roadmap/implementors-guide/src/node/backing/pov-distribution.md b/roadmap/implementors-guide/src/node/backing/pov-distribution.md index a1ecd3993872..1e3f97010d8e 100644 --- a/roadmap/implementors-guide/src/node/backing/pov-distribution.md +++ b/roadmap/implementors-guide/src/node/backing/pov-distribution.md @@ -4,11 +4,118 @@ This subsystem is responsible for distributing PoV blocks. For now, unified with ## Protocol -Handle requests for PoV block by candidate hash and relay-parent. +`ProtocolId`: `b"povd"` + +Input: [`PoVDistributionMessage`](../../types/overseer-protocol.html#pov-distribution-message) + + +Output: + +- NetworkBridge::RegisterEventProducer(`ProtocolId`) +- NetworkBridge::SendMessage(`[PeerId]`, `ProtocolId`, `Bytes`) +- NetworkBridge::ReportPeer(PeerId, cost_or_benefit) + ## Functionality -Implemented as a gossip system, where `PoV`s are not accepted unless we know a `Seconded` message. +This network protocol is responsible for distributing [`PoV`s](../../types/availability.html#proof-of-validity) by gossip. 
Since PoVs are heavy in practice, gossip is far from the most efficient way to distribute them. In the future, this should be replaced by a better network protocol that finds validators who have validated the block and connects to them directly.
+
+This protocol is described in terms of "us" and our peers, with the understanding that this is the procedure that any honest node will run. It has the following goals:
+  - We never have to buffer an unbounded amount of data
+  - PoVs will flow transitively across a network of honest nodes, stemming from the validators that originate them.
+
+As we are gossiping, we need to track which PoVs our peers are waiting for to avoid sending them data that they are not expecting. It is not reasonable to expect our peers to buffer unexpected PoVs, just as we will not buffer unexpected PoVs. So notification to our peers about what is being awaited is key. However, it is important that the notifications system is also bounded.
+
+For this, in order to avoid reaching into the internals of the [Statement Distribution](statement-distribution.html) Subsystem, we can rely on an expected property of candidate backing: that each validator can only second one candidate at each chain head. So we can set a cap on the number of PoVs each peer is allowed to notify us that they are waiting for at a given relay-parent. This cap will be the number of validators at that relay-parent. And the view update mechanism of the [Network Bridge](../utility/network-bridge.html) ensures that peers are only allowed to consider a certain set of relay-parents as live. So this bounding mechanism caps the amount of data we need to store per peer at any time at `sum({ n_validators_at_head(head) | head in view_heads })`. Additionally, peers should only be allowed to notify us of PoV hashes they are waiting for in the context of relay-parents in our own local view, which means that `n_validators_at_head` is implied to be `0` for relay-parents not in our own local view.
+
+View updates from peers and our own view updates are received from the network bridge. These will lag somewhat behind the `StartWork` and `StopWork` messages received from the overseer, which will influence the actual data we store. The `OurViewChange`s from the [`NetworkBridgeEvent`](../../types/overseer-protocol.html#network-bridge-update) must be considered canonical in terms of our peers' perception of us.
+
+Lastly, the system needs to be bootstrapped with our own perception of which PoVs we are cognizant of but awaiting data for. This is done by receipt of the [`PoVDistributionMessage`](../../types/overseer-protocol.html#pov-distribution-message)::ValidatorStatement variant. We can ignore anything except for `Seconded` statements.
+
+## Formal Description
+
+This protocol can be implemented as a state machine with the following state:
+
+```rust
+struct State {
+	relay_parent_state: Map<Hash, BlockBasedState>,
+	peer_state: Map<PeerId, PeerState>,
+	our_view: View,
+}
+
+struct BlockBasedState {
+	known: Map<Hash, PoV>, // should be a shared PoV in practice. these things are heavy.
+	awaited: Set<Hash>, // awaited PoVs by blake2-256 hash.
+	fetching: Map<Hash, [ResponseChannel<PoV>]>,
+	n_validators: usize,
+}
+
+struct PeerState {
+	awaited: Map<Hash, Set<Hash>>,
+}
+```
+
+We also assume the following network messages, which are sent and received by the [Network Bridge](../utility/network-bridge.html).
+
+```rust
+enum NetworkMessage {
+	/// Notification that we are awaiting the given PoVs (by hash) against a
+	/// specific relay-parent hash.
+	Awaiting(Hash, Vec<Hash>),
+	/// Notification of an awaited PoV, in a given relay-parent context.
+	/// (relay_parent, pov_hash, pov)
+	SendPoV(Hash, Hash, PoV),
+}
+```
+
+Here is the logic of the state machine:
+
+*Overseer Signals*
+- On `StartWork(relay_parent)`:
+  - Get the number of validators at that relay parent by querying the [Runtime API](../utility/runtime-api.html) for the validators and then counting them.
+  - Create a blank entry in `relay_parent_state` under `relay_parent` with correct `n_validators` set.
+- On `StopWork(relay_parent)`:
+  - Remove the entry for `relay_parent` from `relay_parent_state`.
+- On `Concluded`: conclude.
+
+*PoV Distribution Messages*
+- On `ValidatorStatement(relay_parent, statement)`
+  - If this is not `Statement::Seconded`, ignore.
+  - If there is an entry under `relay_parent` in `relay_parent_state`, add the `pov_hash` of the seconded Candidate's [`CandidateDescriptor`](../../types/candidate.html#candidate-descriptor) to the `awaited` set of the entry.
+  - If the `pov_hash` was not previously awaited and there are `n_validators` or fewer entries in the `awaited` set, send `NetworkMessage::Awaiting(relay_parent, vec![pov_hash])` to all peers.
+- On `FetchPoV(relay_parent, descriptor, response_channel)`
+  - If there is no entry in `relay_parent_state` under `relay_parent`, ignore.
+  - If there is a PoV under `descriptor.pov_hash` in the `known` map, send that PoV on the channel and return.
+  - Otherwise, place the `response_channel` in the `fetching` map under `descriptor.pov_hash`.
+- On `DistributePoV(relay_parent, descriptor, PoV)`
+  - If there is no entry in `relay_parent_state` under `relay_parent`, ignore.
+  - Complete and remove any channels under `descriptor.pov_hash` in the `fetching` map.
+  - Send `NetworkMessage::SendPoV(relay_parent, descriptor.pov_hash, PoV)` to all peers who have the `descriptor.pov_hash` in the set under `relay_parent` in the `peer.awaited` map and remove the entry from `peer.awaited`.
+  - Note the PoV under `descriptor.pov_hash` in `known`.
+
+*Network Bridge Updates*
+- On `PeerConnected(peer_id, observed_role)`
+  - Make a fresh entry in the `peer_state` map for the `peer_id`.
+- On `PeerDisconnected(peer_id)`
+  - Remove the entry for `peer_id` from the `peer_state` map.
+- On `PeerMessage(peer_id, bytes)`
+  - If the bytes do not decode to a `NetworkMessage` or the `peer_id` has no entry in the `peer_state` map, report and ignore.
+  - If this is `NetworkMessage::Awaiting(relay_parent, pov_hashes)`:
+    - If there is no entry under `peer_state.awaited` for the `relay_parent`, report and ignore.
+    - If `relay_parent` is not contained within `our_view`, report and ignore.
+    - Otherwise, if the `awaited` map combined with the `pov_hashes` would have more than `relay_parent_state[relay_parent].n_validators` entries, report and ignore. Note that we are leaning on the property of the network bridge that it sets our view based on `StartWork` messages.
+    - For each new `pov_hash` in `pov_hashes`, if there is a `pov` under `pov_hash` in the `known` map, send the peer a `NetworkMessage::SendPoV(relay_parent, pov_hash, pov)`.
+    - Otherwise, add the `pov_hash` to the `awaited` map.
+  - If this is `NetworkMessage::SendPoV(relay_parent, pov_hash, pov)`:
+    - If there is no entry under `relay_parent` in `relay_parent_state` or no entry under `pov_hash` in our `awaited` map for that `relay_parent`, report and ignore.
+    - If the blake2-256 hash of the pov doesn't equal `pov_hash`, report and ignore.
+    - Complete and remove any listeners in the `fetching` map under `pov_hash`.
+    - Add to `known` map.
+    - Send `NetworkMessage::SendPoV(relay_parent, pov_hash, pov)` to all peers who have the `pov_hash` in the set under `relay_parent` in the `peer.awaited` map and remove the entry from `peer.awaited`.
+- On `PeerViewChange(peer_id, view)`
+  - If the peer is unknown, ignore.
+  - Ensure there is an entry under `relay_parent` for each `relay_parent` in `view` within the `peer.awaited` map, creating blank `awaited` lists as necessary.
+  - Remove all entries under `peer.awaited` that are not within `view`.
+- On `OurViewChange(view)`
+  - Update `our_view` to `view`
 
-> TODO: this requires a lot of cross-contamination with statement distribution even if we don't implement this as a gossip system. In a point-to-point implementation, we still have to know _who to ask_, which means tracking who's submitted `Seconded`, `Valid`, or `Invalid` statements - by validator and by peer. One approach is to have the Statement gossip system to just send us this information and then we can separate the systems from the beginning instead of combining them
-
diff --git a/roadmap/implementors-guide/src/node/backing/statement-distribution.md b/roadmap/implementors-guide/src/node/backing/statement-distribution.md
index 5bdf74c495d9..59e5244d0d76 100644
--- a/roadmap/implementors-guide/src/node/backing/statement-distribution.md
+++ b/roadmap/implementors-guide/src/node/backing/statement-distribution.md
@@ -37,7 +37,7 @@ The Statement Distribution subsystem sends statements to peer nodes and detects
 
 There is a very simple state machine which governs which messages we are willing to receive from peers. Not depicted in the state machine: on initial receipt of any [`SignedFullStatement`](../../types/backing.md#signed-statement-type), validate that the provided signature does in fact sign the included data. Note that each individual parablock candidate gets its own instance of this state machine; it is perfectly legal to receive a `Valid(X)` before a `Seconded(Y)`, as long as a `Seconded(X)` has been received.
 
-A: Initial State. Receive `SignedFullStatement(Statement::Second)`: extract `Statement`, forward to Candidate Backing, proceed to B. Receive any other `SignedFullStatement` variant: drop it.
+A: Initial State. Receive `SignedFullStatement(Statement::Seconded)`: extract `Statement`, forward to Candidate Backing and PoV Distribution, proceed to B. Receive any other `SignedFullStatement` variant: drop it.
 
 B: Receive any `SignedFullStatement`: check signature, forward to Candidate Backing. Receive `OverseerMessage::StopWork`: proceed to C.
 
diff --git a/roadmap/implementors-guide/src/types/overseer-protocol.md b/roadmap/implementors-guide/src/types/overseer-protocol.md
index fac08c46fd18..7b211bd14f6c 100644
--- a/roadmap/implementors-guide/src/types/overseer-protocol.md
+++ b/roadmap/implementors-guide/src/types/overseer-protocol.md
@@ -90,8 +90,8 @@ enum CandidateBackingMessage {
 	/// in a child of the given relay-parent, referenced by its hash.
 	RegisterBackingWatcher(Hash, TODO),
 	/// Note that the Candidate Backing subsystem should second the given candidate in the context of the
-	/// given relay-parent (ref. by hash). This candidate must be validated.
-	Second(Hash, CandidateReceipt),
+	/// given relay-parent (ref. by hash). This candidate must be validated using the provided PoV.
+	Second(Hash, CandidateReceipt, PoV),
 	/// Note a peer validator's statement about a particular candidate. Disagreements about validity must be escalated
	/// to a broader check by Misbehavior Arbitration. Agreements are simply tallied until a quorum is reached.
 	Statement(Statement),
@@ -168,17 +168,21 @@ enum MisbehaviorReport {
 
 If this subsystem chooses to second a parachain block, it dispatches a `CandidateBackingSubsystemMessage`.
 
-## PoV Distribution
-
-Messages
+## PoV Distribution Message
 
 ```rust
 enum PoVDistributionMessage {
+	/// Note a statement by a validator on a relay-parent. `Seconded` statements must always
+	/// have been passed in before `Valid` or `Invalid` statements.
+	ValidatorStatement(Hash, SignedFullStatement),
 	/// Fetch a PoV from the network.
 	/// (relay_parent, candidate descriptor, response channel).
 	FetchPoV(Hash, CandidateDescriptor, ResponseChannel<PoV>),
-	///
-	DistributePoV(Hash, PoV),
+	/// Distribute a PoV for the given relay-parent and CandidateDescriptor.
+	/// The PoV should correctly hash to the PoV hash mentioned in the CandidateDescriptor.
+	DistributePoV(Hash, CandidateDescriptor, PoV),
+	/// An update from the network bridge.
+	NetworkBridgeUpdate(NetworkBridgeEvent),
 }
 ```

From dd512a23930e8bbc9379a47ea8dc52a4e8e93e7a Mon Sep 17 00:00:00 2001
From: Robert Habermeier
Date: Sat, 20 Jun 2020 15:07:57 -0400
Subject: [PATCH 11/16] replace some straggling html links

---
 .../node/availability/availability-distribution.md |  2 +-
 .../src/node/backing/pov-distribution.md           | 14 +++++++-------
 .../implementors-guide/src/runtime/inclusion.md    |  4 ++--
 .../implementors-guide/src/types/availability.md   |  2 +-
 roadmap/implementors-guide/src/types/backing.md    |  2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/roadmap/implementors-guide/src/node/availability/availability-distribution.md b/roadmap/implementors-guide/src/node/availability/availability-distribution.md
index 1640eb08e039..008f3e91fbe7 100644
--- a/roadmap/implementors-guide/src/node/availability/availability-distribution.md
+++ b/roadmap/implementors-guide/src/node/availability/availability-distribution.md
@@ -26,7 +26,7 @@ Register on startup an event producer with `NetworkBridge::RegisterEventProduce
 
 For each relay-parent in our local view update, look at all backed candidates pending availability. Distribute via gossip all erasure chunks for all candidates that we have to peers.
 
-We define an operation `live_candidates(relay_heads) -> Set<CommittedCandidateReceipt>` which returns a set of [`CommittedCandidateReceipt`s](../../types/candidate.html#committed-candidate-receipt) a given set of relay chain heads that implies a set of candidates whose availability chunks should be currently gossiped. This is defined as all candidates pending availability in any of those relay-chain heads or any of their last `K` ancestors. We assume that state is not pruned within `K` blocks of the chain-head.
+We define an operation `live_candidates(relay_heads) -> Set<CommittedCandidateReceipt>` which, for a given set of relay chain heads, returns the set of [`CommittedCandidateReceipt`s](../../types/candidate.md#committed-candidate-receipt) whose availability chunks should be currently gossiped. This is defined as all candidates pending availability in any of those relay-chain heads or any of their last `K` ancestors. We assume that state is not pruned within `K` blocks of the chain-head.
 
 We will send any erasure-chunks that correspond to candidates in `live_candidates(peer_most_recent_view_update)`. Likewise, we only accept and forward messages pertaining to a candidate in `live_candidates(current_heads)`.
Each erasure chunk should be accompanied by a merkle proof that it is committed to by the erasure trie root in the candidate receipt, and this gossip system is responsible for checking such proof.
 
diff --git a/roadmap/implementors-guide/src/node/backing/pov-distribution.md b/roadmap/implementors-guide/src/node/backing/pov-distribution.md
index 1e3f97010d8e..bf687bd64f9b 100644
--- a/roadmap/implementors-guide/src/node/backing/pov-distribution.md
+++ b/roadmap/implementors-guide/src/node/backing/pov-distribution.md
@@ -6,7 +6,7 @@ This subsystem is responsible for distributing PoV blocks. For now, unified with
 
 `ProtocolId`: `b"povd"`
 
-Input: [`PoVDistributionMessage`](../../types/overseer-protocol.html#pov-distribution-message)
+Input: [`PoVDistributionMessage`](../../types/overseer-protocol.md#pov-distribution-message)
 
 
 Output:
@@ -18,7 +18,7 @@ Output:
 
 ## Functionality
 
-This network protocol is responsible for distributing [`PoV`s](../../types/availability.html#proof-of-validity) by gossip. Since PoVs are heavy in practice, gossip is far from the most efficient way to distribute them. In the future, this should be replaced by a better network protocol that finds validators who have validated the block and connects to them directly.
+This network protocol is responsible for distributing [`PoV`s](../../types/availability.md#proof-of-validity) by gossip. Since PoVs are heavy in practice, gossip is far from the most efficient way to distribute them. In the future, this should be replaced by a better network protocol that finds validators who have validated the block and connects to them directly.
 
 This protocol is described in terms of "us" and our peers, with the understandin
@@ -26,9 +26,9 @@ This protocol is described in terms of "us" and our peers, with the understandin
 
 As we are gossiping, we need to track which PoVs our peers are waiting for to avoid sending them data that they are not expecting. It is not reasonable to expect our peers to buffer unexpected PoVs, just as we will not buffer unexpected PoVs. So notification to our peers about what is being awaited is key. However, it is important that the notifications system is also bounded.
 
-For this, in order to avoid reaching into the internals of the [Statement Distribution](statement-distribution.html) Subsystem, we can rely on an expected property of candidate backing: that each validator can only second one candidate at each chain head. So we can set a cap on the number of PoVs each peer is allowed to notify us that they are waiting for at a given relay-parent. This cap will be the number of validators at that relay-parent. And the view update mechanism of the [Network Bridge](../utility/network-bridge.html) ensures that peers are only allowed to consider a certain set of relay-parents as live. So this bounding mechanism caps the amount of data we need to store per peer at any time at `sum({ n_validators_at_head(head) | head in view_heads })`. Additionally, peers should only be allowed to notify us of PoV hashes they are waiting for in the context of relay-parents in our own local view, which means that `n_validators_at_head` is implied to be `0` for relay-parents not in our own local view.
+For this, in order to avoid reaching into the internals of the [Statement Distribution](statement-distribution.md) Subsystem, we can rely on an expected property of candidate backing: that each validator can only second one candidate at each chain head. So we can set a cap on the number of PoVs each peer is allowed to notify us that they are waiting for at a given relay-parent. This cap will be the number of validators at that relay-parent. And the view update mechanism of the [Network Bridge](../utility/network-bridge.md) ensures that peers are only allowed to consider a certain set of relay-parents as live. So this bounding mechanism caps the amount of data we need to store per peer at any time at `sum({ n_validators_at_head(head) | head in view_heads })`. Additionally, peers should only be allowed to notify us of PoV hashes they are waiting for in the context of relay-parents in our own local view, which means that `n_validators_at_head` is implied to be `0` for relay-parents not in our own local view.
 
-View updates from peers and our own view updates are received from the network bridge. These will lag somewhat behind the `StartWork` and `StopWork` messages received from the overseer, which will influence the actual data we store. The `OurViewUpdate`s from the [`NetworkBridgeEvent`](../../types/overseer-protocol.html#network-bridge-update) must be considered canonical in terms of our peers' perception of us.
+View updates from peers and our own view updates are received from the network bridge. These will lag somewhat behind the `StartWork` and `StopWork` messages received from the overseer, which will influence the actual data we store. The `OurViewUpdate`s from the [`NetworkBridgeEvent`](../../types/overseer-protocol.md#network-bridge-update) must be considered canonical in terms of our peers' perception of us.
 
 Lastly, the system needs to be bootstrapped with our own perception of which PoVs we are cognizant of but awaiting data for. This is done by receipt of the [`PoVDistributionMessage`](../../types/overseer-protocolhtml#pov-distribution-message)::ValidatorStatement variant. We can ignore anything except for `Seconded` statements.
 
@@ -55,7 +55,7 @@ struct PeerState {
 }
 ```
 
-We also assume the following network messages, which are sent and received by the [Network Bridge](../utility/network-bridge.html)
+We also assume the following network messages, which are sent and received by the [Network Bridge](../utility/network-bridge.md)
 
 ```rust
 enum NetworkMessage {
@@ -72,7 +72,7 @@ Here is the logic of the state machine:
 
 *Overseer Signals*
 - On `StartWork(relay_parent)`:
-  - Get the number of validators at that relay parent by querying the [Runtime API](../utility/runtime-api.html) for the validators and then counting them.
+  - Get the number of validators at that relay parent by querying the [Runtime API](../utility/runtime-api.md) for the validators and then counting them.
   - Create a blank entry in `relay_parent_state` under `relay_parent` with correct `n_validators` set.
 - On `StopWork(relay_parent)`:
   - Remove the entry for `relay_parent` from `relay_parent_state`.
@@ -81,7 +81,7 @@ Here is the logic of the state machine:
 *PoV Distribution Messages*
 - On `ValidatorStatement(relay_parent, statement)`
   - If this is not `Statement::Seconded`, ignore.
-  - If there is an entry under `relay_parent` in `relay_parent_state`, add the `pov_hash` of the seconded Candidate's [`CandidateDescriptor`](../../types/candidate.html#candidate-descriptor) to the `awaited` set of the entry.
+  - If there is an entry under `relay_parent` in `relay_parent_state`, add the `pov_hash` of the seconded Candidate's [`CandidateDescriptor`](../../types/candidate.md#candidate-descriptor) to the `awaited` set of the entry.
   - If the `pov_hash` was not previously awaited and there are `n_validators` or fewer entries in the `awaited` set, send `NetworkMessage::Awaiting(relay_parent, vec![pov_hash])` to all peers.
 - On `FetchPoV(relay_parent, descriptor, response_channel)`
   - If there is no entry in `relay_parent_state` under `relay_parent`, ignore.
   - If there is a PoV under `descriptor.pov_hash` in the `known` map, send that PoV on the channel and return.
diff --git a/roadmap/implementors-guide/src/runtime/inclusion.md b/roadmap/implementors-guide/src/runtime/inclusion.md
index 29a1aa4e888a..2ab98be9d043 100644
--- a/roadmap/implementors-guide/src/runtime/inclusion.md
+++ b/roadmap/implementors-guide/src/runtime/inclusion.md
@@ -65,7 +65,7 @@ All failed checks should lead to an unrecoverable error making the block invalid
  1. If the core assignment includes a specific collator, ensure the backed candidate is issued by that collator.
  1. Ensure that any code upgrade scheduled by the candidate does not happen within `config.validation_upgrade_frequency` of `Paras::last_code_upgrade(para_id, true)`, if any, comparing against the value of `Paras::FutureCodeUpgrades` for the given para ID.
  1. Check the collator's signature on the candidate data.
- 1. Transform each [`CommittedCandidateReceipt`](../../types/candidate.html#committed-candidate-receipt) into the corresponding [`CandidateReceipt`](../../types/candidate.html#candidate-receipt), setting the commitments aside.
+ 1. Transform each [`CommittedCandidateReceipt`](../../types/candidate.md#committed-candidate-receipt) into the corresponding [`CandidateReceipt`](../../types/candidate.md#candidate-receipt), setting the commitments aside.
  1. check the backing of the candidate using the signatures and the bitfields, comparing against the validators assigned to the groups, fetched with the `group_validators` lookup.
  1. check that the upward messages, when combined with the existing queue size, are not exceeding `config.max_upward_queue_count` and `config.watermark_upward_queue_size` parameters.
  1. create an entry in the `PendingAvailability` map for each backed candidate with a blank `availability_votes` bitfield.
@@ -74,7 +74,7 @@ All failed checks should lead to an unrecoverable error making the block invalid
 * `enact_candidate(relay_parent_number: BlockNumber, CommittedCandidateReceipt)`:
  1. If the receipt contains a code upgrade, Call `Paras::schedule_code_upgrade(para_id, code, relay_parent_number + config.validation_upgrade_delay)`.
  > TODO: Note that this is safe as long as we never enact candidates where the relay parent is across a session boundary. In that case, which we should be careful to avoid with contextual execution, the configuration might have changed and the para may de-sync from the host's understanding of it.
- 1. call `Router::queue_upward_messages` for each backed candidate, using the [`UpwardMessage`s](../../types/messages.html#upward-message) from the [`CandidateCommitments`](../../types/candidate.html#candidate-commitments).
+ 1. call `Router::queue_upward_messages` for each backed candidate, using the [`UpwardMessage`s](../../types/messages.md#upward-message) from the [`CandidateCommitments`](../../types/candidate.md#candidate-commitments).
  1. Call `Paras::note_new_head` using the `HeadData` from the receipt and `relay_parent_number`.
* `collect_pending`:
 
diff --git a/roadmap/implementors-guide/src/types/availability.md b/roadmap/implementors-guide/src/types/availability.md
index 4fd3128513cf..3362908d6b63 100644
--- a/roadmap/implementors-guide/src/types/availability.md
+++ b/roadmap/implementors-guide/src/types/availability.md
@@ -5,7 +5,7 @@ candidates for the duration of a challenge period. This is done via an erasure-c
 
 ## Signed Availability Bitfield
 
-A bitfield [signed](backing.html#signed-wrapper) by a particular validator about the availability of pending candidates.
+A bitfield [signed](backing.md#signed-wrapper) by a particular validator about the availability of pending candidates.
 
 
 ```rust
diff --git a/roadmap/implementors-guide/src/types/backing.md b/roadmap/implementors-guide/src/types/backing.md
index d1da2e4662ef..22799f25733a 100644
--- a/roadmap/implementors-guide/src/types/backing.md
+++ b/roadmap/implementors-guide/src/types/backing.md
@@ -44,7 +44,7 @@ impl, RealPayload: Encode> Signed`. Therefore, for the generic case where `RealPayload = Payload`, it changes nothing. However, we `impl EncodeAs for Statement`, which helps efficiency.

From cf9ee532ba6718d48092a7481d091db27b17e6ff Mon Sep 17 00:00:00 2001
From: Peter Goodspeed-Niklaus
Date: Mon, 22 Jun 2020 11:43:05 +0200
Subject: [PATCH 12/16] claim this PR

From 98a4ba12a0abab2e6d3bff8ba9c8fed129641e0b Mon Sep 17 00:00:00 2001
From: Peter Goodspeed-Niklaus
Date: Mon, 22 Jun 2020 13:46:56 +0200
Subject: [PATCH 13/16] sync types with guide

---
 node/messages/src/lib.rs   | 29 ++++++++++++++++-------------
 node/primitives/src/lib.rs |  2 +-
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/node/messages/src/lib.rs b/node/messages/src/lib.rs
index 3a413f2c67bb..191d05178dd0 100644
--- a/node/messages/src/lib.rs
+++ b/node/messages/src/lib.rs
@@ -97,32 +97,35 @@ pub struct View(pub Vec<Hash>);
 
 /// Events from network.
 pub enum NetworkBridgeEvent {
-	/// A peer has connected.
+	/// A peer with given ID is now connected.
 	PeerConnected(PeerId, ObservedRole),
 
-	/// A peer has disconnected.
+	/// A peer with given ID is now disconnected.
 	PeerDisconnected(PeerId),
 
-	/// Peer has sent a message.
-	PeerMessage(PeerId, Vec<u8>),
+	/// We received a message from the given peer. Protocol ID should be apparent from context.
+	PeerMessage(PeerId, Bytes),
 
-	/// Peer's `View` has changed.
+	/// The given peer has updated its description of its view.
+	///
+	/// This is guaranteed to come after the `PeerConnected` event for a given peer.
 	PeerViewChange(PeerId, View),
 
-	/// Our `View` has changed.
+	/// We have posted the given view update to all connected peers.
 	OurViewChange(View),
 }
 
 /// Messages received by the network bridge subsystem.
-pub enum NetworkBridgeSubsystemMessage {
-	/// Register an event producer on startup.
-	RegisterEventProducer(ProtocolId, fn(NetworkBridgeEvent) -> AllMessages),
+pub enum NetworkBridgeMessage {
+	/// Register an event producer with the network bridge. This should be done early and cannot
+	/// be de-registered.
+	RegisterEventProducer(ProtocolId, Box<dyn Fn(NetworkBridgeEvent) -> AllMessages>),
 
-	/// Report a peer for their actions.
-	ReportPeer(PeerId, ReputationChange),
+	/// Report a cost or benefit of a peer. Negative values are costs, positive are benefits.
+	ReportPeer(PeerId, i32),
 
-	/// Send a message to multiple peers.
-	SendMessage(Vec<PeerId>, ProtocolId, Vec<u8>),
+	/// Send a message to one or more peers on the given protocol ID.
+	SendMessage([PeerId], ProtocolId, Bytes),
 }
 
 /// Availability Distribution Message.
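With the boxed-closure form of `RegisterEventProducer` above, a subsystem hands the network bridge a translation function from `NetworkBridgeEvent`s into its own message type. As a rough usage sketch (not part of this patch: the `AllMessages::PoVDistribution` wrapper and the `send_msg` helper are assumed names, and `ProtocolId` is taken to be the 4-byte identifier the guide uses, e.g. `b"povd"`):

```rust
// Hypothetical sketch: a subsystem registers an event producer so that
// network-bridge events are routed back to it via the overseer.
let register = NetworkBridgeMessage::RegisterEventProducer(
	*b"povd", // assumed: ProtocolId is a 4-byte identifier
	Box::new(|event: NetworkBridgeEvent| {
		// Wrap each bridge event in this subsystem's own message type so the
		// overseer can dispatch it to the right subsystem.
		AllMessages::PoVDistribution(PoVDistributionMessage::NetworkBridgeUpdate(event))
	}),
);
ctx.send_msg(AllMessages::NetworkBridge(register)).await;
```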
diff --git a/node/primitives/src/lib.rs b/node/primitives/src/lib.rs
index bd43748ab24a..feb89e663266 100644
--- a/node/primitives/src/lib.rs
+++ b/node/primitives/src/lib.rs
@@ -63,7 +63,7 @@ impl EncodeAs<CompactStatement> for Statement {
 
 /// Only the compact `SignedStatement` is suitable for submission to the chain.
 pub type SignedFullStatement = Signed<Statement, CompactStatement>;
 
-/// A misbehaviour report.
+/// A misbehavior report.
 pub enum MisbehaviorReport {
 	/// These validator nodes disagree on this candidate's validity, please figure it out
 	///

From 1700dd5ec1edaeaf0bd6787b663bbb658e598e14 Mon Sep 17 00:00:00 2001
From: Peter Goodspeed-Niklaus
Date: Mon, 22 Jun 2020 13:57:40 +0200
Subject: [PATCH 14/16] update guide: link to proper struct definitions

---
 .../src/node/backing/statement-distribution.md     | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/roadmap/implementors-guide/src/node/backing/statement-distribution.md b/roadmap/implementors-guide/src/node/backing/statement-distribution.md
index b77ee7464f8b..4f47967ed689 100644
--- a/roadmap/implementors-guide/src/node/backing/statement-distribution.md
+++ b/roadmap/implementors-guide/src/node/backing/statement-distribution.md
@@ -8,13 +8,11 @@ The Statement Distribution Subsystem is responsible for distributing statements
 
 Input:
 
-- NetworkBridgeUpdate(update)
+- [`NetworkBridgeUpdate`](../../types/overseer-protocol.md#network-bridge-update)
 
 Output:
 
-- NetworkBridge::RegisterEventProducer(`ProtocolId`)
-- NetworkBridge::SendMessage(`[PeerId]`, `ProtocolId`, `Bytes`)
-- NetworkBridge::ReportPeer(PeerId, cost_or_benefit)
+- [`NetworkBridgeMessage`](../../types/overseer-protocol.md#network-bridge-message)
 
 ## Functionality
 
From 593417cbb1db792bf356475ba8acf9d2b4e025e2 Mon Sep 17 00:00:00 2001
From: Peter Goodspeed-Niklaus
Date: Mon, 22 Jun 2020 14:21:32 +0200
Subject: [PATCH 15/16] stub out statement gossip subsystem crate and struct

---
 Cargo.lock                                    | 10 ++++
 Cargo.toml                                    |  1 +
 .../network/statement-distribution/Cargo.toml | 11 ++++
 .../network/statement-distribution/src/lib.rs | 50 +++++++++++++++++++
 4 files changed, 72 insertions(+)
 create mode 100644 node/network/statement-distribution/Cargo.toml
 create mode 100644 node/network/statement-distribution/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 55b417c691fb..e1aa3e6ddb9b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7383,6 +7383,16 @@ version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8"
 
+[[package]]
+name = "statement-distribution"
+version = "0.1.0"
+dependencies = [
+ "futures 0.3.5",
+ "sc-network",
+ "sc-network-gossip",
+ "sp-runtime",
+]
+
 [[package]]
 name = "static_assertions"
 version = "1.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 705262f5a23b..4cda67cf141a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ members = [
 	"validation",
 
 	"node/messages",
+	"node/network/statement-distribution",
 	"node/overseer",
 	"node/primitives",
 	"node/service",
diff --git a/node/network/statement-distribution/Cargo.toml b/node/network/statement-distribution/Cargo.toml
new file mode 100644
index 000000000000..331cdce3ee09
--- /dev/null
+++ b/node/network/statement-distribution/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "statement-distribution"
+version = "0.1.0"
+authors = ["Parity Technologies <admin@parity.io>"]
+edition = "2018"
+
+[dependencies]
+futures = "0.3.4"
+sc-network = { git = "https://github.com/paritytech/substrate", branch = "master" }
+sc-network-gossip = { git = "https://github.com/paritytech/substrate", branch = "master" }
"https://github.com/paritytech/substrate", branch = "master" } +sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/statement-distribution/src/lib.rs b/node/network/statement-distribution/src/lib.rs new file mode 100644 index 000000000000..0d760a2da876 --- /dev/null +++ b/node/network/statement-distribution/src/lib.rs @@ -0,0 +1,50 @@ +use futures::prelude::*; +use sc_network::{Event, PeerId, ReputationChange}; +use sc_network_gossip::Network; +use sp_runtime::{traits::Block as BlockT, ConsensusEngineId}; +use std::{borrow::Cow, pin::Pin, sync::Arc}; + +pub struct StatementGossipSubsystem { + block: std::marker::PhantomData, +} + +impl Network for StatementGossipSubsystem { + /// Returns a stream of events representing what happens on the network. + fn event_stream(&self) -> Pin + Send>> { + unimplemented!() + } + + /// Adjust the reputation of a node. + fn report_peer(&self, peer_id: PeerId, reputation: ReputationChange) { + unimplemented!() + } + + /// Force-disconnect a peer. + fn disconnect_peer(&self, who: PeerId) { + unimplemented!() + } + + /// Send a notification to a peer. + fn write_notification(&self, who: PeerId, engine_id: ConsensusEngineId, message: Vec) { + unimplemented!() + } + + /// Registers a notifications protocol. + /// + /// See the documentation of [`NetworkService:register_notifications_protocol`] for more information. + fn register_notifications_protocol( + &self, + engine_id: ConsensusEngineId, + protocol_name: Cow<'static, [u8]>, + ) { + unimplemented!() + } + + /// Notify everyone we're connected to that we have the given block. + /// + /// Note: this method isn't strictly related to gossiping and should eventually be moved + /// somewhere else. + fn announce(&self, block: B::Hash, associated_data: Vec) { + unimplemented!() + } +} From 469712caa3bf3c71dc7dbd4c0a5affc3328ad9e1 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Tue, 23 Jun 2020 13:00:04 +0200 Subject: [PATCH 16/16] fix broken links --- .../implementors-guide/src/node/backing/pov-distribution.md | 3 +-- roadmap/implementors-guide/src/runtime/inclusion.md | 4 ++-- roadmap/implementors-guide/src/types/candidate.md | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/roadmap/implementors-guide/src/node/backing/pov-distribution.md b/roadmap/implementors-guide/src/node/backing/pov-distribution.md index bf687bd64f9b..3a93c0c7e18f 100644 --- a/roadmap/implementors-guide/src/node/backing/pov-distribution.md +++ b/roadmap/implementors-guide/src/node/backing/pov-distribution.md @@ -30,7 +30,7 @@ For this, in order to avoid reaching into the internals of the [Statement Distri View updates from peers and our own view updates are received from the network bridge. These will lag somewhat behind the `StartWork` and `StopWork` messages received from the overseer, which will influence the actual data we store. The `OurViewUpdate`s from the [`NetworkBridgeEvent`](../../types/overseer-protocol.md#network-bridge-update) must be considered canonical in terms of our peers' perception of us. -Lastly, the system needs to be bootstrapped with our own perception of which PoVs we are cognizant of but awaiting data for. This is done by receipt of the [`PoVDistributionMessage`](../../types/overseer-protocolhtml#pov-distribution-message)::ValidatorStatement variant. We can ignore anything except for `Seconded` statements. 
+Lastly, the system needs to be bootstrapped with our own perception of which PoVs we are cognizant of but awaiting data for. This is done by receipt of the [`PoVDistributionMessage`](../../types/overseer-protocol.md#pov-distribution-message)::ValidatorStatement variant. We can ignore anything except for `Seconded` statements.
 
 ## Formal Description
 
@@ -118,4 +118,3 @@ Here is the logic of the state machine:
   - Remove all entries under `peer.awaited` that are not within `view`.
 - On `OurViewChange(view)`
   - Update `our_view` to `view`
-
diff --git a/roadmap/implementors-guide/src/runtime/inclusion.md b/roadmap/implementors-guide/src/runtime/inclusion.md
index 2ab98be9d043..d201e3ed51b2 100644
--- a/roadmap/implementors-guide/src/runtime/inclusion.md
+++ b/roadmap/implementors-guide/src/runtime/inclusion.md
@@ -65,7 +65,7 @@ All failed checks should lead to an unrecoverable error making the block invalid
  1. If the core assignment includes a specific collator, ensure the backed candidate is issued by that collator.
  1. Ensure that any code upgrade scheduled by the candidate does not happen within `config.validation_upgrade_frequency` of `Paras::last_code_upgrade(para_id, true)`, if any, comparing against the value of `Paras::FutureCodeUpgrades` for the given para ID.
  1. Check the collator's signature on the candidate data.
- 1. Transform each [`CommittedCandidateReceipt`](../../types/candidate.md#committed-candidate-receipt) into the corresponding [`CandidateReceipt`](../../types/candidate.md#candidate-receipt), setting the commitments aside.
+ 1. Transform each [`CommittedCandidateReceipt`](../types/candidate.md#committed-candidate-receipt) into the corresponding [`CandidateReceipt`](../types/candidate.md#candidate-receipt), setting the commitments aside.
  1. check the backing of the candidate using the signatures and the bitfields, comparing against the validators assigned to the groups, fetched with the `group_validators` lookup.
  1. check that the upward messages, when combined with the existing queue size, are not exceeding `config.max_upward_queue_count` and `config.watermark_upward_queue_size` parameters.
  1. create an entry in the `PendingAvailability` map for each backed candidate with a blank `availability_votes` bitfield.
@@ -74,7 +74,7 @@ All failed checks should lead to an unrecoverable error making the block invalid
 * `enact_candidate(relay_parent_number: BlockNumber, CommittedCandidateReceipt)`:
  1. If the receipt contains a code upgrade, Call `Paras::schedule_code_upgrade(para_id, code, relay_parent_number + config.validation_upgrade_delay)`.
  > TODO: Note that this is safe as long as we never enact candidates where the relay parent is across a session boundary. In that case, which we should be careful to avoid with contextual execution, the configuration might have changed and the para may de-sync from the host's understanding of it.
- 1. call `Router::queue_upward_messages` for each backed candidate, using the [`UpwardMessage`s](../../types/messages.md#upward-message) from the [`CandidateCommitments`](../../types/candidate.md#candidate-commitments).
+ 1. call `Router::queue_upward_messages` for each backed candidate, using the [`UpwardMessage`s](../types/messages.md#upward-message) from the [`CandidateCommitments`](../types/candidate.md#candidate-commitments).
  1. Call `Paras::note_new_head` using the `HeadData` from the receipt and `relay_parent_number`.
* `collect_pending`:
 
diff --git a/roadmap/implementors-guide/src/types/candidate.md b/roadmap/implementors-guide/src/types/candidate.md
index 065e3075f918..fdba6919e57c 100644
--- a/roadmap/implementors-guide/src/types/candidate.md
+++ b/roadmap/implementors-guide/src/types/candidate.md
@@ -145,7 +145,7 @@ Head data is a type-safe abstraction around bytes (`Vec<u8>`) for the purposes o
 
 struct HeadData(Vec<u8>);
 ```
 
-## CandidateCommitments
+## Candidate Commitments
 
 The execution and validation of parachain or parathread candidates produces a number of values which either must be committed to on the relay chain or committed to the state of the relay chain.
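To make the commitments concrete, here is an illustrative sketch of the shape such a struct can take. The field list is an assumption for exposition, drawn from the values discussed elsewhere in this guide (upward messages, erasure root, code upgrades, head data), not the normative definition:

```rust
// Illustrative sketch only: the kinds of values a candidate commits to.
// Field names are assumptions, not the guide's normative definition.
struct CandidateCommitments {
	/// Messages directed to the relay chain itself, committed on-chain.
	upward_messages: Vec<UpwardMessage>,
	/// Root of the erasure-coding Merkle tree over the candidate's data.
	erasure_root: Hash,
	/// New validation code, if this candidate schedules a code upgrade.
	new_validation_code: Option<ValidationCode>,
	/// The head data produced by executing the candidate.
	head_data: HeadData,
}
```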