Skip to content

Commit de7a49c

Browse files
committed
Integrate end to end hashed chain exchange with F3
Integrate the chain exchange mechanism with F3 host and runner. But without touching the core GPBFT. The implementation here leaves two major TODOs: 1) chain broadcasting mechanism (currently coupled to GPBFT message broadcast), and 2) partial message validation prior to buffering (currently skipped entirely but with capped buffer sizes and re-validation by core GPBFT once the messages are complete). The integration introduces the concept of Partial GMessage: a GMessage with chains replaced with the key to the chain. The work introduces a buffer and refill mechanism that listens to the chains discovered, un-buffers the messages having re-constructed their original GMessage and feeds them to the participation using the existing event loop. Part of #792
1 parent 3507ce2 commit de7a49c

File tree

7 files changed

+551
-29
lines changed

7 files changed

+551
-29
lines changed

cbor_gen.go

+122
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

chainexchange/pubsub.go

+7-3
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ func (p *PubSubChainExchange) Start(ctx context.Context) error {
6464
}
6565
if p.topicScoreParams != nil {
6666
if err := p.topic.SetScoreParams(p.topicScoreParams); err != nil {
67-
return fmt.Errorf("failed to set score params: %w", err)
67+
// This can happen most likely due to router not supporting peer scoring. It's
68+
// non-critical. Hence, the warning log.
69+
log.Warnw("failed to set topic score params", "err", err)
6870
}
6971
}
7072
subscription, err := p.topic.Subscribe(pubsub.WithBufferSize(p.subscriptionBufferSize))
@@ -79,7 +81,7 @@ func (p *PubSubChainExchange) Start(ctx context.Context) error {
7981
for ctx.Err() == nil {
8082
msg, err := subscription.Next(ctx)
8183
if err != nil {
82-
log.Debugw("failed to read nex message from subscription", "err", err)
84+
log.Debugw("failed to read next message from subscription", "err", err)
8385
continue
8486
}
8587
cmsg := msg.ValidatorData.(Message)
@@ -89,7 +91,9 @@ func (p *PubSubChainExchange) Start(ctx context.Context) error {
8991
p.stop = func() error {
9092
cancel()
9193
subscription.Cancel()
92-
return p.topic.Close()
94+
_ = p.pubsub.UnregisterTopicValidator(p.topicName)
95+
_ = p.topic.Close()
96+
return nil
9397
}
9498
return nil
9599
}

gen/main.go

+6
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"os"
66

7+
"github.com/filecoin-project/go-f3"
78
"github.com/filecoin-project/go-f3/certexchange"
89
"github.com/filecoin-project/go-f3/certs"
910
"github.com/filecoin-project/go-f3/chainexchange"
@@ -45,6 +46,11 @@ func main() {
4546
chainexchange.Message{},
4647
)
4748
})
49+
eg.Go(func() error {
50+
return gen.WriteTupleEncodersToFile("../cbor_gen.go", "f3",
51+
f3.PartialGMessage{},
52+
)
53+
})
4854
if err := eg.Wait(); err != nil {
4955
fmt.Printf("Failed to complete cborg_gen: %v\n", err)
5056
os.Exit(1)

host.go

+95-14
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ type gpbftRunner struct {
5252

5353
inputs gpbftInputs
5454
msgEncoding gMessageEncoding
55+
pmm *partialMessageManager
5556
}
5657

5758
type roundPhase struct {
@@ -141,6 +142,12 @@ func newRunner(
141142
} else {
142143
runner.msgEncoding = &cborGMessageEncoding{}
143144
}
145+
146+
runner.pmm, err = newPartialMessageManager(runner.Progress, ps, m)
147+
if err != nil {
148+
return nil, fmt.Errorf("creating partial message manager: %w", err)
149+
}
150+
144151
return runner, nil
145152
}
146153

@@ -156,6 +163,11 @@ func (h *gpbftRunner) Start(ctx context.Context) (_err error) {
156163
return err
157164
}
158165

166+
completedMessageQueue, err := h.pmm.Start(ctx)
167+
if err != nil {
168+
return err
169+
}
170+
159171
finalityCertificates, unsubCerts := h.certStore.Subscribe()
160172
select {
161173
case c := <-finalityCertificates:
@@ -193,7 +205,7 @@ func (h *gpbftRunner) Start(ctx context.Context) (_err error) {
193205
default:
194206
}
195207

196-
// Handle messages, finality certificates, and alarms
208+
// Handle messages, completed messages, finality certificates, and alarms
197209
select {
198210
case c := <-finalityCertificates:
199211
if err := h.receiveCertificate(c); err != nil {
@@ -219,6 +231,29 @@ func (h *gpbftRunner) Start(ctx context.Context) (_err error) {
219231
// errors.
220232
log.Errorf("error when processing message: %+v", err)
221233
}
234+
case gmsg, ok := <-completedMessageQueue:
235+
if !ok {
236+
return fmt.Errorf("incoming completed message queue closed")
237+
}
238+
switch validatedMessage, err := h.participant.ValidateMessage(gmsg); {
239+
case errors.Is(err, gpbft.ErrValidationInvalid):
240+
log.Debugw("validation error while validating completed message", "err", err)
241+
// TODO: Signal partial message manager to penalise sender,
242+
// e.g. reduce the total number of messages stroed from sender?
243+
case errors.Is(err, gpbft.ErrValidationTooOld):
244+
// TODO: Signal partial message manager to drop the instance?
245+
case errors.Is(err, gpbft.ErrValidationNotRelevant):
246+
// TODO: Signal partial message manager to drop irrelevant messages?
247+
case errors.Is(err, gpbft.ErrValidationNoCommittee):
248+
log.Debugw("committee error while validating completed message", "err", err)
249+
case err != nil:
250+
log.Errorw("unknown error while validating completed message", "err", err)
251+
default:
252+
recordValidatedMessage(ctx, validatedMessage)
253+
if err := h.participant.ReceiveMessage(validatedMessage); err != nil {
254+
log.Errorw("error while processing completed message", "err", err)
255+
}
256+
}
222257
case <-h.runningCtx.Done():
223258
return nil
224259
}
@@ -452,7 +487,17 @@ func (h *gpbftRunner) BroadcastMessage(ctx context.Context, msg *gpbft.GMessage)
452487
if h.topic == nil {
453488
return pubsub.ErrTopicClosed
454489
}
455-
encoded, err := h.msgEncoding.Encode(msg)
490+
491+
if err := h.pmm.BroadcastChain(ctx, msg.Vote.Instance, msg.Vote.Value); err != nil {
492+
// Silently log the error and continue. Partial message manager should take care of re-broadcast.
493+
log.Warnw("failed to broadcast chain", "instance", msg.Vote.Instance, "error", err)
494+
}
495+
496+
pmsg, err := h.pmm.toPartialGMessage(msg)
497+
if err != nil {
498+
return err
499+
}
500+
encoded, err := h.msgEncoding.Encode(pmsg)
456501
if err != nil {
457502
return fmt.Errorf("encoding GMessage for broadcast: %w", err)
458503
}
@@ -472,7 +517,17 @@ func (h *gpbftRunner) rebroadcastMessage(msg *gpbft.GMessage) error {
472517
if h.topic == nil {
473518
return pubsub.ErrTopicClosed
474519
}
475-
encoded, err := h.msgEncoding.Encode(msg)
520+
521+
if err := h.pmm.BroadcastChain(h.runningCtx, msg.Vote.Instance, msg.Vote.Value); err != nil {
522+
// Silently log the error and continue. Partial message manager should take care of re-broadcast.
523+
log.Warnw("failed to rebroadcast chain", "instance", msg.Vote.Instance, "error", err)
524+
}
525+
526+
pmsg, err := h.pmm.toPartialGMessage(msg)
527+
if err != nil {
528+
return err
529+
}
530+
encoded, err := h.msgEncoding.Encode(pmsg)
476531
if err != nil {
477532
return fmt.Errorf("encoding GMessage for broadcast: %w", err)
478533
}
@@ -489,12 +544,28 @@ func (h *gpbftRunner) validatePubsubMessage(ctx context.Context, _ peer.ID, msg
489544
recordValidationTime(ctx, start, _result)
490545
}(time.Now())
491546

492-
gmsg, err := h.msgEncoding.Decode(msg.Data)
547+
pgmsg, err := h.msgEncoding.Decode(msg.Data)
493548
if err != nil {
494549
log.Debugw("failed to decode message", "from", msg.GetFrom(), "err", err)
495550
return pubsub.ValidationReject
496551
}
497552

553+
gmsg, completed := h.pmm.CompleteMessage(ctx, pgmsg)
554+
if !completed {
555+
// TODO: Partially validate the message because we can. To do this, however,
556+
// message validator needs to be refactored to tolerate partial data.
557+
// Hence, for now validation is postponed entirely until that refactor
558+
// is done to accommodate partial messages.
559+
// See: https://github.com/filecoin-project/go-f3/issues/813
560+
561+
// FIXME: must verify signature before buffering otherwise nodes can spoof the
562+
// buffer with invalid messages on behalf of other peers as censorship
563+
// attack.
564+
565+
msg.ValidatorData = pgmsg
566+
return pubsub.ValidationAccept
567+
}
568+
498569
switch validatedMessage, err := h.participant.ValidateMessage(gmsg); {
499570
case errors.Is(err, gpbft.ErrValidationInvalid):
500571
log.Debugf("validation error during validation: %+v", err)
@@ -588,15 +659,18 @@ func (h *gpbftRunner) startPubsub() (<-chan gpbft.ValidatedMessage, error) {
588659
}
589660
return fmt.Errorf("pubsub message subscription returned an error: %w", err)
590661
}
591-
gmsg, ok := msg.ValidatorData.(gpbft.ValidatedMessage)
592-
if !ok {
662+
663+
switch gmsg := msg.ValidatorData.(type) {
664+
case gpbft.ValidatedMessage:
665+
select {
666+
case messageQueue <- gmsg:
667+
case <-h.runningCtx.Done():
668+
return nil
669+
}
670+
case *PartialGMessage:
671+
h.pmm.bufferPartialMessage(h.runningCtx, gmsg)
672+
default:
593673
log.Errorf("invalid msgValidatorData: %+v", msg.ValidatorData)
594-
continue
595-
}
596-
select {
597-
case messageQueue <- gmsg:
598-
case <-h.runningCtx.Done():
599-
return nil
600674
}
601675
}
602676
return nil
@@ -632,18 +706,25 @@ func (h *gpbftHost) RequestRebroadcast(instant gpbft.Instant) error {
632706
}
633707

634708
func (h *gpbftHost) GetProposal(instance uint64) (*gpbft.SupplementalData, gpbft.ECChain, error) {
635-
return h.inputs.GetProposal(h.runningCtx, instance)
709+
proposal, chain, err := h.inputs.GetProposal(h.runningCtx, instance)
710+
if err == nil {
711+
if err := h.pmm.BroadcastChain(h.runningCtx, instance, chain); err != nil {
712+
log.Warnw("failed to broadcast chain", "instance", instance, "error", err)
713+
}
714+
}
715+
return proposal, chain, err
636716
}
637717

638718
func (h *gpbftHost) GetCommittee(instance uint64) (*gpbft.Committee, error) {
639719
return h.inputs.GetCommittee(h.runningCtx, instance)
640720
}
641721

642-
func (h *gpbftRunner) Stop(context.Context) error {
722+
func (h *gpbftRunner) Stop(ctx context.Context) error {
643723
h.ctxCancel()
644724
return multierr.Combine(
645725
h.wal.Close(),
646726
h.errgrp.Wait(),
727+
h.pmm.Shutdown(ctx),
647728
h.teardownPubsub(),
648729
)
649730
}

0 commit comments

Comments
 (0)