Skip to content

Commit

Permalink
Allow handshake to complete quicker add internal NATS connection stat…
Browse files Browse the repository at this point in the history
…us log
  • Loading branch information
kthomas committed Jan 9, 2024
1 parent 3d13e78 commit 3e2424e
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions nex-node/machine_mgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ func (m *MachineManager) DispatchWork(vm *runningFirecracker, workloadName, name
}
bytes, _ := json.Marshal(req)

status := m.ncInternal.Status()
m.log.WithField("vmid", vm.vmmID).
WithField("status", status.String()).
Debug("NATS internal connection status")

subject := fmt.Sprintf("agentint.%s.workdispatch", vm.vmmID)
resp, err := m.ncInternal.Request(subject, bytes, 1*time.Second)
if err != nil {
Expand Down Expand Up @@ -283,15 +288,19 @@ func (m *MachineManager) fillPool() {
}

func (m *MachineManager) awaitHandshake(vmid string) {
time.Sleep(m.handshakeTimeout)
timeoutAt := time.Now().UTC().Add(m.handshakeTimeout)

handshakeOk := false
for !handshakeOk {
if time.Now().UTC().After(timeoutAt) {
m.log.WithField("vmid", vmid).Error("Did not receive NATS handshake from agent within timeout. Exiting unstable node")
_ = m.Stop()
os.Exit(1) // FIXME
}

_, ok := m.handshakes[vmid]
if !ok {
m.log.WithField("vmid", vmid).Error("Did not receive NATS handshake from agent within timeout. Exiting unstable node")
_ = m.Stop()
os.Exit(1)
_, handshakeOk = m.handshakes[vmid]
time.Sleep(time.Millisecond * agentapi.DefaultRunloopSleepTimeoutMillis)
}

}

func (m *MachineManager) startInternalNats() (*server.Server, *nats.Conn, error) {
Expand Down

0 comments on commit 3e2424e

Please sign in to comment.