From e0dfaa2badd2a1ad1bc5ab26cc22240be040e4df Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Wed, 8 Jan 2020 13:26:57 -0500 Subject: [PATCH 01/12] Fail test on panic --- netdeploy/network.go | 66 ++++++++++++++++------- nodecontrol/NodeController.go | 5 ++ nodecontrol/algodControl.go | 18 +++++-- test/framework/fixtures/libgoalFixture.go | 10 +++- 4 files changed, 75 insertions(+), 24 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index 8da4e93330..676166308b 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -48,10 +48,12 @@ type NetworkCfg struct { // Network represents an instance of a deployed network type Network struct { - rootDir string - cfg NetworkCfg - nodeDirs map[string]string // mapping between the node name and the directories where the node is operation on (not including RelayDirs) - gen gen.GenesisData + rootDir string + cfg NetworkCfg + nodeDirs map[string]string // mapping between the node name and the directories where the node is operation on (not including RelayDirs) + gen gen.GenesisData + stopping chan struct{} + nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc } // Name returns the name of the private network @@ -98,9 +100,10 @@ func (n Network) Genesis() gen.GenesisData { // CreateNetworkFromTemplate uses the specified template to deploy a new private network // under the specified root directory. -func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool) (Network, error) { - n := Network{ - rootDir: rootDir, +func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool) (*Network, error) { + n := &Network{ + rootDir: rootDir, + stopping: make(chan struct{}), } n.cfg.Name = name n.cfg.TemplateFile = templateFile @@ -239,7 +242,7 @@ func (n *Network) scanForNodes() error { } // Start the network, ensuring primary relay starts first -func (n Network) Start(binDir string, redirectOutput bool) error { +func (n *Network) Start(binDir string, redirectOutput bool) error { // Start relays // Determine IP:PORT for said relays // Start remaining nodes, pointing at the relays @@ -252,7 +255,8 @@ func (n Network) Start(binDir string, redirectOutput bool) error { for _, relayDir := range n.cfg.RelayDirs { nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(relayDir)) args := nodecontrol.AlgodStartArgs{ - RedirectOutput: redirectOutput, + RedirectOutput: redirectOutput, + RunStateChanged: n.nodeRunStateChanges, } _, err := nc.StartAlgod(args) @@ -276,8 +280,25 @@ func (n Network) Start(binDir string, redirectOutput bool) error { return err } +// NodeRunStateChangesCallback sets the callback for node run state changes. +func (n *Network) NodeRunStateChangesCallback(callback nodecontrol.AlgodRunStateChangedFunc) { + n.nodeRunStateCallback = callback +} + +// nodeRunStateChanges is a callback that notifies the network that a given node run state have changed. +func (n *Network) nodeRunStateChanges(nc *nodecontrol.NodeController, err error) { + select { + case <-n.stopping: + return + default: + } + if n.nodeRunStateCallback != nil { + n.nodeRunStateCallback(nc, err) + } +} + // retry fetching the relay address -func (n Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress string, err error) { +func (n *Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress string, err error) { for i := 1; ; i++ { relayAddress, err = nc.GetListeningAddress() if err == nil { @@ -294,7 +315,7 @@ func (n Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress st // GetPeerAddresses returns an array of Relay addresses, if any; to be used to start nodes // outside of the main 'Start' call. -func (n Network) GetPeerAddresses(binDir string) []string { +func (n *Network) GetPeerAddresses(binDir string) []string { var peerAddresses []string for _, relayDir := range n.cfg.RelayDirs { nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(relayDir)) @@ -306,10 +327,11 @@ func (n Network) GetPeerAddresses(binDir string) []string { return peerAddresses } -func (n Network) startNodes(binDir, relayAddress string, redirectOutput bool) error { +func (n *Network) startNodes(binDir, relayAddress string, redirectOutput bool) error { args := nodecontrol.AlgodStartArgs{ - PeerAddress: relayAddress, - RedirectOutput: redirectOutput, + PeerAddress: relayAddress, + RedirectOutput: redirectOutput, + RunStateChanged: n.nodeRunStateChanges, } for _, nodeDir := range n.nodeDirs { nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(nodeDir)) @@ -323,7 +345,7 @@ func (n Network) startNodes(binDir, relayAddress string, redirectOutput bool) er // StartNode can be called to start a node after the network has been started. // It determines the correct PeerAddresses to use. -func (n Network) StartNode(binDir, nodeDir string, redirectOutput bool) (err error) { +func (n *Network) StartNode(binDir, nodeDir string, redirectOutput bool) (err error) { controller := nodecontrol.MakeNodeController(binDir, nodeDir) peers := n.GetPeerAddresses(binDir) peerAddresses := strings.Join(peers, ";") @@ -337,7 +359,12 @@ func (n Network) StartNode(binDir, nodeDir string, redirectOutput bool) (err err // Stop the network, ensuring primary relay stops first // No return code - we try to kill them if we can (if we read valid PID file) -func (n Network) Stop(binDir string) { +func (n *Network) Stop(binDir string) { + select { + case <-n.stopping: + default: + close(n.stopping) + } c := make(chan struct{}, len(n.cfg.RelayDirs)+len(n.nodeDirs)) stopNodeContoller := func(nc nodecontrol.NodeController) { defer func() { @@ -358,6 +385,7 @@ func (n Network) Stop(binDir string) { <-c } close(c) + n.stopping = make(chan struct{}) } // NetworkNodeStatus represents the result from checking the status of a particular node instance @@ -367,7 +395,7 @@ type NetworkNodeStatus struct { } // GetGoalClient returns the libgoal.Client for the specified node name -func (n Network) GetGoalClient(binDir, nodeName string) (lg libgoal.Client, err error) { +func (n *Network) GetGoalClient(binDir, nodeName string) (lg libgoal.Client, err error) { nodeDir, err := n.GetNodeDir(nodeName) if err != nil { return @@ -386,7 +414,7 @@ func (n Network) GetNodeController(binDir, nodeName string) (nc nodecontrol.Node } // NodesStatus retrieves the status of all nodes in the network and returns the status/error for each -func (n Network) NodesStatus(binDir string) map[string]NetworkNodeStatus { +func (n *Network) NodesStatus(binDir string) map[string]NetworkNodeStatus { statuses := make(map[string]NetworkNodeStatus) for _, relayDir := range n.cfg.RelayDirs { @@ -420,7 +448,7 @@ func (n Network) NodesStatus(binDir string) map[string]NetworkNodeStatus { // Delete the network - try stopping it first if we can. // No return code - we try to kill them if we can (if we read valid PID file) -func (n Network) Delete(binDir string) error { +func (n *Network) Delete(binDir string) error { n.Stop(binDir) return os.RemoveAll(n.rootDir) } diff --git a/nodecontrol/NodeController.go b/nodecontrol/NodeController.go index 0761ee03a1..b182d4df52 100644 --- a/nodecontrol/NodeController.go +++ b/nodecontrol/NodeController.go @@ -50,6 +50,10 @@ func MakeNodeController(binDir, algodDataDir string) NodeController { return nc } +// AlgodRunStateChangedFunc is the callback function from the node controller that reports upstream +// in case there was a change with the algod running state. +type AlgodRunStateChangedFunc func(*NodeController, error) + // AlgodStartArgs are the possible arguments for starting algod type AlgodStartArgs struct { PeerAddress string @@ -57,6 +61,7 @@ type AlgodStartArgs struct { RedirectOutput bool RunUnderHost bool TelemetryOverride string + RunStateChanged AlgodRunStateChangedFunc } // KMDStartArgs are the possible arguments for starting kmd diff --git a/nodecontrol/algodControl.go b/nodecontrol/algodControl.go index bfca00f783..acafbcd73e 100644 --- a/nodecontrol/algodControl.go +++ b/nodecontrol/algodControl.go @@ -184,18 +184,28 @@ func (nc *NodeController) StartAlgod(args AlgodStartArgs) (alreadyRunning bool, } // Wait on the algod process and check if exits - c := make(chan bool) + algodExitChan := make(chan struct{}) + startAlgodCompletedChan := make(chan struct{}) + defer close(startAlgodCompletedChan) go func() { // this Wait call is important even beyond the scope of this function; it allows the system to // move the process from a "zombie" state into "done" state, and is required for the Signal(0) test. - algodCmd.Wait() - c <- true + err := algodCmd.Wait() + select { + case <-startAlgodCompletedChan: + // we've already exited this function, so we want to report to the error to the callback. + if args.RunStateChanged != nil { + args.RunStateChanged(nc, err) + } + default: + } + algodExitChan <- struct{}{} }() success := false for !success { select { - case <-c: + case <-algodExitChan: return false, errAlgodExitedEarly case <-time.After(time.Millisecond * 100): // If we can't talk to the API yet, spin diff --git a/test/framework/fixtures/libgoalFixture.go b/test/framework/fixtures/libgoalFixture.go index e96b13f34f..d1d6a46574 100644 --- a/test/framework/fixtures/libgoalFixture.go +++ b/test/framework/fixtures/libgoalFixture.go @@ -46,7 +46,7 @@ type LibGoalFixture struct { NC nodecontrol.NodeController rootDir string Name string - network netdeploy.Network + network *netdeploy.Network t TestingT clientPartKeys map[string][]account.Participation } @@ -85,6 +85,7 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri network, err := netdeploy.CreateNetworkFromTemplate("test", f.rootDir, templateFile, f.binDir, importKeys) f.failOnError(err, "CreateNetworkFromTemplate failed: %v") + network.NodeRunStateChangesCallback(f.nodeRunStateChanges) f.network = network if startNetwork { @@ -92,6 +93,13 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri } } +func (f *LibGoalFixture) nodeRunStateChanges(nc *nodecontrol.NodeController, err error) { + if f.t == nil || err == nil { + return + } + f.t.Errorf("Node %s has changed it's status to %v", nc.GetDataDir(), err) +} + func (f *LibGoalFixture) importRootKeys(lg *libgoal.Client, dataDir string) { genID, err := lg.GenesisID() if err != nil { From 4f1cca3c76c952c92a6c19efc3f8263bf9492e98 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Wed, 8 Jan 2020 14:15:19 -0500 Subject: [PATCH 02/12] few more touchups. --- cmd/goal/network.go | 2 +- netdeploy/network.go | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cmd/goal/network.go b/cmd/goal/network.go index 7123f33288..1fc7ae9e1a 100644 --- a/cmd/goal/network.go +++ b/cmd/goal/network.go @@ -107,7 +107,7 @@ var networkCreateCmd = &cobra.Command{ }, } -func getNetworkAndBinDir() (netdeploy.Network, string) { +func getNetworkAndBinDir() (*netdeploy.Network, string) { networkRootDir, err := filepath.Abs(networkRootDir) if err != nil { panic(err) diff --git a/netdeploy/network.go b/netdeploy/network.go index 676166308b..e07159937c 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -153,9 +153,10 @@ func isValidNetworkDir(rootDir string) bool { // LoadNetwork loads and initializes the Network state representing // an existing deployed network. -func LoadNetwork(rootDir string) (Network, error) { - n := Network{ - rootDir: rootDir, +func LoadNetwork(rootDir string) (*Network, error) { + n := &Network{ + rootDir: rootDir, + stopping: make(chan struct{}), } if !isValidNetworkDir(rootDir) { From f4b12f2040b322ac95b679e652832a017f556003 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Wed, 8 Jan 2020 15:18:01 -0500 Subject: [PATCH 03/12] sync --- netdeploy/network.go | 68 +++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index e07159937c..c2d2f191a5 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -25,6 +25,8 @@ import ( "strings" "time" + "github.com/algorand/go-deadlock" + "github.com/algorand/go-algorand/config" "github.com/algorand/go-algorand/daemon/algod/api/spec/v1" "github.com/algorand/go-algorand/gen" @@ -52,8 +54,9 @@ type Network struct { cfg NetworkCfg nodeDirs map[string]string // mapping between the node name and the directories where the node is operation on (not including RelayDirs) gen gen.GenesisData - stopping chan struct{} nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc + runningNCsMu deadlock.RWMutex + runningNCs map[string]*nodecontrol.NodeController // maps each node data directroy to a node controller. } // Name returns the name of the private network @@ -102,8 +105,8 @@ func (n Network) Genesis() gen.GenesisData { // under the specified root directory. func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool) (*Network, error) { n := &Network{ - rootDir: rootDir, - stopping: make(chan struct{}), + rootDir: rootDir, + runningNCs: make(map[string]*nodecontrol.NodeController), } n.cfg.Name = name n.cfg.TemplateFile = templateFile @@ -155,8 +158,8 @@ func isValidNetworkDir(rootDir string) bool { // an existing deployed network. func LoadNetwork(rootDir string) (*Network, error) { n := &Network{ - rootDir: rootDir, - stopping: make(chan struct{}), + rootDir: rootDir, + runningNCs: make(map[string]*nodecontrol.NodeController), } if !isValidNetworkDir(rootDir) { @@ -254,7 +257,9 @@ func (n *Network) Start(binDir string, redirectOutput bool) error { var peerAddressListBuilder strings.Builder for _, relayDir := range n.cfg.RelayDirs { - nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(relayDir)) + + nodeFulllPath := n.getNodeFullPath(relayDir) + nc := nodecontrol.MakeNodeController(binDir, nodeFulllPath) args := nodecontrol.AlgodStartArgs{ RedirectOutput: redirectOutput, RunStateChanged: n.nodeRunStateChanges, @@ -274,6 +279,9 @@ func (n *Network) Start(binDir string, redirectOutput bool) error { peerAddressListBuilder.WriteString(";") } peerAddressListBuilder.WriteString(relayAddress) + n.runningNCsMu.Lock() + n.runningNCs[nodeFulllPath] = &nc + n.runningNCsMu.Unlock() } peerAddressList := peerAddressListBuilder.String() @@ -288,12 +296,16 @@ func (n *Network) NodeRunStateChangesCallback(callback nodecontrol.AlgodRunState // nodeRunStateChanges is a callback that notifies the network that a given node run state have changed. func (n *Network) nodeRunStateChanges(nc *nodecontrol.NodeController, err error) { - select { - case <-n.stopping: - return - default: + running := false + n.runningNCsMu.RLock() + for _, listedNC := range n.runningNCs { + if listedNC == nc { + running = true + break + } } - if n.nodeRunStateCallback != nil { + n.runningNCsMu.RUnlock() + if n.nodeRunStateCallback != nil && running { n.nodeRunStateCallback(nc, err) } } @@ -318,8 +330,15 @@ func (n *Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress s // outside of the main 'Start' call. func (n *Network) GetPeerAddresses(binDir string) []string { var peerAddresses []string + + n.runningNCsMu.RLock() + defer n.runningNCsMu.RUnlock() + for _, relayDir := range n.cfg.RelayDirs { - nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(relayDir)) + nc := n.runningNCs[n.getNodeFullPath(relayDir)] + if nc == nil { + continue + } relayAddress, err := nc.GetListeningAddress() if err == nil { peerAddresses = append(peerAddresses, relayAddress) @@ -335,11 +354,15 @@ func (n *Network) startNodes(binDir, relayAddress string, redirectOutput bool) e RunStateChanged: n.nodeRunStateChanges, } for _, nodeDir := range n.nodeDirs { - nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(nodeDir)) + fullNodeDirPath := n.getNodeFullPath(nodeDir) + nc := nodecontrol.MakeNodeController(binDir, fullNodeDirPath) _, err := nc.StartAlgod(args) if err != nil { return err } + n.runningNCsMu.Lock() + n.runningNCs[fullNodeDirPath] = &nc + n.runningNCsMu.Unlock() } return nil } @@ -361,32 +384,25 @@ func (n *Network) StartNode(binDir, nodeDir string, redirectOutput bool) (err er // Stop the network, ensuring primary relay stops first // No return code - we try to kill them if we can (if we read valid PID file) func (n *Network) Stop(binDir string) { - select { - case <-n.stopping: - default: - close(n.stopping) - } c := make(chan struct{}, len(n.cfg.RelayDirs)+len(n.nodeDirs)) - stopNodeContoller := func(nc nodecontrol.NodeController) { + stopNodeContoller := func(nc *nodecontrol.NodeController) { defer func() { c <- struct{}{} }() nc.FullStop() } - for _, relayDir := range n.cfg.RelayDirs { - nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(relayDir)) - go stopNodeContoller(nc) - } - for _, nodeDir := range n.nodeDirs { - nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(nodeDir)) + n.runningNCsMu.Lock() + for _, nc := range n.runningNCs { go stopNodeContoller(nc) } + n.runningNCs = make(map[string]*nodecontrol.NodeController) + n.runningNCsMu.Unlock() + // wait until we finish stopping all the node controllers. for i := cap(c); i > 0; i-- { <-c } close(c) - n.stopping = make(chan struct{}) } // NetworkNodeStatus represents the result from checking the status of a particular node instance From ae0d60a4abbc73290e6e4c8ce23d7e52bb934a0f Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Wed, 8 Jan 2020 15:37:58 -0500 Subject: [PATCH 04/12] bugfix. --- netdeploy/network.go | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index c2d2f191a5..d0df210f14 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -60,17 +60,17 @@ type Network struct { } // Name returns the name of the private network -func (n Network) Name() string { +func (n *Network) Name() string { return n.cfg.Name } // PrimaryDataDir returns the primary data directory for the network -func (n Network) PrimaryDataDir() string { +func (n *Network) PrimaryDataDir() string { return n.getNodeFullPath(n.cfg.RelayDirs[0]) } // NodeDataDirs returns an array of node data directories (not the relays) -func (n Network) NodeDataDirs() []string { +func (n *Network) NodeDataDirs() []string { var directories []string for _, nodeDir := range n.nodeDirs { directories = append(directories, n.getNodeFullPath(nodeDir)) @@ -79,7 +79,7 @@ func (n Network) NodeDataDirs() []string { } // GetNodeDir returns the node directory that is associated with the given node name. -func (n Network) GetNodeDir(nodeName string) (string, error) { +func (n *Network) GetNodeDir(nodeName string) (string, error) { possibleDir := n.getNodeFullPath(nodeName) if isNodeDir(possibleDir) { return possibleDir, nil @@ -97,7 +97,7 @@ func isNodeDir(path string) bool { } // Genesis returns the genesis data for this network -func (n Network) Genesis() gen.GenesisData { +func (n *Network) Genesis() gen.GenesisData { return n.gen } @@ -191,12 +191,12 @@ func loadNetworkCfg(configFile string) (NetworkCfg, error) { } // Save persists the network state in the root directory (in network.json) -func (n Network) Save(rootDir string) error { +func (n *Network) Save(rootDir string) error { cfgFile := filepath.Join(rootDir, configFileName) return saveNetworkCfg(n.cfg, cfgFile) } -func (n Network) getNodeFullPath(nodeDir string) string { +func (n *Network) getNodeFullPath(nodeDir string) string { return filepath.Join(n.rootDir, nodeDir) } @@ -392,8 +392,24 @@ func (n *Network) Stop(binDir string) { nc.FullStop() } n.runningNCsMu.Lock() - for _, nc := range n.runningNCs { - go stopNodeContoller(nc) + + for _, relayDir := range n.cfg.RelayDirs { + relayFullPath := n.getNodeFullPath(relayDir) + pnc := n.runningNCs[relayFullPath] + if pnc == nil { + nc := nodecontrol.MakeNodeController(binDir, relayFullPath) + pnc = &nc + } + go stopNodeContoller(pnc) + } + for _, nodeDir := range n.nodeDirs { + nodeFullPath := n.getNodeFullPath(nodeDir) + pnc := n.runningNCs[nodeFullPath] + if pnc == nil { + nc := nodecontrol.MakeNodeController(binDir, nodeFullPath) + pnc = &nc + } + go stopNodeContoller(pnc) } n.runningNCs = make(map[string]*nodecontrol.NodeController) n.runningNCsMu.Unlock() @@ -421,7 +437,7 @@ func (n *Network) GetGoalClient(binDir, nodeName string) (lg libgoal.Client, err } // GetNodeController returns the node controller for the specified node name -func (n Network) GetNodeController(binDir, nodeName string) (nc nodecontrol.NodeController, err error) { +func (n *Network) GetNodeController(binDir, nodeName string) (nc nodecontrol.NodeController, err error) { nodeDir, err := n.GetNodeDir(nodeName) if err != nil { return From 4453fb966e887148d0593f1b2dde280474b62c5b Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Wed, 8 Jan 2020 16:15:11 -0500 Subject: [PATCH 05/12] Update few more usecases. --- netdeploy/network.go | 8 ++++---- .../onlineOfflineParticipation_test.go | 8 ++++---- .../participation/participationRewards_test.go | 8 ++++---- .../partitionRecovery/partitionRecovery_test.go | 8 ++++---- test/framework/fixtures/libgoalFixture.go | 17 ++++++++++++++--- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index d0df210f14..e438c6202e 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -262,7 +262,7 @@ func (n *Network) Start(binDir string, redirectOutput bool) error { nc := nodecontrol.MakeNodeController(binDir, nodeFulllPath) args := nodecontrol.AlgodStartArgs{ RedirectOutput: redirectOutput, - RunStateChanged: n.nodeRunStateChanges, + RunStateChanged: n.nodeRunStateChanged, } _, err := nc.StartAlgod(args) @@ -294,8 +294,8 @@ func (n *Network) NodeRunStateChangesCallback(callback nodecontrol.AlgodRunState n.nodeRunStateCallback = callback } -// nodeRunStateChanges is a callback that notifies the network that a given node run state have changed. -func (n *Network) nodeRunStateChanges(nc *nodecontrol.NodeController, err error) { +// nodeRunStateChanged is a callback that notifies the network that a given node run state have changed. +func (n *Network) nodeRunStateChanged(nc *nodecontrol.NodeController, err error) { running := false n.runningNCsMu.RLock() for _, listedNC := range n.runningNCs { @@ -351,7 +351,7 @@ func (n *Network) startNodes(binDir, relayAddress string, redirectOutput bool) e args := nodecontrol.AlgodStartArgs{ PeerAddress: relayAddress, RedirectOutput: redirectOutput, - RunStateChanged: n.nodeRunStateChanges, + RunStateChanged: n.nodeRunStateChanged, } for _, nodeDir := range n.nodeDirs { fullNodeDirPath := n.getNodeFullPath(nodeDir) diff --git a/test/e2e-go/features/participation/onlineOfflineParticipation_test.go b/test/e2e-go/features/participation/onlineOfflineParticipation_test.go index 1b1e73704c..7419f05b27 100644 --- a/test/e2e-go/features/participation/onlineOfflineParticipation_test.go +++ b/test/e2e-go/features/participation/onlineOfflineParticipation_test.go @@ -18,8 +18,8 @@ package participation import ( "path/filepath" + "runtime" "testing" - "runtime" "github.com/stretchr/testify/require" @@ -52,7 +52,7 @@ func TestParticipationKeyOnlyAccountParticipatesCorrectly(t *testing.T) { // since block proposer selection is probabilistic, it is not guaranteed that the account will be chosen // it is a trade-off between test flakiness and test duration proposalWindow := 50 // arbitrary - blockWasProposedByPartkeyOnlyAccountRecently := waitForAccountToProposeBlock(a, fixture, partkeyOnlyAccount, proposalWindow) + blockWasProposedByPartkeyOnlyAccountRecently := waitForAccountToProposeBlock(a, &fixture, partkeyOnlyAccount, proposalWindow) a.True(blockWasProposedByPartkeyOnlyAccountRecently, "partkey-only account should be proposing blocks") // verify partkeyonly_account cannot spend @@ -71,7 +71,7 @@ func TestParticipationKeyOnlyAccountParticipatesCorrectly(t *testing.T) { a.Error(err, "partkey only account should fail to go offline") } -func waitForAccountToProposeBlock(a *require.Assertions, fixture fixtures.RestClientFixture, account string, window int) bool { +func waitForAccountToProposeBlock(a *require.Assertions, fixture *fixtures.RestClientFixture, account string, window int) bool { client := fixture.AlgodClient curStatus, err := client.Status() @@ -183,7 +183,7 @@ func TestNewAccountCanGoOnlineAndParticipate(t *testing.T) { // check that account starts participating after a while proposalWindow := 20 // arbitrary - blockWasProposedByNewAccountRecently := waitForAccountToProposeBlock(a, fixture, newAccount, proposalWindow) + blockWasProposedByNewAccountRecently := waitForAccountToProposeBlock(a, &fixture, newAccount, proposalWindow) a.True(blockWasProposedByNewAccountRecently, "newly online account should be proposing blocks") } diff --git a/test/e2e-go/features/participation/participationRewards_test.go b/test/e2e-go/features/participation/participationRewards_test.go index 30828798db..cf6a56131d 100644 --- a/test/e2e-go/features/participation/participationRewards_test.go +++ b/test/e2e-go/features/participation/participationRewards_test.go @@ -30,7 +30,7 @@ import ( "github.com/algorand/go-algorand/test/framework/fixtures" ) -func getFirstAccountFromNamedNode(fixture fixtures.RestClientFixture, r *require.Assertions, nodeName string) (account string) { +func getFirstAccountFromNamedNode(fixture *fixtures.RestClientFixture, r *require.Assertions, nodeName string) (account string) { cli := fixture.GetLibGoalClientForNamedNode(nodeName) wh, err := cli.GetUnencryptedWalletHandle() r.NoError(err) @@ -82,9 +82,9 @@ func TestOnlineOfflineRewards(t *testing.T) { defer fixture.Shutdown() // get online and offline accounts - onlineAccount := getFirstAccountFromNamedNode(fixture, r, "Online") + onlineAccount := getFirstAccountFromNamedNode(&fixture, r, "Online") onlineClient := fixture.GetLibGoalClientForNamedNode("Online") - offlineAccount := getFirstAccountFromNamedNode(fixture, r, "Offline") + offlineAccount := getFirstAccountFromNamedNode(&fixture, r, "Offline") offlineClient := fixture.GetLibGoalClientForNamedNode("Offline") // learn initial balances @@ -184,7 +184,7 @@ func TestRewardUnitThreshold(t *testing.T) { defer fixture.Shutdown() // get "poor" account (has 1% stake as opposed to 33%) - poorAccount := getFirstAccountFromNamedNode(fixture, r, "SmallNode") + poorAccount := getFirstAccountFromNamedNode(&fixture, r, "SmallNode") client := fixture.GetLibGoalClientForNamedNode("SmallNode") // make new account wh, _ := client.GetUnencryptedWalletHandle() diff --git a/test/e2e-go/features/partitionRecovery/partitionRecovery_test.go b/test/e2e-go/features/partitionRecovery/partitionRecovery_test.go index b489036f1e..6f1e094667 100644 --- a/test/e2e-go/features/partitionRecovery/partitionRecovery_test.go +++ b/test/e2e-go/features/partitionRecovery/partitionRecovery_test.go @@ -18,9 +18,9 @@ package partitionrecovery import ( "path/filepath" + "runtime" "testing" "time" - "runtime" "github.com/stretchr/testify/require" @@ -91,7 +91,7 @@ func TestPartitionRecoverySwapStartup(t *testing.T) { fixture.Setup(t, filepath.Join("nettemplates", "TwoNodes50EachWithRelay.json")) defer fixture.Shutdown() - runTestWithStaggeredStopStart(t, fixture) + runTestWithStaggeredStopStart(t, &fixture) } func TestPartitionRecoveryStaggerRestart(t *testing.T) { @@ -114,10 +114,10 @@ func TestPartitionRecoveryStaggerRestart(t *testing.T) { fixture.Setup(t, filepath.Join("nettemplates", "ThreeNodesEvenDist.json")) defer fixture.Shutdown() - runTestWithStaggeredStopStart(t, fixture) + runTestWithStaggeredStopStart(t, &fixture) } -func runTestWithStaggeredStopStart(t *testing.T, fixture fixtures.RestClientFixture) { +func runTestWithStaggeredStopStart(t *testing.T, fixture *fixtures.RestClientFixture) { a := require.New(t) // Get Node1 so we can wait until it has reached the target round diff --git a/test/framework/fixtures/libgoalFixture.go b/test/framework/fixtures/libgoalFixture.go index d1d6a46574..8c33635ca1 100644 --- a/test/framework/fixtures/libgoalFixture.go +++ b/test/framework/fixtures/libgoalFixture.go @@ -25,6 +25,7 @@ import ( "testing" "time" + "github.com/algorand/go-deadlock" "github.com/stretchr/testify/require" "github.com/algorand/go-algorand/config" @@ -48,6 +49,7 @@ type LibGoalFixture struct { Name string network *netdeploy.Network t TestingT + tMu deadlock.RWMutex clientPartKeys map[string][]account.Participation } @@ -85,7 +87,7 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri network, err := netdeploy.CreateNetworkFromTemplate("test", f.rootDir, templateFile, f.binDir, importKeys) f.failOnError(err, "CreateNetworkFromTemplate failed: %v") - network.NodeRunStateChangesCallback(f.nodeRunStateChanges) + network.NodeRunStateChangesCallback(f.nodeRunStateChanged) f.network = network if startNetwork { @@ -93,8 +95,13 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri } } -func (f *LibGoalFixture) nodeRunStateChanges(nc *nodecontrol.NodeController, err error) { - if f.t == nil || err == nil { +func (f *LibGoalFixture) nodeRunStateChanged(nc *nodecontrol.NodeController, err error) { + if err == nil { + return + } + f.tMu.RLock() + defer f.tMu.RUnlock() + if f.t == nil { return } f.t.Errorf("Node %s has changed it's status to %v", nc.GetDataDir(), err) @@ -247,8 +254,12 @@ func (f *LibGoalFixture) Start() { // It ensures the current test context is set and then reset after the test ends // It should be called in the form of "defer fixture.SetTestContext(t)()" func (f *LibGoalFixture) SetTestContext(t TestingT) func() { + f.tMu.Lock() + defer f.tMu.Unlock() f.t = t return func() { + f.tMu.Lock() + defer f.tMu.Unlock() f.t = nil } } From c32e7aa48c467dc85822a58c6a69111b37c6fbe7 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Wed, 8 Jan 2020 21:41:17 -0500 Subject: [PATCH 06/12] Refactoring --- netdeploy/network.go | 49 +++++++++++++---------- test/framework/fixtures/libgoalFixture.go | 2 + 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index e438c6202e..7a778d19a1 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -335,9 +335,11 @@ func (n *Network) GetPeerAddresses(binDir string) []string { defer n.runningNCsMu.RUnlock() for _, relayDir := range n.cfg.RelayDirs { - nc := n.runningNCs[n.getNodeFullPath(relayDir)] + fullPath := n.getNodeFullPath(relayDir) + nc := n.runningNCs[fullPath] if nc == nil { - continue + tnc := nodecontrol.MakeNodeController(binDir, fullPath) + nc = &tnc } relayAddress, err := nc.GetListeningAddress() if err == nil { @@ -391,28 +393,33 @@ func (n *Network) Stop(binDir string) { }() nc.FullStop() } - n.runningNCsMu.Lock() - for _, relayDir := range n.cfg.RelayDirs { - relayFullPath := n.getNodeFullPath(relayDir) - pnc := n.runningNCs[relayFullPath] - if pnc == nil { - nc := nodecontrol.MakeNodeController(binDir, relayFullPath) - pnc = &nc + func() { + n.runningNCsMu.Lock() + defer n.runningNCsMu.Unlock() + for _, relayDir := range n.cfg.RelayDirs { + relayFullPath := n.getNodeFullPath(relayDir) + pnc := n.runningNCs[relayFullPath] + if pnc == nil { + nc := nodecontrol.MakeNodeController(binDir, relayFullPath) + pnc = &nc + } else { + delete(n.runningNCs, relayFullPath) + } + go stopNodeContoller(pnc) } - go stopNodeContoller(pnc) - } - for _, nodeDir := range n.nodeDirs { - nodeFullPath := n.getNodeFullPath(nodeDir) - pnc := n.runningNCs[nodeFullPath] - if pnc == nil { - nc := nodecontrol.MakeNodeController(binDir, nodeFullPath) - pnc = &nc + for _, nodeDir := range n.nodeDirs { + nodeFullPath := n.getNodeFullPath(nodeDir) + pnc := n.runningNCs[nodeFullPath] + if pnc == nil { + nc := nodecontrol.MakeNodeController(binDir, nodeFullPath) + pnc = &nc + } else { + delete(n.runningNCs, nodeFullPath) + } + go stopNodeContoller(pnc) } - go stopNodeContoller(pnc) - } - n.runningNCs = make(map[string]*nodecontrol.NodeController) - n.runningNCsMu.Unlock() + }() // wait until we finish stopping all the node controllers. for i := cap(c); i > 0; i-- { diff --git a/test/framework/fixtures/libgoalFixture.go b/test/framework/fixtures/libgoalFixture.go index 8c33635ca1..29bf27a44b 100644 --- a/test/framework/fixtures/libgoalFixture.go +++ b/test/framework/fixtures/libgoalFixture.go @@ -95,6 +95,8 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri } } +// nodeRunStateChanged is a callback from the network indicating that the node run state have changed. +// i.e. node terminated, and not due to shutdown.. this is likely to be a crash/panic. func (f *LibGoalFixture) nodeRunStateChanged(nc *nodecontrol.NodeController, err error) { if err == nil { return From e751c6853dc372ce8c8c2b4956dd451f01f87c97 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Mon, 13 Jan 2020 18:01:21 -0500 Subject: [PATCH 07/12] Simplify. --- cmd/goal/network.go | 2 +- netdeploy/network.go | 211 ++++++++-------------- test/framework/fixtures/libgoalFixture.go | 2 +- 3 files changed, 81 insertions(+), 134 deletions(-) diff --git a/cmd/goal/network.go b/cmd/goal/network.go index 1fc7ae9e1a..4185543332 100644 --- a/cmd/goal/network.go +++ b/cmd/goal/network.go @@ -93,7 +93,7 @@ var networkCreateCmd = &cobra.Command{ panic(err) } - network, err := netdeploy.CreateNetworkFromTemplate(networkName, networkRootDir, networkTemplateFile, binDir, !noImportKeys) + network, err := netdeploy.CreateNetworkFromTemplate(networkName, networkRootDir, networkTemplateFile, binDir, !noImportKeys, nil) if err != nil { if noClean { reportInfof(" ** failed ** - Preserving network rootdir '%s'", networkRootDir) diff --git a/netdeploy/network.go b/netdeploy/network.go index cd634dc7e1..98d273c284 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -25,8 +25,6 @@ import ( "strings" "time" - "github.com/algorand/go-deadlock" - "github.com/algorand/go-algorand/config" "github.com/algorand/go-algorand/daemon/algod/api/spec/v1" "github.com/algorand/go-algorand/gen" @@ -55,58 +53,14 @@ type Network struct { nodeDirs map[string]string // mapping between the node name and the directories where the node is operation on (not including RelayDirs) gen gen.GenesisData nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc - runningNCsMu deadlock.RWMutex - runningNCs map[string]*nodecontrol.NodeController // maps each node data directroy to a node controller. -} - -// Name returns the name of the private network -func (n *Network) Name() string { - return n.cfg.Name -} - -// PrimaryDataDir returns the primary data directory for the network -func (n *Network) PrimaryDataDir() string { - return n.getNodeFullPath(n.cfg.RelayDirs[0]) -} - -// NodeDataDirs returns an array of node data directories (not the relays) -func (n *Network) NodeDataDirs() []string { - var directories []string - for _, nodeDir := range n.nodeDirs { - directories = append(directories, n.getNodeFullPath(nodeDir)) - } - return directories -} - -// GetNodeDir returns the node directory that is associated with the given node name. -func (n *Network) GetNodeDir(nodeName string) (string, error) { - possibleDir := n.getNodeFullPath(nodeName) - if isNodeDir(possibleDir) { - return possibleDir, nil - } - return "", fmt.Errorf("no node exists that is named '%s'", nodeName) -} - -func isNodeDir(path string) bool { - if util.IsDir(path) { - if util.FileExists(filepath.Join(path, config.GenesisJSONFile)) { - return true - } - } - return false -} - -// Genesis returns the genesis data for this network -func (n *Network) Genesis() gen.GenesisData { - return n.gen } // CreateNetworkFromTemplate uses the specified template to deploy a new private network // under the specified root directory. -func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool) (*Network, error) { +func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool, nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc) (*Network, error) { n := &Network{ - rootDir: rootDir, - runningNCs: make(map[string]*nodecontrol.NodeController), + rootDir: rootDir, + nodeRunStateCallback: nodeRunStateCallback, } n.cfg.Name = name n.cfg.TemplateFile = templateFile @@ -139,27 +93,11 @@ func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, impor return n, err } -func isValidNetworkDir(rootDir string) bool { - cfgFile := filepath.Join(rootDir, configFileName) - fileExists := util.FileExists(cfgFile) - - // If file exists, network assumed to exist - if !fileExists { - return false - } - - // Now check for genesis.json file too - cfgFile = filepath.Join(rootDir, genesisFileName) - fileExists = util.FileExists(cfgFile) - return fileExists -} - // LoadNetwork loads and initializes the Network state representing // an existing deployed network. func LoadNetwork(rootDir string) (*Network, error) { n := &Network{ - rootDir: rootDir, - runningNCs: make(map[string]*nodecontrol.NodeController), + rootDir: rootDir, } if !isValidNetworkDir(rootDir) { @@ -190,6 +128,63 @@ func loadNetworkCfg(configFile string) (NetworkCfg, error) { return cfg, err } +// Name returns the name of the private network +func (n *Network) Name() string { + return n.cfg.Name +} + +// PrimaryDataDir returns the primary data directory for the network +func (n *Network) PrimaryDataDir() string { + return n.getNodeFullPath(n.cfg.RelayDirs[0]) +} + +// NodeDataDirs returns an array of node data directories (not the relays) +func (n *Network) NodeDataDirs() []string { + var directories []string + for _, nodeDir := range n.nodeDirs { + directories = append(directories, n.getNodeFullPath(nodeDir)) + } + return directories +} + +// GetNodeDir returns the node directory that is associated with the given node name. +func (n *Network) GetNodeDir(nodeName string) (string, error) { + possibleDir := n.getNodeFullPath(nodeName) + if isNodeDir(possibleDir) { + return possibleDir, nil + } + return "", fmt.Errorf("no node exists that is named '%s'", nodeName) +} + +func isNodeDir(path string) bool { + if util.IsDir(path) { + if util.FileExists(filepath.Join(path, config.GenesisJSONFile)) { + return true + } + } + return false +} + +// Genesis returns the genesis data for this network +func (n *Network) Genesis() gen.GenesisData { + return n.gen +} + +func isValidNetworkDir(rootDir string) bool { + cfgFile := filepath.Join(rootDir, configFileName) + fileExists := util.FileExists(cfgFile) + + // If file exists, network assumed to exist + if !fileExists { + return false + } + + // Now check for genesis.json file too + cfgFile = filepath.Join(rootDir, genesisFileName) + fileExists = util.FileExists(cfgFile) + return fileExists +} + // Save persists the network state in the root directory (in network.json) func (n *Network) Save(rootDir string) error { cfgFile := filepath.Join(rootDir, configFileName) @@ -262,7 +257,7 @@ func (n *Network) Start(binDir string, redirectOutput bool) error { nc := nodecontrol.MakeNodeController(binDir, nodeFulllPath) args := nodecontrol.AlgodStartArgs{ RedirectOutput: redirectOutput, - RunStateChanged: n.nodeRunStateChanged, + RunStateChanged: n.nodeRunStateCallback, } _, err := nc.StartAlgod(args) @@ -282,10 +277,6 @@ func (n *Network) Start(binDir string, redirectOutput bool) error { algodKmdPath, _ := filepath.Abs(filepath.Join(nodeFulllPath, libgoal.DefaultKMDDataDir)) nc.SetKMDDataDir(algodKmdPath) - - n.runningNCsMu.Lock() - n.runningNCs[nodeFulllPath] = &nc - n.runningNCsMu.Unlock() } peerAddressList := peerAddressListBuilder.String() @@ -298,22 +289,6 @@ func (n *Network) NodeRunStateChangesCallback(callback nodecontrol.AlgodRunState n.nodeRunStateCallback = callback } -// nodeRunStateChanged is a callback that notifies the network that a given node run state have changed. -func (n *Network) nodeRunStateChanged(nc *nodecontrol.NodeController, err error) { - running := false - n.runningNCsMu.RLock() - for _, listedNC := range n.runningNCs { - if listedNC == nc { - running = true - break - } - } - n.runningNCsMu.RUnlock() - if n.nodeRunStateCallback != nil && running { - n.nodeRunStateCallback(nc, err) - } -} - // retry fetching the relay address func (n *Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress string, err error) { for i := 1; ; i++ { @@ -335,16 +310,8 @@ func (n *Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress s func (n *Network) GetPeerAddresses(binDir string) []string { var peerAddresses []string - n.runningNCsMu.RLock() - defer n.runningNCsMu.RUnlock() - for _, relayDir := range n.cfg.RelayDirs { - fullPath := n.getNodeFullPath(relayDir) - nc := n.runningNCs[fullPath] - if nc == nil { - tnc := nodecontrol.MakeNodeController(binDir, fullPath) - nc = &tnc - } + nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(relayDir)) relayAddress, err := nc.GetListeningAddress() if err == nil { peerAddresses = append(peerAddresses, relayAddress) @@ -357,18 +324,14 @@ func (n *Network) startNodes(binDir, relayAddress string, redirectOutput bool) e args := nodecontrol.AlgodStartArgs{ PeerAddress: relayAddress, RedirectOutput: redirectOutput, - RunStateChanged: n.nodeRunStateChanged, + RunStateChanged: n.nodeRunStateCallback, } for _, nodeDir := range n.nodeDirs { - fullNodeDirPath := n.getNodeFullPath(nodeDir) - nc := nodecontrol.MakeNodeController(binDir, fullNodeDirPath) + nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(nodeDir)) _, err := nc.StartAlgod(args) if err != nil { return err } - n.runningNCsMu.Lock() - n.runningNCs[fullNodeDirPath] = &nc - n.runningNCsMu.Unlock() } return nil } @@ -398,36 +361,20 @@ func (n *Network) Stop(binDir string) { nc.FullStop() } - func() { - n.runningNCsMu.Lock() - defer n.runningNCsMu.Unlock() - for _, relayDir := range n.cfg.RelayDirs { - relayFullPath := n.getNodeFullPath(relayDir) - pnc := n.runningNCs[relayFullPath] - if pnc == nil { - nc := nodecontrol.MakeNodeController(binDir, relayFullPath) - algodKmdPath, _ := filepath.Abs(filepath.Join(relayFullPath, libgoal.DefaultKMDDataDir)) - nc.SetKMDDataDir(algodKmdPath) - pnc = &nc - } else { - delete(n.runningNCs, relayFullPath) - } - go stopNodeContoller(pnc) - } - for _, nodeDir := range n.nodeDirs { - nodeFullPath := n.getNodeFullPath(nodeDir) - pnc := n.runningNCs[nodeFullPath] - if pnc == nil { - nc := nodecontrol.MakeNodeController(binDir, nodeFullPath) - algodKmdPath, _ := filepath.Abs(filepath.Join(nodeFullPath, libgoal.DefaultKMDDataDir)) - nc.SetKMDDataDir(algodKmdPath) - pnc = &nc - } else { - delete(n.runningNCs, nodeFullPath) - } - go stopNodeContoller(pnc) - } - }() + for _, relayDir := range n.cfg.RelayDirs { + relayDataDir := n.getNodeFullPath(relayDir) + nc := nodecontrol.MakeNodeController(binDir, relayDataDir) + algodKmdPath, _ := filepath.Abs(filepath.Join(relayDataDir, libgoal.DefaultKMDDataDir)) + nc.SetKMDDataDir(algodKmdPath) + go stopNodeContoller(&nc) + } + for _, nodeDir := range n.nodeDirs { + nodeDataDir := n.getNodeFullPath(nodeDir) + nc := nodecontrol.MakeNodeController(binDir, nodeDataDir) + algodKmdPath, _ := filepath.Abs(filepath.Join(nodeDataDir, libgoal.DefaultKMDDataDir)) + nc.SetKMDDataDir(algodKmdPath) + go stopNodeContoller(&nc) + } // wait until we finish stopping all the node controllers. for i := cap(c); i > 0; i-- { diff --git a/test/framework/fixtures/libgoalFixture.go b/test/framework/fixtures/libgoalFixture.go index 050853ced1..f27e33619b 100644 --- a/test/framework/fixtures/libgoalFixture.go +++ b/test/framework/fixtures/libgoalFixture.go @@ -84,7 +84,7 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri os.RemoveAll(f.rootDir) templateFile = filepath.Join(f.testDataDir, templateFile) importKeys := false // Don't automatically import root keys when creating folders, we'll import on-demand - network, err := netdeploy.CreateNetworkFromTemplate("test", f.rootDir, templateFile, f.binDir, importKeys) + network, err := netdeploy.CreateNetworkFromTemplate("test", f.rootDir, templateFile, f.binDir, importKeys, f.nodeRunStateChanged) f.failOnError(err, "CreateNetworkFromTemplate failed: %v") network.NodeRunStateChangesCallback(f.nodeRunStateChanged) From 4facaccf349b27b551057203f61d35516cfe1fc2 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Mon, 13 Jan 2020 18:21:35 -0500 Subject: [PATCH 08/12] undo network referencing. --- cmd/goal/network.go | 2 +- netdeploy/network.go | 47 ++++++++++------------- test/framework/fixtures/libgoalFixture.go | 4 +- 3 files changed, 23 insertions(+), 30 deletions(-) diff --git a/cmd/goal/network.go b/cmd/goal/network.go index 4185543332..969661a98a 100644 --- a/cmd/goal/network.go +++ b/cmd/goal/network.go @@ -107,7 +107,7 @@ var networkCreateCmd = &cobra.Command{ }, } -func getNetworkAndBinDir() (*netdeploy.Network, string) { +func getNetworkAndBinDir() (netdeploy.Network, string) { networkRootDir, err := filepath.Abs(networkRootDir) if err != nil { panic(err) diff --git a/netdeploy/network.go b/netdeploy/network.go index 98d273c284..ff2e0657aa 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -57,8 +57,8 @@ type Network struct { // CreateNetworkFromTemplate uses the specified template to deploy a new private network // under the specified root directory. -func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool, nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc) (*Network, error) { - n := &Network{ +func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool, nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc) (Network, error) { + n := Network{ rootDir: rootDir, nodeRunStateCallback: nodeRunStateCallback, } @@ -95,8 +95,8 @@ func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, impor // LoadNetwork loads and initializes the Network state representing // an existing deployed network. -func LoadNetwork(rootDir string) (*Network, error) { - n := &Network{ +func LoadNetwork(rootDir string) (Network, error) { + n := Network{ rootDir: rootDir, } @@ -129,12 +129,12 @@ func loadNetworkCfg(configFile string) (NetworkCfg, error) { } // Name returns the name of the private network -func (n *Network) Name() string { +func (n Network) Name() string { return n.cfg.Name } // PrimaryDataDir returns the primary data directory for the network -func (n *Network) PrimaryDataDir() string { +func (n Network) PrimaryDataDir() string { return n.getNodeFullPath(n.cfg.RelayDirs[0]) } @@ -148,7 +148,7 @@ func (n *Network) NodeDataDirs() []string { } // GetNodeDir returns the node directory that is associated with the given node name. -func (n *Network) GetNodeDir(nodeName string) (string, error) { +func (n Network) GetNodeDir(nodeName string) (string, error) { possibleDir := n.getNodeFullPath(nodeName) if isNodeDir(possibleDir) { return possibleDir, nil @@ -166,7 +166,7 @@ func isNodeDir(path string) bool { } // Genesis returns the genesis data for this network -func (n *Network) Genesis() gen.GenesisData { +func (n Network) Genesis() gen.GenesisData { return n.gen } @@ -186,12 +186,12 @@ func isValidNetworkDir(rootDir string) bool { } // Save persists the network state in the root directory (in network.json) -func (n *Network) Save(rootDir string) error { +func (n Network) Save(rootDir string) error { cfgFile := filepath.Join(rootDir, configFileName) return saveNetworkCfg(n.cfg, cfgFile) } -func (n *Network) getNodeFullPath(nodeDir string) string { +func (n Network) getNodeFullPath(nodeDir string) string { return filepath.Join(n.rootDir, nodeDir) } @@ -206,7 +206,7 @@ func saveNetworkCfg(cfg NetworkCfg, configFile string) error { return err } -func (n *Network) scanForNodes() error { +func (n Network) scanForNodes() error { // Enumerate direct sub-directories of our root and look for valid node data directories (where genesis.json exists) entries, err := ioutil.ReadDir(n.rootDir) if err != nil { @@ -241,7 +241,7 @@ func (n *Network) scanForNodes() error { } // Start the network, ensuring primary relay starts first -func (n *Network) Start(binDir string, redirectOutput bool) error { +func (n Network) Start(binDir string, redirectOutput bool) error { // Start relays // Determine IP:PORT for said relays // Start remaining nodes, pointing at the relays @@ -284,13 +284,8 @@ func (n *Network) Start(binDir string, redirectOutput bool) error { return err } -// NodeRunStateChangesCallback sets the callback for node run state changes. -func (n *Network) NodeRunStateChangesCallback(callback nodecontrol.AlgodRunStateChangedFunc) { - n.nodeRunStateCallback = callback -} - // retry fetching the relay address -func (n *Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress string, err error) { +func (n Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress string, err error) { for i := 1; ; i++ { relayAddress, err = nc.GetListeningAddress() if err == nil { @@ -307,7 +302,7 @@ func (n *Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress s // GetPeerAddresses returns an array of Relay addresses, if any; to be used to start nodes // outside of the main 'Start' call. -func (n *Network) GetPeerAddresses(binDir string) []string { +func (n Network) GetPeerAddresses(binDir string) []string { var peerAddresses []string for _, relayDir := range n.cfg.RelayDirs { @@ -320,7 +315,7 @@ func (n *Network) GetPeerAddresses(binDir string) []string { return peerAddresses } -func (n *Network) startNodes(binDir, relayAddress string, redirectOutput bool) error { +func (n Network) startNodes(binDir, relayAddress string, redirectOutput bool) error { args := nodecontrol.AlgodStartArgs{ PeerAddress: relayAddress, RedirectOutput: redirectOutput, @@ -338,7 +333,7 @@ func (n *Network) startNodes(binDir, relayAddress string, redirectOutput bool) e // StartNode can be called to start a node after the network has been started. // It determines the correct PeerAddresses to use. -func (n *Network) StartNode(binDir, nodeDir string, redirectOutput bool) (err error) { +func (n Network) StartNode(binDir, nodeDir string, redirectOutput bool) (err error) { controller := nodecontrol.MakeNodeController(binDir, nodeDir) peers := n.GetPeerAddresses(binDir) peerAddresses := strings.Join(peers, ";") @@ -352,7 +347,7 @@ func (n *Network) StartNode(binDir, nodeDir string, redirectOutput bool) (err er // Stop the network, ensuring primary relay stops first // No return code - we try to kill them if we can (if we read valid PID file) -func (n *Network) Stop(binDir string) { +func (n Network) Stop(binDir string) { c := make(chan struct{}, len(n.cfg.RelayDirs)+len(n.nodeDirs)) stopNodeContoller := func(nc *nodecontrol.NodeController) { defer func() { @@ -390,7 +385,7 @@ type NetworkNodeStatus struct { } // GetGoalClient returns the libgoal.Client for the specified node name -func (n *Network) GetGoalClient(binDir, nodeName string) (lg libgoal.Client, err error) { +func (n Network) GetGoalClient(binDir, nodeName string) (lg libgoal.Client, err error) { nodeDir, err := n.GetNodeDir(nodeName) if err != nil { return @@ -399,7 +394,7 @@ func (n *Network) GetGoalClient(binDir, nodeName string) (lg libgoal.Client, err } // GetNodeController returns the node controller for the specified node name -func (n *Network) GetNodeController(binDir, nodeName string) (nc nodecontrol.NodeController, err error) { +func (n Network) GetNodeController(binDir, nodeName string) (nc nodecontrol.NodeController, err error) { nodeDir, err := n.GetNodeDir(nodeName) if err != nil { return @@ -409,7 +404,7 @@ func (n *Network) GetNodeController(binDir, nodeName string) (nc nodecontrol.Nod } // NodesStatus retrieves the status of all nodes in the network and returns the status/error for each -func (n *Network) NodesStatus(binDir string) map[string]NetworkNodeStatus { +func (n Network) NodesStatus(binDir string) map[string]NetworkNodeStatus { statuses := make(map[string]NetworkNodeStatus) for _, relayDir := range n.cfg.RelayDirs { @@ -443,7 +438,7 @@ func (n *Network) NodesStatus(binDir string) map[string]NetworkNodeStatus { // Delete the network - try stopping it first if we can. // No return code - we try to kill them if we can (if we read valid PID file) -func (n *Network) Delete(binDir string) error { +func (n Network) Delete(binDir string) error { n.Stop(binDir) return os.RemoveAll(n.rootDir) } diff --git a/test/framework/fixtures/libgoalFixture.go b/test/framework/fixtures/libgoalFixture.go index f27e33619b..6e5e488335 100644 --- a/test/framework/fixtures/libgoalFixture.go +++ b/test/framework/fixtures/libgoalFixture.go @@ -47,7 +47,7 @@ type LibGoalFixture struct { NC nodecontrol.NodeController rootDir string Name string - network *netdeploy.Network + network netdeploy.Network t TestingT tMu deadlock.RWMutex clientPartKeys map[string][]account.Participation @@ -86,8 +86,6 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri importKeys := false // Don't automatically import root keys when creating folders, we'll import on-demand network, err := netdeploy.CreateNetworkFromTemplate("test", f.rootDir, templateFile, f.binDir, importKeys, f.nodeRunStateChanged) f.failOnError(err, "CreateNetworkFromTemplate failed: %v") - - network.NodeRunStateChangesCallback(f.nodeRunStateChanged) f.network = network if startNetwork { From 3d945ebf6719d0d10a77bded6a56e99e67eb026f Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Mon, 13 Jan 2020 18:24:03 -0500 Subject: [PATCH 09/12] undo few func-ptr. --- netdeploy/network.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index ff2e0657aa..c79d0497cc 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -139,7 +139,7 @@ func (n Network) PrimaryDataDir() string { } // NodeDataDirs returns an array of node data directories (not the relays) -func (n *Network) NodeDataDirs() []string { +func (n Network) NodeDataDirs() []string { var directories []string for _, nodeDir := range n.nodeDirs { directories = append(directories, n.getNodeFullPath(nodeDir)) @@ -206,7 +206,7 @@ func saveNetworkCfg(cfg NetworkCfg, configFile string) error { return err } -func (n Network) scanForNodes() error { +func (n *Network) scanForNodes() error { // Enumerate direct sub-directories of our root and look for valid node data directories (where genesis.json exists) entries, err := ioutil.ReadDir(n.rootDir) if err != nil { From d2daa99b37cff4d933549cd22beb1f15deb4e335 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Mon, 13 Jan 2020 18:29:21 -0500 Subject: [PATCH 10/12] undo some more stuff. --- netdeploy/network.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index c79d0497cc..2b4dfe7277 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -274,9 +274,6 @@ func (n Network) Start(binDir string, redirectOutput bool) error { peerAddressListBuilder.WriteString(";") } peerAddressListBuilder.WriteString(relayAddress) - - algodKmdPath, _ := filepath.Abs(filepath.Join(nodeFulllPath, libgoal.DefaultKMDDataDir)) - nc.SetKMDDataDir(algodKmdPath) } peerAddressList := peerAddressListBuilder.String() @@ -304,7 +301,6 @@ func (n Network) getRelayAddress(nc nodecontrol.NodeController) (relayAddress st // outside of the main 'Start' call. func (n Network) GetPeerAddresses(binDir string) []string { var peerAddresses []string - for _, relayDir := range n.cfg.RelayDirs { nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(relayDir)) relayAddress, err := nc.GetListeningAddress() @@ -355,7 +351,6 @@ func (n Network) Stop(binDir string) { }() nc.FullStop() } - for _, relayDir := range n.cfg.RelayDirs { relayDataDir := n.getNodeFullPath(relayDir) nc := nodecontrol.MakeNodeController(binDir, relayDataDir) @@ -370,7 +365,6 @@ func (n Network) Stop(binDir string) { nc.SetKMDDataDir(algodKmdPath) go stopNodeContoller(&nc) } - // wait until we finish stopping all the node controllers. for i := cap(c); i > 0; i-- { <-c From c0b9eb9609d47d74d2e4e71c020df874424e6343 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Tue, 14 Jan 2020 20:53:33 -0500 Subject: [PATCH 11/12] Update method names --- netdeploy/network.go | 26 +++++++++++------------ nodecontrol/NodeController.go | 6 +++--- nodecontrol/algodControl.go | 4 ++-- test/framework/fixtures/libgoalFixture.go | 6 +++--- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index 2b4dfe7277..1d2fb381a4 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -48,19 +48,19 @@ type NetworkCfg struct { // Network represents an instance of a deployed network type Network struct { - rootDir string - cfg NetworkCfg - nodeDirs map[string]string // mapping between the node name and the directories where the node is operation on (not including RelayDirs) - gen gen.GenesisData - nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc + rootDir string + cfg NetworkCfg + nodeDirs map[string]string // mapping between the node name and the directories where the node is operation on (not including RelayDirs) + gen gen.GenesisData + nodeExitcallback nodecontrol.AlgodExitErrorCallback } // CreateNetworkFromTemplate uses the specified template to deploy a new private network // under the specified root directory. -func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool, nodeRunStateCallback nodecontrol.AlgodRunStateChangedFunc) (Network, error) { +func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool, nodeExitcallback nodecontrol.AlgodExitErrorCallback) (Network, error) { n := Network{ - rootDir: rootDir, - nodeRunStateCallback: nodeRunStateCallback, + rootDir: rootDir, + nodeExitcallback: nodeExitcallback, } n.cfg.Name = name n.cfg.TemplateFile = templateFile @@ -256,8 +256,8 @@ func (n Network) Start(binDir string, redirectOutput bool) error { nodeFulllPath := n.getNodeFullPath(relayDir) nc := nodecontrol.MakeNodeController(binDir, nodeFulllPath) args := nodecontrol.AlgodStartArgs{ - RedirectOutput: redirectOutput, - RunStateChanged: n.nodeRunStateCallback, + RedirectOutput: redirectOutput, + ExitErrorCallback: n.nodeExitcallback, } _, err := nc.StartAlgod(args) @@ -313,9 +313,9 @@ func (n Network) GetPeerAddresses(binDir string) []string { func (n Network) startNodes(binDir, relayAddress string, redirectOutput bool) error { args := nodecontrol.AlgodStartArgs{ - PeerAddress: relayAddress, - RedirectOutput: redirectOutput, - RunStateChanged: n.nodeRunStateCallback, + PeerAddress: relayAddress, + RedirectOutput: redirectOutput, + ExitErrorCallback: n.nodeExitcallback, } for _, nodeDir := range n.nodeDirs { nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(nodeDir)) diff --git a/nodecontrol/NodeController.go b/nodecontrol/NodeController.go index b182d4df52..8467bf5ff6 100644 --- a/nodecontrol/NodeController.go +++ b/nodecontrol/NodeController.go @@ -50,9 +50,9 @@ func MakeNodeController(binDir, algodDataDir string) NodeController { return nc } -// AlgodRunStateChangedFunc is the callback function from the node controller that reports upstream +// AlgodExitErrorCallback is the callback function from the node controller that reports upstream // in case there was a change with the algod running state. -type AlgodRunStateChangedFunc func(*NodeController, error) +type AlgodExitErrorCallback func(*NodeController, error) // AlgodStartArgs are the possible arguments for starting algod type AlgodStartArgs struct { @@ -61,7 +61,7 @@ type AlgodStartArgs struct { RedirectOutput bool RunUnderHost bool TelemetryOverride string - RunStateChanged AlgodRunStateChangedFunc + ExitErrorCallback AlgodExitErrorCallback } // KMDStartArgs are the possible arguments for starting kmd diff --git a/nodecontrol/algodControl.go b/nodecontrol/algodControl.go index acafbcd73e..7563b95c92 100644 --- a/nodecontrol/algodControl.go +++ b/nodecontrol/algodControl.go @@ -194,8 +194,8 @@ func (nc *NodeController) StartAlgod(args AlgodStartArgs) (alreadyRunning bool, select { case <-startAlgodCompletedChan: // we've already exited this function, so we want to report to the error to the callback. - if args.RunStateChanged != nil { - args.RunStateChanged(nc, err) + if args.ExitErrorCallback != nil { + args.ExitErrorCallback(nc, err) } default: } diff --git a/test/framework/fixtures/libgoalFixture.go b/test/framework/fixtures/libgoalFixture.go index 6e5e488335..200773c5bc 100644 --- a/test/framework/fixtures/libgoalFixture.go +++ b/test/framework/fixtures/libgoalFixture.go @@ -84,7 +84,7 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri os.RemoveAll(f.rootDir) templateFile = filepath.Join(f.testDataDir, templateFile) importKeys := false // Don't automatically import root keys when creating folders, we'll import on-demand - network, err := netdeploy.CreateNetworkFromTemplate("test", f.rootDir, templateFile, f.binDir, importKeys, f.nodeRunStateChanged) + network, err := netdeploy.CreateNetworkFromTemplate("test", f.rootDir, templateFile, f.binDir, importKeys, f.nodeExitWithError) f.failOnError(err, "CreateNetworkFromTemplate failed: %v") f.network = network @@ -93,9 +93,9 @@ func (f *LibGoalFixture) setup(test TestingT, testName string, templateFile stri } } -// nodeRunStateChanged is a callback from the network indicating that the node run state have changed. +// nodeExitWithError is a callback from the network indicating that the node exit with an error after a successfull startup. // i.e. node terminated, and not due to shutdown.. this is likely to be a crash/panic. -func (f *LibGoalFixture) nodeRunStateChanged(nc *nodecontrol.NodeController, err error) { +func (f *LibGoalFixture) nodeExitWithError(nc *nodecontrol.NodeController, err error) { if err == nil { return } From f914b3897aff30a61d21c1257f92722dbabd6869 Mon Sep 17 00:00:00 2001 From: Tsachi Herman Date: Tue, 14 Jan 2020 22:47:56 -0500 Subject: [PATCH 12/12] Few more touchups. --- netdeploy/network.go | 10 +++++----- test/framework/fixtures/libgoalFixture.go | 13 ++++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/netdeploy/network.go b/netdeploy/network.go index 1d2fb381a4..0d867bb60c 100644 --- a/netdeploy/network.go +++ b/netdeploy/network.go @@ -52,15 +52,15 @@ type Network struct { cfg NetworkCfg nodeDirs map[string]string // mapping between the node name and the directories where the node is operation on (not including RelayDirs) gen gen.GenesisData - nodeExitcallback nodecontrol.AlgodExitErrorCallback + nodeExitCallback nodecontrol.AlgodExitErrorCallback } // CreateNetworkFromTemplate uses the specified template to deploy a new private network // under the specified root directory. -func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool, nodeExitcallback nodecontrol.AlgodExitErrorCallback) (Network, error) { +func CreateNetworkFromTemplate(name, rootDir, templateFile, binDir string, importKeys bool, nodeExitCallback nodecontrol.AlgodExitErrorCallback) (Network, error) { n := Network{ rootDir: rootDir, - nodeExitcallback: nodeExitcallback, + nodeExitCallback: nodeExitCallback, } n.cfg.Name = name n.cfg.TemplateFile = templateFile @@ -257,7 +257,7 @@ func (n Network) Start(binDir string, redirectOutput bool) error { nc := nodecontrol.MakeNodeController(binDir, nodeFulllPath) args := nodecontrol.AlgodStartArgs{ RedirectOutput: redirectOutput, - ExitErrorCallback: n.nodeExitcallback, + ExitErrorCallback: n.nodeExitCallback, } _, err := nc.StartAlgod(args) @@ -315,7 +315,7 @@ func (n Network) startNodes(binDir, relayAddress string, redirectOutput bool) er args := nodecontrol.AlgodStartArgs{ PeerAddress: relayAddress, RedirectOutput: redirectOutput, - ExitErrorCallback: n.nodeExitcallback, + ExitErrorCallback: n.nodeExitCallback, } for _, nodeDir := range n.nodeDirs { nc := nodecontrol.MakeNodeController(binDir, n.getNodeFullPath(nodeDir)) diff --git a/test/framework/fixtures/libgoalFixture.go b/test/framework/fixtures/libgoalFixture.go index 200773c5bc..ffc11f12a7 100644 --- a/test/framework/fixtures/libgoalFixture.go +++ b/test/framework/fixtures/libgoalFixture.go @@ -20,8 +20,10 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "strings" + "syscall" "testing" "time" @@ -99,12 +101,21 @@ func (f *LibGoalFixture) nodeExitWithError(nc *nodecontrol.NodeController, err e if err == nil { return } + f.tMu.RLock() defer f.tMu.RUnlock() if f.t == nil { return } - f.t.Errorf("Node %s has changed it's status to %v", nc.GetDataDir(), err) + exitError, ok := err.(*exec.ExitError) + if !ok { + require.NoError(f.t, err, "Node at %s has terminated with an error", nc.GetDataDir()) + return + } + ws := exitError.Sys().(syscall.WaitStatus) + exitCode := ws.ExitStatus() + + require.NoError(f.t, err, "Node at %s has terminated with error code %d", nc.GetDataDir(), exitCode) } func (f *LibGoalFixture) importRootKeys(lg *libgoal.Client, dataDir string) {