Skip to content

Commit c9fb551

Browse files
committed
Fix autostart for swarm scope connected containers
Containers connected to swarm-scope networks with autostart enabled had a dependency problem: the cluster must be initialized before they can be autostarted. Because the current container restart code runs before cluster init, these containers were not getting autostarted properly. Added a fix to delay the start of containers that have at least one swarm-scope endpoint until after the cluster is initialized. Signed-off-by: Jana Radhakrishnan <[email protected]>
1 parent cf58eb4 commit c9fb551

File tree

6 files changed

+92
-10
lines changed

6 files changed

+92
-10
lines changed

cmd/dockerd/daemon.go

+5
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,11 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) {
262262
logrus.Fatalf("Error creating cluster component: %v", err)
263263
}
264264

265+
// Restart all autostart containers which have a swarm endpoint
266+
// and are not yet running, now that we have successfully
267+
// initialized the cluster.
268+
d.RestartSwarmContainers()
269+
265270
logrus.Info("Daemon has completed initialization")
266271

267272
logrus.WithFields(logrus.Fields{

daemon/cluster/cluster.go

+28-9
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,11 @@ type Cluster struct {
135135
// helps in identifying the attachment ID via the taskID and the
136136
// corresponding attachment configuration obtained from the manager.
137137
type attacher struct {
138-
taskID string
139-
config *network.NetworkingConfig
140-
attachWaitCh chan *network.NetworkingConfig
141-
detachWaitCh chan struct{}
138+
taskID string
139+
config *network.NetworkingConfig
140+
attachWaitCh chan *network.NetworkingConfig
141+
attachCompleteCh chan struct{}
142+
detachWaitCh chan struct{}
142143
}
143144

144145
type node struct {
@@ -1262,12 +1263,24 @@ func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID,
12621263
agent := c.node.Agent()
12631264
c.RUnlock()
12641265

1265-
if ok && attacher != nil && attacher.detachWaitCh != nil {
1266+
if ok && attacher != nil &&
1267+
attacher.detachWaitCh != nil &&
1268+
attacher.attachCompleteCh != nil {
1269+
// Attachment may be in progress still so wait for
1270+
// attachment to complete.
12661271
select {
1267-
case <-attacher.detachWaitCh:
1272+
case <-attacher.attachCompleteCh:
12681273
case <-ctx.Done():
12691274
return ctx.Err()
12701275
}
1276+
1277+
if attacher.taskID == taskID {
1278+
select {
1279+
case <-attacher.detachWaitCh:
1280+
case <-ctx.Done():
1281+
return ctx.Err()
1282+
}
1283+
}
12711284
}
12721285

12731286
return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
@@ -1289,9 +1302,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
12891302
agent := c.node.Agent()
12901303
attachWaitCh := make(chan *network.NetworkingConfig)
12911304
detachWaitCh := make(chan struct{})
1305+
attachCompleteCh := make(chan struct{})
12921306
c.attachers[aKey] = &attacher{
1293-
attachWaitCh: attachWaitCh,
1294-
detachWaitCh: detachWaitCh,
1307+
attachWaitCh: attachWaitCh,
1308+
attachCompleteCh: attachCompleteCh,
1309+
detachWaitCh: detachWaitCh,
12951310
}
12961311
c.Unlock()
12971312

@@ -1306,6 +1321,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
13061321
return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
13071322
}
13081323

1324+
c.Lock()
1325+
c.attachers[aKey].taskID = taskID
1326+
close(attachCompleteCh)
1327+
c.Unlock()
1328+
13091329
logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
13101330

13111331
var config *network.NetworkingConfig
@@ -1316,7 +1336,6 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
13161336
}
13171337

13181338
c.Lock()
1319-
c.attachers[aKey].taskID = taskID
13201339
c.attachers[aKey].config = config
13211340
c.Unlock()
13221341
return config, nil

daemon/container_operations.go

+5
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,9 @@ func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrN
384384
return nil, nil, err
385385
}
386386

387+
// This container has an attachment to a swarm scope
388+
// network. Update the container network settings accordingly.
389+
container.NetworkSettings.HasSwarmEndpoint = true
387390
return n, config, nil
388391
}
389392

@@ -492,6 +495,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
492495
// on first network connecting.
493496
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
494497
if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
498+
cleanOperationalData(nConf)
495499
if err := daemon.connectToNetwork(container, defaultNetName, nConf.EndpointSettings, updateSettings); err != nil {
496500
return err
497501
}
@@ -512,6 +516,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
512516
}
513517

514518
for i, epConf := range epConfigs {
519+
cleanOperationalData(epConf)
515520
if err := daemon.connectToNetwork(container, networks[i], epConf.EndpointSettings, updateSettings); err != nil {
516521
return err
517522
}

daemon/daemon.go

+31-1
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,13 @@ func (daemon *Daemon) restore() error {
202202
// fixme: only if not running
203203
// get list of containers we need to restart
204204
if !c.IsRunning() && !c.IsPaused() {
205-
if daemon.configStore.AutoRestart && c.ShouldRestart() {
205+
// Do not autostart containers which
206+
// have endpoints in a swarm scope
207+
// network yet since the cluster is
208+
// not initialized yet. We will start
209+
// them after the cluster is
210+
// initialized.
211+
if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
206212
mapLock.Lock()
207213
restartContainers[c] = make(chan struct{})
208214
mapLock.Unlock()
@@ -346,6 +352,30 @@ func (daemon *Daemon) restore() error {
346352
return nil
347353
}
348354

355+
// RestartSwarmContainers restarts any autostart container which has a
356+
// swarm endpoint.
357+
func (daemon *Daemon) RestartSwarmContainers() {
358+
group := sync.WaitGroup{}
359+
for _, c := range daemon.List() {
360+
if !c.IsRunning() && !c.IsPaused() {
361+
// Autostart all the containers which have a
362+
// swarm endpoint now that the cluster is
363+
// initialized.
364+
if daemon.configStore.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint {
365+
group.Add(1)
366+
go func(c *container.Container) {
367+
defer group.Done()
368+
if err := daemon.containerStart(c, ""); err != nil {
369+
logrus.Error(err)
370+
}
371+
}(c)
372+
}
373+
}
374+
375+
}
376+
group.Wait()
377+
}
378+
349379
// waitForNetworks is used during daemon initialization when starting up containers
350380
// It ensures that all of a container's networks are available before the daemon tries to start the container.
351381
// In practice it just makes sure the discovery service is available for containers which use a network that require discovery.

daemon/network/settings.go

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ type Settings struct {
2121
SecondaryIPAddresses []networktypes.Address
2222
SecondaryIPv6Addresses []networktypes.Address
2323
IsAnonymousEndpoint bool
24+
HasSwarmEndpoint bool
2425
}
2526

2627
// EndpointSettings is a package local wrapper for

integration-cli/docker_cli_swarm_test.go

+22
Original file line numberDiff line numberDiff line change
@@ -242,3 +242,25 @@ func (s *DockerSwarmSuite) TestSwarmServiceWithGroup(c *check.C) {
242242
c.Assert(err, checker.IsNil)
243243
c.Assert(strings.TrimSpace(out), checker.Equals, "uid=0(root) gid=0(root) groups=10(wheel),29(audio),50(staff),777")
244244
}
245+
246+
func (s *DockerSwarmSuite) TestSwarmContainerAutoStart(c *check.C) {
247+
d := s.AddDaemon(c, true, true)
248+
249+
out, err := d.Cmd("network", "create", "--attachable", "-d", "overlay", "foo")
250+
c.Assert(err, checker.IsNil)
251+
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
252+
253+
out, err = d.Cmd("run", "-id", "--restart=always", "--net=foo", "--name=test", "busybox", "top")
254+
c.Assert(err, checker.IsNil)
255+
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
256+
257+
out, err = d.Cmd("ps", "-q")
258+
c.Assert(err, checker.IsNil)
259+
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
260+
261+
d.Restart()
262+
263+
out, err = d.Cmd("ps", "-q")
264+
c.Assert(err, checker.IsNil)
265+
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
266+
}

0 commit comments

Comments
 (0)