Skip to content

Commit d647b2d

Browse files
Add envoy proxy liveness checks (cloudfoundry#110)
Co-authored-by: Plamen Bardarov <[email protected]>
1 parent 974aef6 commit d647b2d

File tree

3 files changed

+161
-5
lines changed

3 files changed

+161
-5
lines changed

depot/transformer/transformer.go

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package transformer
22

33
import (
44
"bytes"
5+
"code.cloudfoundry.org/lager/v3"
56
"errors"
67
"fmt"
78
"os"
@@ -19,7 +20,6 @@ import (
1920
"code.cloudfoundry.org/executor/depot/steps"
2021
"code.cloudfoundry.org/executor/depot/uploader"
2122
"code.cloudfoundry.org/garden"
22-
"code.cloudfoundry.org/lager/v3"
2323
"code.cloudfoundry.org/workpool"
2424
"github.com/tedsuo/ifrit"
2525
)
@@ -63,8 +63,10 @@ type transformer struct {
6363
gracefulShutdownInterval time.Duration
6464
healthCheckWorkPool *workpool.WorkPool
6565

66-
useContainerProxy bool
67-
drainWait time.Duration
66+
useContainerProxy bool
67+
drainWait time.Duration
68+
enableContainerProxyHealthChecks bool
69+
proxyHealthCheckInterval time.Duration
6870

6971
postSetupHook []string
7072
postSetupUser string
@@ -93,6 +95,13 @@ func WithContainerProxy(drainWait time.Duration) Option {
9395
}
9496
}
9597

98+
// WithProxyLivenessChecks returns an Option that enables container proxy
// health checks on the transformer and records interval as the proxy health
// check interval.
//
// StepsRunner only consults these settings when the container proxy and
// declarative health checks are both in use; in that case interval is passed
// to createCheck for the liveness check created for each proxy TLS port.
func WithProxyLivenessChecks(interval time.Duration) Option {
99+
return func(t *transformer) {
100+
t.enableContainerProxyHealthChecks = true
101+
t.proxyHealthCheckInterval = interval
102+
}
103+
}
104+
96105
func WithPostSetupHook(user string, hook []string) Option {
97106
return func(t *transformer) {
98107
t.postSetupUser = user
@@ -443,11 +452,13 @@ func (t *transformer) StepsRunner(
443452
}
444453

445454
var proxyStartupChecks []ifrit.Runner
455+
var proxyLivenessChecks []ifrit.Runner
446456

447457
if t.useContainerProxy && t.useDeclarativeHealthCheck {
448458
envoyStartupLogger := logger.Session("envoy-startup-check")
459+
envoyLivenessLogger := logger.Session("envoy-liveness-check")
449460

450-
for idx, p := range config.ProxyTLSPorts {
461+
for idx, port := range config.ProxyTLSPorts {
451462
// add envoy startup checks
452463
startupSidecarName := fmt.Sprintf("%s-envoy-startup-healthcheck-%d", gardenContainer.Handle(), idx)
453464

@@ -457,7 +468,7 @@ func (t *transformer) StepsRunner(
457468
config.BindMounts,
458469
"",
459470
startupSidecarName,
460-
int(p),
471+
int(port),
461472
DefaultDeclarativeHealthcheckRequestTimeout,
462473
executor.TCPCheck,
463474
executor.IsStartupCheck,
@@ -467,6 +478,30 @@ func (t *transformer) StepsRunner(
467478
config.MetronClient,
468479
false,
469480
)
481+
482+
if t.enableContainerProxyHealthChecks {
483+
livenessSidecarName := fmt.Sprintf("%s-envoy-liveness-healthcheck-%d", gardenContainer.Handle(), idx)
484+
485+
livenessStep := t.createCheck(
486+
&container,
487+
gardenContainer,
488+
config.BindMounts,
489+
"",
490+
livenessSidecarName,
491+
int(port),
492+
DefaultDeclarativeHealthcheckRequestTimeout,
493+
executor.TCPCheck,
494+
executor.IsLivenessCheck,
495+
t.proxyHealthCheckInterval,
496+
envoyLivenessLogger,
497+
"instance proxy health check failed",
498+
config.MetronClient,
499+
t.emitHealthCheckMetrics,
500+
)
501+
502+
proxyLivenessChecks = append(proxyLivenessChecks, livenessStep)
503+
}
504+
470505
proxyStartupChecks = append(proxyStartupChecks, step)
471506
}
472507
}
@@ -479,8 +514,10 @@ func (t *transformer) StepsRunner(
479514
logStreamer,
480515
config.BindMounts,
481516
proxyStartupChecks,
517+
proxyLivenessChecks,
482518
config.MetronClient,
483519
)
520+
484521
substeps = append(substeps, monitor)
485522
}
486523

@@ -804,6 +841,7 @@ func (t *transformer) transformCheckDefinition(
804841
logstreamer log_streamer.LogStreamer,
805842
bindMounts []garden.BindMount,
806843
proxyStartupChecks []ifrit.Runner,
844+
proxyLivenessChecks []ifrit.Runner,
807845
metronClient loggingclient.IngressClient,
808846
) ifrit.Runner {
809847
var startupChecks []ifrit.Runner
@@ -913,6 +951,7 @@ func (t *transformer) transformCheckDefinition(
913951
}
914952

915953
startupCheck := steps.NewParallel(append(proxyStartupChecks, startupChecks...))
954+
livenessChecks = append(livenessChecks, proxyLivenessChecks...)
916955
livenessCheck := steps.NewCodependent(livenessChecks, false, false)
917956

918957
return steps.NewHealthCheckStep(

depot/transformer/transformer_test.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ var _ = Describe("Transformer", func() {
670670
Context("and container proxy is enabled", func() {
671671
BeforeEach(func() {
672672
options = append(options, transformer.WithContainerProxy(time.Second))
673+
options = append(options, transformer.WithProxyLivenessChecks(time.Second))
673674
cfg.BindMounts = append(cfg.BindMounts, garden.BindMount{
674675
Origin: garden.BindMountOriginHost,
675676
SrcPath: declarativeHealthcheckSrcPath,
@@ -1504,6 +1505,112 @@ var _ = Describe("Transformer", func() {
15041505
}))
15051506
})
15061507

1508+
Context("and container proxy is enabled", func() {
1509+
var (
1510+
otherStartupProcess *gardenfakes.FakeProcess
1511+
otherStartupCh chan int
1512+
otherLivenessProcess *gardenfakes.FakeProcess
1513+
otherLivenessCh chan int
1514+
)
1515+
1516+
BeforeEach(func() {
1517+
options = append(options, transformer.WithContainerProxy(time.Second))
1518+
cfg.ProxyTLSPorts = []uint16{61001}
1519+
1520+
otherStartupCh = make(chan int)
1521+
otherStartupProcess = makeProcess(otherStartupCh)
1522+
1523+
otherLivenessCh = make(chan int)
1524+
otherLivenessProcess = makeProcess(otherLivenessCh)
1525+
1526+
healthcheckCallCount := int64(0)
1527+
1528+
gardenContainer.RunStub = func(spec garden.ProcessSpec, io garden.ProcessIO) (process garden.Process, err error) {
1529+
defer GinkgoRecover()
1530+
// get rid of race condition caused by write inside the BeforeEach
1531+
processLock.Lock()
1532+
defer processLock.Unlock()
1533+
1534+
switch spec.Path {
1535+
case "/action/path":
1536+
return actionProcess, nil
1537+
case filepath.Join(transformer.HealthCheckDstPath, "healthcheck"):
1538+
oldCount := atomic.AddInt64(&healthcheckCallCount, 1)
1539+
switch oldCount {
1540+
case 1:
1541+
return startupProcess, nil
1542+
case 2:
1543+
return otherStartupProcess, nil
1544+
case 3:
1545+
return livenessProcess, nil
1546+
case 4:
1547+
return otherLivenessProcess, nil
1548+
}
1549+
return livenessProcess, nil
1550+
case "/monitor/path":
1551+
return monitorProcess, nil
1552+
}
1553+
1554+
err = errors.New("")
1555+
Fail("unexpected executable path: " + spec.Path)
1556+
return
1557+
}
1558+
})
1559+
1560+
JustBeforeEach(func() {
1561+
otherStartupCh <- 0
1562+
})
1563+
1564+
AfterEach(func() {
1565+
close(otherStartupCh)
1566+
close(otherLivenessCh)
1567+
})
1568+
1569+
Context("and proxy liveness check is enabled", func() {
1570+
BeforeEach(func() {
1571+
options = append(options, transformer.WithProxyLivenessChecks(time.Second*30))
1572+
})
1573+
1574+
It("starts the proxy liveness check", func() {
1575+
Eventually(gardenContainer.RunCallCount).Should(Equal(5))
1576+
var ids []string
1577+
var args [][]string
1578+
for i := 0; i < gardenContainer.RunCallCount(); i++ {
1579+
spec, _ := gardenContainer.RunArgsForCall(i)
1580+
ids = append(ids, spec.ID)
1581+
args = append(args, spec.Args)
1582+
}
1583+
1584+
Expect(ids).To(ContainElement(fmt.Sprintf("%s-%s", gardenContainer.Handle(), "envoy-liveness-healthcheck-0")))
1585+
Expect(args).To(ContainElement([]string{
1586+
"-port=61001",
1587+
"-timeout=1000ms",
1588+
"-liveness-interval=30s",
1589+
}))
1590+
})
1591+
})
1592+
1593+
Context("and proxy liveness check is disabled", func() {
1594+
It("does not start the proxy liveness check", func() {
1595+
Eventually(gardenContainer.RunCallCount).Should(Equal(4))
1596+
var ids []string
1597+
var args [][]string
1598+
for i := 0; i < gardenContainer.RunCallCount(); i++ {
1599+
spec, _ := gardenContainer.RunArgsForCall(i)
1600+
ids = append(ids, spec.ID)
1601+
args = append(args, spec.Args)
1602+
}
1603+
1604+
Expect(ids).To(Not(ContainElement(fmt.Sprintf("%s-%s", gardenContainer.Handle(), "envoy-liveness-healthcheck-0"))))
1605+
Expect(args).To(Not(ContainElement([]string{
1606+
"-port=61001",
1607+
"-timeout=1000ms",
1608+
"-liveness-interval=30s",
1609+
})))
1610+
})
1611+
})
1612+
})
1613+
15071614
Context("when optional values are not provided in liveness check defintion", func() {
15081615
BeforeEach(func() {
15091616
container.CheckDefinition = &models.CheckDefinition{

initializer/initializer.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,14 @@ type ExecutorConfig struct {
102102
DeleteWorkPoolSize int `json:"delete_work_pool_size,omitempty"`
103103
DiskMB string `json:"disk_mb,omitempty"`
104104
EnableContainerProxy bool `json:"enable_container_proxy,omitempty"`
105+
EnableContainerProxyHealthChecks bool `json:"enable_container_proxy_healthcheck,omitempty"`
105106
EnableDeclarativeHealthcheck bool `json:"enable_declarative_healthcheck,omitempty"`
106107
EnableHealtcheckMetrics bool `json:"enable_healthcheck_metrics,omitempty"`
107108
EnableUnproxiedPortMappings bool `json:"enable_unproxied_port_mappings"`
108109
EnvoyConfigRefreshDelay durationjson.Duration `json:"envoy_config_refresh_delay"`
109110
EnvoyConfigReloadDuration durationjson.Duration `json:"envoy_config_reload_duration"`
110111
EnvoyDrainTimeout durationjson.Duration `json:"envoy_drain_timeout,omitempty"`
112+
ProxyHealthCheckInterval durationjson.Duration `json:"proxy_healthcheck_interval,omitempty"`
111113
ExportNetworkEnvVars bool `json:"export_network_env_vars,omitempty"` // DEPRECATED. Kept around for dusts compatibility
112114
GardenAddr string `json:"garden_addr,omitempty"`
113115
GardenHealthcheckCommandRetryPause durationjson.Duration `json:"garden_healthcheck_command_retry_pause,omitempty"`
@@ -264,6 +266,8 @@ func Initialize(
264266
sidecarRootFSPath,
265267
config.EnableContainerProxy,
266268
time.Duration(config.EnvoyDrainTimeout),
269+
config.EnableContainerProxyHealthChecks,
270+
time.Duration(config.ProxyHealthCheckInterval),
267271
)
268272

269273
hub := event.NewHub()
@@ -568,6 +572,8 @@ func initializeTransformer(
568572
declarativeHealthcheckRootFS string,
569573
enableContainerProxy bool,
570574
drainWait time.Duration,
575+
enableProxyHealthChecks bool,
576+
proxyHealthCheckInterval time.Duration,
571577
) transformer.Transformer {
572578
var options []transformer.Option
573579
compressor := compressor.NewTgz()
@@ -584,6 +590,10 @@ func initializeTransformer(
584590

585591
if enableContainerProxy {
586592
options = append(options, transformer.WithContainerProxy(drainWait))
593+
594+
if enableProxyHealthChecks {
595+
options = append(options, transformer.WithProxyLivenessChecks(proxyHealthCheckInterval))
596+
}
587597
}
588598

589599
options = append(options, transformer.WithPostSetupHook(postSetupUser, postSetupHook))

0 commit comments

Comments
 (0)