Skip to content

Commit 2879701

Browse files
committed
[EXPERIMENTAL] Integration Test on Swarm
This commit adds contrib/integration-cli-on-swarm/integration-cli-on-swarm.sh, which enables IT to be running in parallel, using Swarm-mode and Funker. Please refer to contrib/integration-cli-on-swarm/README.md The test takes almost 5 to 6 minutes, with 10 n1-standard-4 GCE instances. $ ./contrib/integration-cli-on-swarm/integration-cli-on-swarm.sh --push-worker-image example.gcr.io/foo/bar --replicas 30 --shuffle 2016/12/29 08:32:15 Loaded 1618 tests (30 chunks) 2016/12/29 08:32:15 Executing 30 chunks in parallel, against "integration-cli-worker" 2016/12/29 08:32:15 Executing chunk 0 (contains 54 test filters) .. 2016/12/29 08:34:34 Finished chunk 28 [1/30] with 54 test filters in 2m19.098068003s, code=0. 2016/12/29 08:34:38 Finished chunk 12 [2/30] with 54 test filters in 2m23.088569511s, code=0. 2016/12/29 08:34:48 Finished chunk 10 [3/30] with 54 test filters in 2m33.880679079s, code=0. 2016/12/29 08:34:54 Finished chunk 20 [4/30] with 54 test filters in 2m39.973747028s, code=0. 2016/12/29 08:35:11 Finished chunk 18 [5/30] with 54 test filters in 2m56.28384361s, code=0. 2016/12/29 08:35:11 Finished chunk 29 [6/30] with 52 test filters in 2m56.54047088s, code=0. 2016/12/29 08:35:15 Finished chunk 1 [7/30] with 54 test filters in 3m0.285044426s, code=0. 2016/12/29 08:35:22 Finished chunk 6 [8/30] with 54 test filters in 3m7.211775338s, code=0. 2016/12/29 08:35:24 Finished chunk 25 [9/30] with 54 test filters in 3m9.938413009s, code=0. 2016/12/29 08:35:30 Finished chunk 27 [10/30] with 54 test filters in 3m15.219834368s, code=0. 2016/12/29 08:35:36 Finished chunk 9 [11/30] with 54 test filters in 3m21.615434162s, code=0. 2016/12/29 08:35:41 Finished chunk 13 [12/30] with 54 test filters in 3m26.576907401s, code=0. 2016/12/29 08:35:45 Finished chunk 17 [13/30] with 54 test filters in 3m30.290752537s, code=0. 2016/12/29 08:35:53 Finished chunk 2 [14/30] with 54 test filters in 3m38.148423321s, code=0. 
2016/12/29 08:35:55 Finished chunk 24 [15/30] with 54 test filters in 3m40.09669137s, code=0. 2016/12/29 08:35:57 Finished chunk 8 [16/30] with 54 test filters in 3m42.299945108s, code=0. 2016/12/29 08:35:57 Finished chunk 22 [17/30] with 54 test filters in 3m42.946558809s, code=0. 2016/12/29 08:35:59 Finished chunk 23 [18/30] with 54 test filters in 3m44.232557165s, code=0. 2016/12/29 08:36:02 Finished chunk 3 [19/30] with 54 test filters in 3m47.112051358s, code=0. 2016/12/29 08:36:11 Finished chunk 15 [20/30] with 54 test filters in 3m56.340656645s, code=0. 2016/12/29 08:36:11 Finished chunk 11 [21/30] with 54 test filters in 3m56.882401231s, code=0. 2016/12/29 08:36:22 Finished chunk 19 [22/30] with 54 test filters in 4m7.551093516s, code=0. 2016/12/29 08:36:23 Finished chunk 21 [23/30] with 54 test filters in 4m8.221093446s, code=0. 2016/12/29 08:36:25 Finished chunk 16 [24/30] with 54 test filters in 4m10.450451705s, code=0. 2016/12/29 08:36:27 Finished chunk 5 [25/30] with 54 test filters in 4m12.162272692s, code=0. 2016/12/29 08:36:28 Finished chunk 14 [26/30] with 54 test filters in 4m13.977801031s, code=0. 2016/12/29 08:36:29 Finished chunk 0 [27/30] with 54 test filters in 4m14.34086812s, code=0. 2016/12/29 08:36:49 Finished chunk 26 [28/30] with 54 test filters in 4m34.437085539s, code=0. 2016/12/29 08:37:14 Finished chunk 7 [29/30] with 54 test filters in 4m59.22902721s, code=0. 2016/12/29 08:37:20 Finished chunk 4 [30/30] with 54 test filters in 5m5.103469214s, code=0. 2016/12/29 08:37:20 Executed 30 chunks in 5m5.104379119s. PASS: 30, FAIL: 0. Signed-off-by: Akihiro Suda <[email protected]>
1 parent 7fb83eb commit 2879701

File tree

24 files changed

+1486
-5
lines changed

24 files changed

+1486
-5
lines changed

.dockerignore

+2
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ bundles
33
vendor/pkg
44
.go-pkg-cache
55
.git
6+
hack/integration-cli-on-swarm/integration-cli-on-swarm
7+

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ man/man1
3131
man/man5
3232
man/man8
3333
vendor/pkg/
34+
hack/integration-cli-on-swarm/integration-cli-on-swarm

Makefile

+22-2
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ DOCKER_MOUNT := $(if $(DOCKER_MOUNT),$(DOCKER_MOUNT),-v /go/src/github.com/docke
5858
DOCKER_CONTAINER_NAME := $(if $(CONTAINER_NAME),--name $(CONTAINER_NAME),)
5959

6060
# enable package cache if DOCKER_INCREMENTAL_BINARY and DOCKER_MOUNT (i.e.DOCKER_HOST) are set
61-
PKGCACHE_MAP := gopath:/go/pkg goroot-linux_amd64_netgo:/usr/local/go/pkg/linux_amd64_netgo
61+
PKGCACHE_MAP := gopath:/go/pkg goroot-linux_amd64:/usr/local/go/pkg/linux_amd64 goroot-linux_amd64_netgo:/usr/local/go/pkg/linux_amd64_netgo
6262
PKGCACHE_VOLROOT := dockerdev-go-pkg-cache
6363
PKGCACHE_VOL := $(if $(PKGCACHE_DIR),$(CURDIR)/$(PKGCACHE_DIR)/,$(PKGCACHE_VOLROOT)-)
64-
DOCKER_MOUNT := $(if $(DOCKER_INCREMENTAL_BINARY),$(DOCKER_MOUNT) $(shell echo $(PKGCACHE_MAP) | sed -E 's@([^ ]*)@-v "$(PKGCACHE_VOL)\1"@g'),$(DOCKER_MOUNT))
64+
DOCKER_MOUNT_PKGCACHE := $(if $(DOCKER_INCREMENTAL_BINARY),$(shell echo $(PKGCACHE_MAP) | sed -E 's@([^ ]*)@-v "$(PKGCACHE_VOL)\1"@g'),)
65+
DOCKER_MOUNT := $(DOCKER_MOUNT) $(DOCKER_MOUNT_PKGCACHE)
6566

6667
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
6768
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
@@ -74,6 +75,9 @@ export BUILD_APT_MIRROR
7475

7576
SWAGGER_DOCS_PORT ?= 9000
7677

78+
INTEGRATION_CLI_MASTER_IMAGE := $(if $(INTEGRATION_CLI_MASTER_IMAGE), $(INTEGRATION_CLI_MASTER_IMAGE), integration-cli-master)
79+
INTEGRATION_CLI_WORKER_IMAGE := $(if $(INTEGRATION_CLI_WORKER_IMAGE), $(INTEGRATION_CLI_WORKER_IMAGE), integration-cli-worker)
80+
7781
# if this session isn't interactive, then we don't want to allocate a
7882
# TTY, which would fail, but if it is interactive, we do want to attach
7983
# so that the user can send e.g. ^C through.
@@ -172,3 +176,19 @@ swagger-docs: ## preview the API documentation
172176
-e 'REDOC_OPTIONS=hide-hostname="true" lazy-rendering' \
173177
-p $(SWAGGER_DOCS_PORT):80 \
174178
bfirsh/redoc:1.6.2
179+
180+
build-integration-cli-on-swarm: build ## build images and binary for running integration-cli on Swarm in parallel
181+
@echo "Building hack/integration-cli-on-swarm"
182+
go build -o ./hack/integration-cli-on-swarm/integration-cli-on-swarm ./hack/integration-cli-on-swarm/host
183+
@echo "Building $(INTEGRATION_CLI_MASTER_IMAGE)"
184+
docker build -t $(INTEGRATION_CLI_MASTER_IMAGE) hack/integration-cli-on-swarm/agent
185+
# For worker, we don't use `docker build` so as to enable DOCKER_INCREMENTAL_BINARY and so on
186+
@echo "Building $(INTEGRATION_CLI_WORKER_IMAGE) from $(DOCKER_IMAGE)"
187+
$(eval tmp := integration-cli-worker-tmp)
188+
# We mount pkgcache, but not bundle (bundle needs to be baked into the image)
189+
# To avoid baking DOCKER_GRAPHDRIVER and so on into the image, we cannot use $(DOCKER_ENVS) here
190+
docker run -t -d --name $(tmp) -e DOCKER_GITCOMMIT -e BUILDFLAGS -e DOCKER_INCREMENTAL_BINARY --privileged $(DOCKER_MOUNT_PKGCACHE) $(DOCKER_IMAGE) top
191+
docker exec $(tmp) hack/make.sh build-integration-test-binary dynbinary
192+
docker exec $(tmp) go build -o /worker github.com/docker/docker/hack/integration-cli-on-swarm/agent/worker
193+
docker commit -c 'ENTRYPOINT ["/worker"]' $(tmp) $(INTEGRATION_CLI_WORKER_IMAGE)
194+
docker rm -f $(tmp)
+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Integration Testing on Swarm
2+
3+
IT on Swarm allows you to execute integration tests in parallel across a Docker Swarm cluster.
4+
5+
## Architecture
6+
7+
### Master service
8+
9+
- Works as a funker caller
10+
- Calls a worker funker (`-worker-service`) with a chunk of `-check.f` filter strings (passed as a file via `-input` flag, typically `/mnt/input`)
11+
12+
### Worker service
13+
14+
- Works as a funker callee
15+
- Executes an equivalent of `TESTFLAGS=-check.f TestFoo|TestBar|TestBaz ... make test-integration-cli` using the bind-mounted API socket (`docker.sock`)
16+
17+
### Client
18+
19+
- Controls master and workers via `docker stack`
20+
- No need to have a local daemon
21+
22+
Typically, the master and workers are supposed to be running on a cloud environment,
23+
while the client is supposed to be running on a laptop, e.g. Docker for Mac/Windows.
24+
25+
## Requirements
26+
27+
- Docker daemon 1.13 or later
28+
- Private registry for distributed execution with multiple nodes
29+
30+
## Usage
31+
32+
### Step 1: Prepare images
33+
34+
$ make build-integration-cli-on-swarm
35+
36+
The following environment variables are known to work in this step:
37+
38+
- `BUILDFLAGS`
39+
- `DOCKER_INCREMENTAL_BINARY`
40+
41+
### Step 2: Execute tests
42+
43+
$ ./hack/integration-cli-on-swarm/integration-cli-on-swarm -replicas 40 -push-worker-image YOUR_REGISTRY.EXAMPLE.COM/integration-cli-worker:latest
44+
45+
The following environment variables are known to work in this step:
46+
47+
- `DOCKER_GRAPHDRIVER`
48+
- `DOCKER_EXPERIMENTAL`
49+
50+
#### Flags
51+
52+
Basic flags:
53+
54+
- `-replicas N`: the number of worker service replicas. i.e. degree of parallelism.
55+
- `-chunks N`: the number of chunks. By default, `chunks` == `replicas`.
56+
- `-push-worker-image REGISTRY/IMAGE:TAG`: push the worker image to the registry. Note that if you have only a single node, you do not need a private registry and therefore do not need to specify `-push-worker-image`.
57+
58+
Experimental flags for mitigating makespan nonuniformity:
59+
60+
- `-shuffle`: Shuffle the test filter strings
61+
62+
Flags for debugging IT on Swarm itself:
63+
64+
- `-rand-seed N`: the random seed. This flag is useful for deterministic replaying. By default (0), the timestamp is used.
65+
- `-filters-file FILE`: the file that contains the `-check.f` strings. By default, the file is automatically generated.
66+
- `-dry-run`: skip the actual workload
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# this Dockerfile is solely used for the master image.
2+
# Please refer to the top-level Makefile for the worker image.
3+
FROM golang:1.7
4+
ADD . /go/src/github.com/docker/docker/hack/integration-cli-on-swarm/agent
5+
RUN go build -o /master github.com/docker/docker/hack/integration-cli-on-swarm/agent/master
6+
ENTRYPOINT ["/master"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"log"
7+
"strings"
8+
"sync"
9+
"sync/atomic"
10+
"time"
11+
12+
"github.com/bfirsh/funker-go"
13+
"github.com/docker/docker/hack/integration-cli-on-swarm/agent/types"
14+
)
15+
16+
// Retry policy for funker calls.
//
// Retrying works around https://github.com/bfirsh/funker/issues/3: when all
// the funker replicas are busy in their own job, we cannot connect to funker.
const (
	// funkerRetryTimeout bounds the total time spent retrying a single call.
	funkerRetryTimeout = time.Hour
	// funkerRetryDuration is the pause between two consecutive attempts.
	funkerRetryDuration = time.Second
)
22+
23+
// ticker logs a keep-alive line every d until the returned channel is closed.
// It is needed for some CI (e.g., on Travis, a job is aborted when no output
// is emitted for 10 minutes).
//
// The caller stops the ticker by closing the returned channel; the background
// goroutine then stops the underlying time.Ticker and exits.
func ticker(d time.Duration) chan struct{} {
	t := time.NewTicker(d)
	stop := make(chan struct{})
	go func() {
		// Release the ticker whichever way the goroutine ends.
		defer t.Stop()
		for {
			select {
			case <-t.C:
				log.Printf("tick (just for keeping CI job active) per %s", d.String())
			case <-stop:
				// A closed channel is always ready to receive, so the loop
				// must end here; otherwise it would spin on this case forever.
				return
			}
		}
	}()
	return stop
}
39+
40+
func executeTests(funkerName string, testChunks [][]string) error {
41+
tickerStopper := ticker(9*time.Minute + 55*time.Second)
42+
defer func() {
43+
close(tickerStopper)
44+
}()
45+
begin := time.Now()
46+
log.Printf("Executing %d chunks in parallel, against %q", len(testChunks), funkerName)
47+
var wg sync.WaitGroup
48+
var passed, failed uint32
49+
for chunkID, tests := range testChunks {
50+
log.Printf("Executing chunk %d (contains %d test filters)", chunkID, len(tests))
51+
wg.Add(1)
52+
go func(chunkID int, tests []string) {
53+
defer wg.Done()
54+
chunkBegin := time.Now()
55+
result, err := executeTestChunkWithRetry(funkerName, types.Args{
56+
ChunkID: chunkID,
57+
Tests: tests,
58+
})
59+
if result.RawLog != "" {
60+
for _, s := range strings.Split(result.RawLog, "\n") {
61+
log.Printf("Log (chunk %d): %s", chunkID, s)
62+
}
63+
}
64+
if err != nil {
65+
log.Printf("Error while executing chunk %d: %v",
66+
chunkID, err)
67+
atomic.AddUint32(&failed, 1)
68+
} else {
69+
if result.Code == 0 {
70+
atomic.AddUint32(&passed, 1)
71+
} else {
72+
atomic.AddUint32(&failed, 1)
73+
}
74+
log.Printf("Finished chunk %d [%d/%d] with %d test filters in %s, code=%d.",
75+
chunkID, passed+failed, len(testChunks), len(tests),
76+
time.Now().Sub(chunkBegin), result.Code)
77+
}
78+
}(chunkID, tests)
79+
}
80+
wg.Wait()
81+
// TODO: print actual tests rather than chunks
82+
log.Printf("Executed %d chunks in %s. PASS: %d, FAIL: %d.",
83+
len(testChunks), time.Now().Sub(begin), passed, failed)
84+
if failed > 0 {
85+
return fmt.Errorf("%d chunks failed", failed)
86+
}
87+
return nil
88+
}
89+
90+
func executeTestChunk(funkerName string, args types.Args) (types.Result, error) {
91+
ret, err := funker.Call(funkerName, args)
92+
if err != nil {
93+
return types.Result{}, err
94+
}
95+
tmp, err := json.Marshal(ret)
96+
if err != nil {
97+
return types.Result{}, err
98+
}
99+
var result types.Result
100+
err = json.Unmarshal(tmp, &result)
101+
return result, err
102+
}
103+
104+
func executeTestChunkWithRetry(funkerName string, args types.Args) (types.Result, error) {
105+
begin := time.Now()
106+
for i := 0; time.Now().Sub(begin) < funkerRetryTimeout; i++ {
107+
result, err := executeTestChunk(funkerName, args)
108+
if err == nil {
109+
log.Printf("executeTestChunk(%q, %d) returned code %d in trial %d", funkerName, args.ChunkID, result.Code, i)
110+
return result, nil
111+
}
112+
if errorSeemsInteresting(err) {
113+
log.Printf("Error while calling executeTestChunk(%q, %d), will retry (trial %d): %v",
114+
funkerName, args.ChunkID, i, err)
115+
}
116+
// TODO: non-constant sleep
117+
time.Sleep(funkerRetryDuration)
118+
}
119+
return types.Result{}, fmt.Errorf("could not call executeTestChunk(%q, %d) in %v", funkerName, args.ChunkID, funkerRetryTimeout)
120+
}
121+
122+
// errorSeemsInteresting reports whether err looks like something other than
// the connection failures described in https://github.com/bfirsh/funker/issues/3.
// It returns false when the error message contains one of the known
// connection-failure substrings, and true otherwise.
func errorSeemsInteresting(err error) bool {
	message := err.Error()
	for _, boring := range [...]string{
		"connection refused",
		"connection reset by peer",
		"no such host",
		"transport endpoint is not connected",
		"no route to host",
	} {
		if strings.Contains(message, boring) {
			return false
		}
	}
	return true
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package main
2+
3+
import (
4+
"errors"
5+
"flag"
6+
"io/ioutil"
7+
"log"
8+
"strings"
9+
)
10+
11+
func main() {
12+
if err := xmain(); err != nil {
13+
log.Fatalf("fatal error: %v", err)
14+
}
15+
}
16+
17+
func xmain() error {
18+
workerService := flag.String("worker-service", "", "Name of worker service")
19+
chunks := flag.Int("chunks", 0, "Number of chunks")
20+
input := flag.String("input", "", "Path to input file")
21+
randSeed := flag.Int64("rand-seed", int64(0), "Random seed")
22+
shuffle := flag.Bool("shuffle", false, "Shuffle the input so as to mitigate makespan nonuniformity")
23+
flag.Parse()
24+
if *workerService == "" {
25+
return errors.New("worker-service unset")
26+
}
27+
if *chunks == 0 {
28+
return errors.New("chunks unset")
29+
}
30+
if *input == "" {
31+
return errors.New("input unset")
32+
}
33+
34+
tests, err := loadTests(*input)
35+
if err != nil {
36+
return err
37+
}
38+
testChunks := chunkTests(tests, *chunks, *shuffle, *randSeed)
39+
log.Printf("Loaded %d tests (%d chunks)", len(tests), len(testChunks))
40+
return executeTests(*workerService, testChunks)
41+
}
42+
43+
func chunkTests(tests []string, numChunks int, shuffle bool, randSeed int64) [][]string {
44+
// shuffling (experimental) mitigates makespan nonuniformity
45+
// Not sure this can cause some locality problem..
46+
if shuffle {
47+
shuffleStrings(tests, randSeed)
48+
}
49+
return chunkStrings(tests, numChunks)
50+
}
51+
52+
// loadTests reads the file at filename and returns its lines with surrounding
// whitespace trimmed, skipping lines that are empty after trimming.
// It returns the read error if the file cannot be read.
func loadTests(filename string) ([]string, error) {
	content, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	var tests []string
	lines := strings.Split(string(content), "\n")
	for i := 0; i < len(lines); i++ {
		trimmed := strings.TrimSpace(lines[i])
		if trimmed == "" {
			continue
		}
		tests = append(tests, trimmed)
	}
	return tests, nil
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package main
2+
3+
import (
4+
"math/rand"
5+
)
6+
7+
// chunkStrings splits x into consecutive chunks of equal size (the last chunk
// may be shorter), aiming for numChunks chunks.
//
// The chunk size is len(x)/numChunks rounded up, so the result may hold fewer
// than numChunks chunks (e.g. 5 strings with numChunks 4 yield 3 chunks).
// A numChunks below 1 is treated as 1, i.e. everything goes into one chunk.
// An empty x yields nil.
//
// The chunks alias x's backing array, but each chunk's capacity is limited to
// its length so that appending to one chunk cannot overwrite the next.
func chunkStrings(x []string, numChunks int) [][]string {
	if len(x) == 0 {
		return nil
	}
	// Guard against division by zero and negative step sizes.
	if numChunks < 1 {
		numChunks = 1
	}
	// More chunks than elements gives a chunk size of 1 either way; clamping
	// also keeps the rounding-up addition below from overflowing.
	if numChunks > len(x) {
		numChunks = len(x)
	}
	chunkSize := (len(x) + numChunks - 1) / numChunks
	result := make([][]string, 0, (len(x)+chunkSize-1)/chunkSize)
	for i := 0; i < len(x); i += chunkSize {
		ub := i + chunkSize
		if ub > len(x) {
			ub = len(x)
		}
		result = append(result, x[i:ub:ub])
	}
	return result
}
20+
21+
// shuffleStrings permutes x in place, drawing the swap positions from a
// math/rand generator seeded with seed. The seed is used as given.
func shuffleStrings(x []string, seed int64) {
	rng := rand.New(rand.NewSource(seed))
	for i := 0; i < len(x); i++ {
		// Exchange element i with one picked from positions 0..i (inclusive).
		pick := rng.Intn(i + 1)
		held := x[i]
		x[i] = x[pick]
		x[pick] = held
	}
}

0 commit comments

Comments
 (0)