Skip to content

Commit 800ca4f

Browse files
committed
Task stats endpoint needs to be populated immediately
to avoid changing behavior from streaming stats, we need to populate the stats endpoint immediately when the stats engine starts. So instead of jittering the first stats gather we need to just do it immediately.
1 parent 73bc2f0 commit 800ca4f

File tree

1 file changed

+36
-36
lines changed

1 file changed

+36
-36
lines changed

agent/dockerclient/dockerapi/docker_client.go

+36-36
Original file line numberDiff line numberDiff line change
@@ -1369,56 +1369,56 @@ func (dg *dockerGoClient) Stats(ctx context.Context, id string, inactivityTimeou
13691369
}()
13701370
} else {
13711371
seelog.Infof("DockerGoClient: Starting to Poll for metrics for container %s", id)
1372-
// firstStatC jitters the time at which containers ask for their first stat.
1373-
// We use this channel to 'seed' the queue with docker stats so that containers
1374-
// can publish metrics more quickly.
1375-
firstStatC := time.After(retry.AddJitter(time.Nanosecond, dg.config.PollingMetricsWaitDuration/2))
1376-
// sleeping here jitters the time at which the ticker is created, so that
1377-
// containers do not synchronize on calling the docker stats api.
1378-
// the max sleep is 80% of the polling interval so that we have a chance to
1379-
// get two stats in the first publishing interval.
1380-
time.Sleep(retry.AddJitter(time.Nanosecond, dg.config.PollingMetricsWaitDuration*8/10))
1381-
statPollTicker := time.NewTicker(dg.config.PollingMetricsWaitDuration)
1372+
13821373
go func() {
13831374
defer cancelRequest()
13841375
defer close(statsC)
1376+
// we need to start by getting container stats so that the task stats
1377+
// endpoint will be populated immediately.
1378+
stats, err := getContainerStatsNotStreamed(client, subCtx, id)
1379+
if err != nil {
1380+
errC <- err
1381+
return
1382+
}
1383+
statsC <- stats
1384+
1385+
// sleeping here jitters the time at which the ticker is created, so that
1386+
// containers do not synchronize on calling the docker stats api.
1387+
// the max sleep is 80% of the polling interval so that we have a chance to
1388+
// get two stats in the first publishing interval.
1389+
time.Sleep(retry.AddJitter(time.Nanosecond, dg.config.PollingMetricsWaitDuration*8/10))
1390+
statPollTicker := time.NewTicker(dg.config.PollingMetricsWaitDuration)
13851391
defer statPollTicker.Stop()
1386-
1387-
for {
1388-
// this select statement is waiting on either the stat polling ticker channel
1389-
// or the firstStat time.After channel to fire. firstStat will fire
1390-
// first and then afterwards we will always grab stats on the ticker.
1391-
select {
1392-
case _, ok := <-statPollTicker.C:
1393-
if !ok {
1394-
return
1395-
}
1396-
case <-firstStatC:
1397-
}
1398-
1399-
stream := false
1400-
resp, err = client.ContainerStats(subCtx, id, stream)
1401-
if err != nil {
1402-
errC <- fmt.Errorf("DockerGoClient: Unable to retrieve stats for container %s: %v", id, err)
1403-
return
1404-
}
1405-
1406-
decoder := json.NewDecoder(resp.Body)
1407-
data := new(types.StatsJSON)
1408-
err := decoder.Decode(data)
1392+
for range statPollTicker.C {
1393+
stats, err := getContainerStatsNotStreamed(client, subCtx, id)
14091394
if err != nil {
1410-
errC <- fmt.Errorf("DockerGoClient: Unable to decode stats for container %s: %v", id, err)
1395+
errC <- err
14111396
return
14121397
}
1413-
1414-
statsC <- data
1398+
statsC <- stats
14151399
}
14161400
}()
14171401
}
14181402

14191403
return statsC, errC
14201404
}
14211405

1406+
func getContainerStatsNotStreamed(client sdkclient.Client, ctx context.Context, id string) (*types.StatsJSON, error) {
1407+
resp, err := client.ContainerStats(ctx, id, false)
1408+
if err != nil {
1409+
return nil, fmt.Errorf("DockerGoClient: Unable to retrieve stats for container %s: %v", id, err)
1410+
}
1411+
1412+
decoder := json.NewDecoder(resp.Body)
1413+
stats := &types.StatsJSON{}
1414+
err = decoder.Decode(stats)
1415+
if err != nil {
1416+
return nil, fmt.Errorf("DockerGoClient: Unable to decode stats for container %s: %v", id, err)
1417+
}
1418+
1419+
return stats, nil
1420+
}
1421+
14221422
func (dg *dockerGoClient) RemoveImage(ctx context.Context, imageName string, timeout time.Duration) error {
14231423
ctx, cancel := context.WithTimeout(ctx, timeout)
14241424
defer cancel()

0 commit comments

Comments
 (0)