Skip to content

Commit 1986f20

Browse files
author
Heming Han
committed
use move tcs handler
1 parent 7e640a5 commit 1986f20

File tree

2 files changed

+126
-26
lines changed

2 files changed

+126
-26
lines changed

agent/app/agent.go

+11-26
Original file line numberDiff line numberDiff line change
@@ -20,51 +20,49 @@ import (
2020
"fmt"
2121
"time"
2222

23-
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
24-
"github.com/aws/amazon-ecs-agent/ecs-agent/logger/field"
25-
26-
dockerdoctor "github.com/aws/amazon-ecs-agent/agent/doctor" // for Docker specific container instance health checks
27-
"github.com/aws/amazon-ecs-agent/agent/eni/watcher"
28-
"github.com/aws/amazon-ecs-agent/ecs-agent/doctor"
2923
"github.com/aws/aws-sdk-go/aws/awserr"
3024

31-
"github.com/aws/amazon-ecs-agent/agent/credentials/instancecreds"
32-
"github.com/aws/amazon-ecs-agent/agent/engine/execcmd"
33-
"github.com/aws/amazon-ecs-agent/agent/metrics"
34-
3525
acshandler "github.com/aws/amazon-ecs-agent/agent/acs/handler"
3626
"github.com/aws/amazon-ecs-agent/agent/api"
3727
"github.com/aws/amazon-ecs-agent/agent/api/ecsclient"
3828
"github.com/aws/amazon-ecs-agent/agent/app/factory"
3929
"github.com/aws/amazon-ecs-agent/agent/config"
4030
"github.com/aws/amazon-ecs-agent/agent/containermetadata"
31+
"github.com/aws/amazon-ecs-agent/agent/credentials/instancecreds"
4132
"github.com/aws/amazon-ecs-agent/agent/data"
4233
"github.com/aws/amazon-ecs-agent/agent/dockerclient"
4334
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi"
4435
"github.com/aws/amazon-ecs-agent/agent/dockerclient/sdkclientfactory"
36+
dockerdoctor "github.com/aws/amazon-ecs-agent/agent/doctor" // for Docker specific container instance health checks
4537
"github.com/aws/amazon-ecs-agent/agent/ec2"
4638
"github.com/aws/amazon-ecs-agent/agent/ecs_client/model/ecs"
4739
"github.com/aws/amazon-ecs-agent/agent/ecscni"
4840
"github.com/aws/amazon-ecs-agent/agent/engine"
4941
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
42+
"github.com/aws/amazon-ecs-agent/agent/engine/execcmd"
5043
engineserviceconnect "github.com/aws/amazon-ecs-agent/agent/engine/serviceconnect"
5144
"github.com/aws/amazon-ecs-agent/agent/eni/pause"
45+
"github.com/aws/amazon-ecs-agent/agent/eni/watcher"
5246
"github.com/aws/amazon-ecs-agent/agent/eventhandler"
5347
"github.com/aws/amazon-ecs-agent/agent/handlers"
48+
"github.com/aws/amazon-ecs-agent/agent/metrics"
5449
"github.com/aws/amazon-ecs-agent/agent/sighandlers"
5550
"github.com/aws/amazon-ecs-agent/agent/sighandlers/exitcodes"
5651
"github.com/aws/amazon-ecs-agent/agent/statemanager"
5752
"github.com/aws/amazon-ecs-agent/agent/stats"
53+
"github.com/aws/amazon-ecs-agent/agent/stats/reporter"
5854
"github.com/aws/amazon-ecs-agent/agent/taskresource"
59-
tcshandler "github.com/aws/amazon-ecs-agent/agent/tcs/handler"
6055
"github.com/aws/amazon-ecs-agent/agent/utils"
6156
"github.com/aws/amazon-ecs-agent/agent/utils/loader"
6257
"github.com/aws/amazon-ecs-agent/agent/utils/mobypkgwrapper"
6358
"github.com/aws/amazon-ecs-agent/agent/version"
6459
acsclient "github.com/aws/amazon-ecs-agent/ecs-agent/acs/client"
6560
apierrors "github.com/aws/amazon-ecs-agent/ecs-agent/api/errors"
6661
"github.com/aws/amazon-ecs-agent/ecs-agent/credentials"
62+
"github.com/aws/amazon-ecs-agent/ecs-agent/doctor"
6763
"github.com/aws/amazon-ecs-agent/ecs-agent/eventstream"
64+
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
65+
"github.com/aws/amazon-ecs-agent/ecs-agent/logger/field"
6866
"github.com/aws/amazon-ecs-agent/ecs-agent/tcs/model/ecstcs"
6967
"github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry"
7068
"github.com/aws/aws-sdk-go/aws"
@@ -866,20 +864,6 @@ func (agent *ecsAgent) startAsyncRoutines(
866864
// Start sending events to the backend
867865
go eventhandler.HandleEngineEvents(agent.ctx, taskEngine, client, taskHandler, attachmentEventHandler)
868866

869-
telemetrySessionParams := tcshandler.TelemetrySessionParams{
870-
Ctx: agent.ctx,
871-
CredentialProvider: agent.credentialProvider,
872-
Cfg: agent.cfg,
873-
ContainerInstanceArn: agent.containerInstanceARN,
874-
DeregisterInstanceEventStream: deregisterInstanceEventStream,
875-
ECSClient: client,
876-
TaskEngine: taskEngine,
877-
StatsEngine: statsEngine,
878-
MetricsChannel: telemetryMessages,
879-
HealthChannel: healthMessages,
880-
Doctor: doctor,
881-
}
882-
883867
err := statsEngine.MustInit(agent.ctx, taskEngine, agent.cfg.Cluster, agent.containerInstanceARN)
884868
if err != nil {
885869
seelog.Warnf("Error initializing metrics engine: %v", err)
@@ -888,7 +872,8 @@ func (agent *ecsAgent) startAsyncRoutines(
888872
go statsEngine.StartMetricsPublish()
889873

890874
// Start metrics session in a go routine
891-
go tcshandler.StartMetricsSession(&telemetrySessionParams)
875+
go reporter.StartSession(agent.ctx, agent.containerInstanceARN, agent.credentialProvider, agent.cfg,
876+
deregisterInstanceEventStream, client, taskEngine, telemetryMessages, healthMessages, doctor)
892877
}
893878

894879
func (agent *ecsAgent) startSpotInstanceDrainingPoller(ctx context.Context, client api.ECSClient) {

agent/stats/reporter/reporter.go

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package reporter
2+
3+
import (
4+
"context"
5+
"io"
6+
"time"
7+
8+
"github.com/aws/amazon-ecs-agent/agent/api"
9+
"github.com/aws/amazon-ecs-agent/agent/config"
10+
"github.com/aws/amazon-ecs-agent/agent/engine"
11+
"github.com/aws/amazon-ecs-agent/agent/version"
12+
"github.com/aws/amazon-ecs-agent/ecs-agent/doctor"
13+
"github.com/aws/amazon-ecs-agent/ecs-agent/eventstream"
14+
tcshandler "github.com/aws/amazon-ecs-agent/ecs-agent/tcs/handler"
15+
"github.com/aws/amazon-ecs-agent/ecs-agent/tcs/model/ecstcs"
16+
"github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry"
17+
"github.com/aws/amazon-ecs-agent/ecs-agent/wsclient"
18+
"github.com/aws/aws-sdk-go/aws/credentials"
19+
"github.com/cihub/seelog"
20+
"github.com/pkg/errors"
21+
)
22+
23+
// Timing defaults supplied to the TCS telemetry session parameters.
const (
	// defaultHeartbeatTimeout is the maximum time to wait between heartbeats
	// without disconnecting.
	defaultHeartbeatTimeout = time.Minute
	// defaultHeartbeatJitter is the value used for the session's
	// HeartbeatJitterMax.
	defaultHeartbeatJitter = time.Minute
	// defaultDisconnectionTimeout is the default websocket client
	// disconnection timeout initiated by agent.
	defaultDisconnectionTimeout = 15 * time.Minute
	// defaultDisconnectionJitter is the value used for the session's
	// DisconnectJitterMax.
	defaultDisconnectionJitter = 30 * time.Minute
)
31+
32+
func StartSession(
33+
ctx context.Context,
34+
containerInstanceArn string,
35+
credentialProvider *credentials.Credentials,
36+
cfg *config.Config,
37+
deregisterInstanceEventStream *eventstream.EventStream,
38+
ecsClient api.ECSClient,
39+
taskEngine engine.TaskEngine,
40+
metricsChannel <-chan ecstcs.TelemetryMessage,
41+
healthChannel <-chan ecstcs.HealthMessage,
42+
doctor *doctor.Doctor) {
43+
ok, cfgParseErr := isContainerHealthMetricsDisabled(cfg)
44+
if cfgParseErr != nil {
45+
seelog.Warnf("Error starting metrics session: %v", cfgParseErr)
46+
return
47+
}
48+
if ok {
49+
seelog.Warnf("Metrics were disabled, not starting the telemetry session")
50+
return
51+
}
52+
53+
backoff := retry.NewExponentialBackoff(time.Second, 1*time.Minute, 0.2, 2)
54+
params := tcshandler.TelemetrySessionParams{
55+
Ctx: ctx,
56+
ContainerInstanceArn: containerInstanceArn,
57+
Cluster: cfg.Cluster,
58+
DisableContainerHealthMetrics: cfg.DisableMetrics.Enabled(),
59+
CredentialProvider: credentialProvider,
60+
Cfg: &wsclient.WSClientMinAgentConfig{
61+
AWSRegion: cfg.AWSRegion,
62+
AcceptInsecureCert: cfg.AcceptInsecureCert,
63+
DockerEndpoint: cfg.DockerEndpoint,
64+
IsDocker: true,
65+
},
66+
DeregisterInstanceEventStream: deregisterInstanceEventStream,
67+
HeartbeatTimeout: defaultHeartbeatTimeout,
68+
HeartbeatJitterMax: defaultHeartbeatJitter,
69+
DisconnectTimeout: defaultDisconnectionTimeout,
70+
DisconnectJitterMax: defaultDisconnectionJitter,
71+
MetricsChannel: metricsChannel,
72+
HealthChannel: healthChannel,
73+
Doctor: doctor,
74+
}
75+
for {
76+
tcsError := startTelemetrySessionAttempt(&params, containerInstanceArn, taskEngine, ecsClient)
77+
if tcsError == nil || tcsError == io.EOF {
78+
seelog.Info("TCS Websocket connection closed for a valid reason")
79+
backoff.Reset()
80+
} else {
81+
seelog.Errorf("Error: lost websocket connection with ECS Telemetry service (TCS): %v", tcsError)
82+
params.Time().Sleep(backoff.Duration())
83+
}
84+
select {
85+
case <-params.Ctx.Done():
86+
seelog.Info("TCS session exited cleanly.")
87+
return
88+
default:
89+
}
90+
}
91+
}
92+
93+
func startTelemetrySessionAttempt(params *tcshandler.TelemetrySessionParams,
94+
containerInstanceArn string, taskEngine engine.TaskEngine, ecsClient api.ECSClient) error {
95+
params.AgentVersion = version.Version
96+
params.AgentHash = version.GitHashString()
97+
if dockerVersion, getVersionErr := taskEngine.Version(); getVersionErr == nil {
98+
params.ContainerRuntime = tcshandler.ContainerRuntimeDocker
99+
params.ContainerRuntimeVersion = dockerVersion
100+
}
101+
tcsEndpoint, err := ecsClient.DiscoverTelemetryEndpoint(containerInstanceArn)
102+
if err != nil {
103+
seelog.Errorf("tcs: unable to discover poll endpoint: %v", err)
104+
}
105+
params.Endpoint = tcsEndpoint
106+
return tcshandler.StartMetricsSession(params)
107+
}
108+
109+
func isContainerHealthMetricsDisabled(cfg *config.Config) (bool, error) {
110+
if cfg != nil {
111+
return cfg.DisableMetrics.Enabled() && cfg.DisableDockerHealthCheck.Enabled(), nil
112+
113+
}
114+
return false, errors.New("Config is empty in the tcs session parameter")
115+
}

0 commit comments

Comments
 (0)