From e09091858f768778c79a5ff213d05af0e44cab43 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 31 Dec 2025 16:15:31 -0600 Subject: [PATCH 01/22] Add dumb scale test agent --- client/internal/clientcommon.go | 6 +- internal/examples/scale/README.md | 26 ++++++ internal/examples/scale/main.go | 107 +++++++++++++++++++++++ internal/examples/scale/scale/agent.go | 108 ++++++++++++++++++++++++ internal/examples/scale/scale/logger.go | 30 +++++++ internal/examples/server/data/agent.go | 19 +++-- 6 files changed, 287 insertions(+), 9 deletions(-) create mode 100644 internal/examples/scale/README.md create mode 100644 internal/examples/scale/main.go create mode 100644 internal/examples/scale/scale/agent.go create mode 100644 internal/examples/scale/scale/logger.go diff --git a/client/internal/clientcommon.go b/client/internal/clientcommon.go index 987e759b..09600d8c 100644 --- a/client/internal/clientcommon.go +++ b/client/internal/clientcommon.go @@ -142,8 +142,10 @@ func (c *ClientCommon) PrepareStart( } } - if err := c.ClientSyncedState.SetRemoteConfigStatus(settings.RemoteConfigStatus); err != nil { - return err + if c.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_ReportsRemoteConfig) { + if err := c.ClientSyncedState.SetRemoteConfigStatus(settings.RemoteConfigStatus); err != nil { + return err + } } var packageStatuses *protobufs.PackageStatuses diff --git a/internal/examples/scale/README.md b/internal/examples/scale/README.md new file mode 100644 index 00000000..233df9d8 --- /dev/null +++ b/internal/examples/scale/README.md @@ -0,0 +1,26 @@ +# scale + +Scale provides dumb agents to scale test an OpAMP server. + +Websocket and HTTP servers are supported, but all agents must use the same connection type. +Each agent uses it's own OpAMP agent client, and runs in a goroutine. + +The main driver logs to stdout, and all agents log to stderr. + +## Usage + +``` +scale \ + -agent-count uint + The number of agents to start. 
(default 1000) + -heartbeat duration + Heartbeat duration (default 30s) + -server-url string + OpAMP server URL (default "wss://127.0.0.1:4320/v1/opamp") + -tls-ca_file string + Path to the CA cert. It verifies the server certificate + -tls-insecure + Disable the client transport security. + -tls-insecure_skip_verify + Will enable TLS but not verify the certificate. +``` diff --git a/internal/examples/scale/main.go b/internal/examples/scale/main.go new file mode 100644 index 00000000..ba768f89 --- /dev/null +++ b/internal/examples/scale/main.go @@ -0,0 +1,107 @@ +package main + +import ( + "context" + "flag" + "log" + "net/url" + "os" + "os/signal" + "time" + + "github.com/open-telemetry/opamp-go/internal/examples/scale/scale" + "go.opentelemetry.io/collector/config/configtls" +) + +func main() { + logger := log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) + + var agentCount uint64 + flag.Uint64Var(&agentCount, "agent-count", 1000, "The number of agents to start.") + + var serverURL string + flag.StringVar(&serverURL, "server-url", "wss://127.0.0.1:4320/v1/opamp", "OpAMP server URL") + + var heartbeat time.Duration + flag.DurationVar(&heartbeat, "heartbeat", time.Second*30, "Heartbeat duration") + + var tlsInsecure bool + flag.BoolVar(&tlsInsecure, "tls-insecure", false, "Disable the client transport security.") + + var tlsInsecureSkipVerify bool + flag.BoolVar(&tlsInsecureSkipVerify, "tls-insecure_skip_verify", false, "Will enable TLS but not verify the certificate.") + + var tlsCAFile string + flag.StringVar(&tlsCAFile, "tls-ca_file", "", "Path to the CA cert. 
It verifies the server certificate") + + flag.Parse() + + // Verify args + if agentCount == 0 { + logger.Fatal("Arg: agent-count must not be zero") + } + + parsedURL, err := url.Parse(serverURL) + if err != nil { + logger.Fatalf("Arg: server-url failed to parse: %v", err) + } + switch parsedURL.Scheme { + case "http", "https": + case "ws", "wss": + default: + logger.Fatalf("Arg: server-url has an unknown scheme: %v", parsedURL.Scheme) + } + + if heartbeat < 0 { + logger.Fatalf("Arg: heartbeat must be non-negative, got %s", heartbeat) + } + + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) + defer cancel() + + // Create tls.Config for clients + // client certs are not currently supported + tlsCFG, err := (&configtls.ClientConfig{ + Insecure: tlsInsecure, + InsecureSkipVerify: tlsInsecureSkipVerify, + Config: configtls.Config{ + CAFile: tlsCAFile, + }, + }).LoadTLSConfig(ctx) + if err != nil { + logger.Fatalf("Unable to create tls.Config: %v", err) + } + + logger.Printf("Starting %d agents", agentCount) + // Create a slice to track agents so we can safely stop them later. + // Use of slice instead of a concurrent goroutine to reduce memory usage. + agents := make([]*scale.Agent, 0, agentCount) + for range agentCount { + select { + case <-ctx.Done(): // early termination + return + default: + } + + agent := scale.NewAgent(serverURL, heartbeat, tlsCFG) + // Use context.Background instead of ctx so we can do a clean shutdown if SIGINT is recieved. 
+ if err := agent.Start(context.Background()); err != nil { + logger.Printf("Error starting agent: %v\n", err) + continue + } + agents = append(agents, agent) + } + logger.Printf("%d agents started", len(agents)) + <-ctx.Done() + for _, agent := range agents { + if err := agent.Stop(); err != nil { + logger.Printf("Error stopping agent: %v\n", err) + } + } + logger.Println("All agents stopped") + + for _, agent := range agents { + agent.Wait() + } + logger.Println("All agents terminated cleanly") +} diff --git a/internal/examples/scale/scale/agent.go b/internal/examples/scale/scale/agent.go new file mode 100644 index 00000000..22082df9 --- /dev/null +++ b/internal/examples/scale/scale/agent.go @@ -0,0 +1,108 @@ +package scale + +import ( + "context" + "crypto/tls" + "fmt" + "strings" + "time" + + "github.com/google/uuid" + "github.com/open-telemetry/opamp-go/client" + "github.com/open-telemetry/opamp-go/client/types" + "github.com/open-telemetry/opamp-go/protobufs" +) + +const ( + serviceName = "io.opentelemetry.opamp.scale" + serviceVersion = "0.1.0" + scaleCapabilities = protobufs.AgentCapabilities_AgentCapabilities_ReportsStatus +) + +var agentDescription = protobufs.AgentDescription{ + IdentifyingAttributes: []*protobufs.KeyValue{{ + Key: "service.name", + Value: &protobufs.AnyValue{ + Value: &protobufs.AnyValue_StringValue{StringValue: serviceName}, + }, + }, { + Key: "service.version", + Value: &protobufs.AnyValue{ + Value: &protobufs.AnyValue_StringValue{StringValue: serviceVersion}, + }, + }}, +} + +type Agent struct { + logger types.Logger + client client.OpAMPClient + settings types.StartSettings + + runningCh chan struct{} // used to determine if the agent has is running +} + +func NewAgent(serverURL string, heartbeat time.Duration, tlsCFG *tls.Config) *Agent { + uid, err := uuid.NewV7() + if err != nil { + panic(err) + } + logger := NewLogger(uid) + + var opampClient client.OpAMPClient + if strings.HasPrefix(serverURL, "http") { // is http(s) connection 
+ opampClient = client.NewHTTP(logger) + } else { // is a websockets connection + opampClient = client.NewWebSocket(logger) + } + + return &Agent{ + logger: logger, + client: opampClient, + settings: types.StartSettings{ + OpAMPServerURL: serverURL, + TLSConfig: tlsCFG, + InstanceUid: types.InstanceUid(uid), + HeartbeatInterval: &heartbeat, + }, + } +} + +// Start starts the OpAMP client with the passed context. +func (a *Agent) Start(ctx context.Context) error { + err := a.client.SetAgentDescription(&agentDescription) + if err != nil { + return fmt.Errorf("unable to set agent description: %w", err) + } + capabilities := scaleCapabilities + err = a.client.SetCapabilities(&capabilities) + if err != nil { + return fmt.Errorf("unable to set agent capabilities: %w", err) + } + + err = a.client.Start(ctx, a.settings) + if err != nil { + return fmt.Errorf("unable to start OpAMP client: %w", err) + } + a.runningCh = make(chan struct{}) + return nil +} + +// Stop stops the OpAMP client with a 5s timeout. +func (a *Agent) Stop() error { + if a.runningCh == nil { + return fmt.Errorf("agent has not started") + } + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + defer close(a.runningCh) + return a.client.Stop(ctx) +} + +// Wait will wait until a running agent stops. 
+func (a *Agent) Wait() error { + if a.runningCh == nil { + return fmt.Errorf("agent has not started") + } + <-a.runningCh + return nil +} diff --git a/internal/examples/scale/scale/logger.go b/internal/examples/scale/scale/logger.go new file mode 100644 index 00000000..c8f4115f --- /dev/null +++ b/internal/examples/scale/scale/logger.go @@ -0,0 +1,30 @@ +package scale + +import ( + "context" + "log" + "os" + + "github.com/google/uuid" + "github.com/open-telemetry/opamp-go/client/types" +) + +var _ types.Logger = &Logger{} + +type Logger struct { + logger *log.Logger +} + +func NewLogger(uid uuid.UUID) *Logger { + return &Logger{ + logger: log.New(os.Stderr, "agent-"+uid.String()+": ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix), + } +} + +func (l *Logger) Debugf(ctx context.Context, format string, v ...interface{}) { + l.logger.Printf(format, v...) +} + +func (l *Logger) Errorf(ctx context.Context, format string, v ...interface{}) { + l.logger.Printf(format, v...) +} diff --git a/internal/examples/server/data/agent.go b/internal/examples/server/data/agent.go index 9e79312d..1d5d1a5d 100644 --- a/internal/examples/server/data/agent.go +++ b/internal/examples/server/data/agent.go @@ -266,18 +266,23 @@ func (agent *Agent) processStatusUpdate( if agentDescrChanged { // Agent description is changed. - // We need to recalculate the config. - configChanged = agent.calcRemoteConfig() + if agent.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_ReportsRemoteConfig) { + // We need to recalculate the config. + configChanged = agent.calcRemoteConfig() + if agent.Status.RemoteConfigStatus != nil { + configChanged = configChanged || bytes.Equal(agent.Status.RemoteConfigStatus.LastRemoteConfigHash, agent.remoteConfig.ConfigHash) + } + } - // And set connection settings that are appropriate for the Agent description. 
- agent.calcConnectionSettings(response) + if agent.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_AcceptsOtherConnectionSettings) { + // And set connection settings that are appropriate for the Agent description. + agent.calcConnectionSettings(response) + } } // If remote config is changed and different from what the Agent has then // send the new remote config to the Agent. - if configChanged || - (agent.Status.RemoteConfigStatus != nil && - bytes.Compare(agent.Status.RemoteConfigStatus.LastRemoteConfigHash, agent.remoteConfig.ConfigHash) != 0) { + if agent.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_AcceptsRemoteConfig) && configChanged { // The new status resulted in a change in the config of the Agent or the Agent // does not have this config (hash is different). Send the new config the Agent. response.RemoteConfig = agent.remoteConfig From 229943dcbb363911be84b093c6141a20c3f7ab4a Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Tue, 6 Jan 2026 13:17:35 -0600 Subject: [PATCH 02/22] Change scale test driver to use example agents --- internal/examples/agent/agent/agent.go | 154 +++++++++++++++++-------- internal/examples/agent/main.go | 5 +- internal/examples/scale/main.go | 38 +++--- 3 files changed, 133 insertions(+), 64 deletions(-) diff --git a/internal/examples/agent/agent/agent.go b/internal/examples/agent/agent/agent.go index a6f7299e..3924a277 100644 --- a/internal/examples/agent/agent/agent.go +++ b/internal/examples/agent/agent/agent.go @@ -10,6 +10,7 @@ import ( "crypto/x509/pkix" "encoding/pem" "fmt" + "log" "net/http" "os" "runtime" @@ -27,7 +28,7 @@ import ( "github.com/open-telemetry/opamp-go/protobufs" ) -const localConfig = ` +var localConfig = []byte(` exporters: otlp: endpoint: localhost:1111 @@ -44,24 +45,29 @@ service: receivers: [otlp] processors: [] exporters: [otlp] -` +`) + +// Agent identification constants +const ( + agentType = "io.opentelemetry.collector" + agentVersion = "1.0.0" +) type Agent struct 
{ + client client.OpAMPClient logger types.Logger + doneCh chan struct{} agentType string agentVersion string + instanceId uuid.UUID agentConfig *config.AgentConfig - effectiveConfig string - - instanceId uuid.UUID + effectiveConfig []byte agentDescription *protobufs.AgentDescription - opampClient client.OpAMPClient - remoteConfigStatus *protobufs.RemoteConfigStatus metricReporter *MetricReporter @@ -89,31 +95,58 @@ func (p *proxySettings) Clone() *proxySettings { } } -func NewAgent(logger types.Logger, agentType, agentVersion string, agentConfig *config.AgentConfig) *Agent { +type Option func(agent *Agent) + +// WithLogger is used to set an Agent's logger +func WithLogger(l types.Logger) Option { + return func(agent *Agent) { + agent.logger = l + } +} + +// WithAgentType is used to set an Agent's type +func WithAgentType(s string) Option { + return func(agent *Agent) { + agent.agentType = s + } +} + +// WithAgentVersion is used to set an Agent's version +func WithAgentVersion(s string) Option { + return func(agent *Agent) { + agent.agentVersion = s + } +} + +// WithInstanceID is used to set an Agent's id +func WithInstanceID(id uuid.UUID) Option { + return func(agent *Agent) { + agent.instanceId = id + } +} + +// WithNoClientCertRequest will ensure the agent does not request a client cert when initially connecting. 
+func WithNoClientCertRequest() Option { + return func(agent *Agent) { + agent.certRequested = true + } +} + +func NewAgent(agentConfig *config.AgentConfig, options ...Option) *Agent { agent := &Agent{ - effectiveConfig: localConfig, - logger: logger, + logger: &Logger{Logger: log.Default()}, agentType: agentType, agentVersion: agentVersion, agentConfig: agentConfig, + effectiveConfig: localConfig, } - agent.createAgentIdentity() - agent.logger.Debugf(context.Background(), "Agent starting, id=%v, type=%s, version=%s.", - agent.instanceId, agentType, agentVersion) - - agent.loadLocalConfig() - - tls, err := agentConfig.GetTLSConfig(context.Background()) - if err != nil { - agent.logger.Errorf(context.Background(), "Cannot get the TLS config: %v", err) - return nil + for _, option := range options { + option(agent) } - if err = agent.connect(withTLSConfig(tls)); err != nil { - agent.logger.Errorf(context.Background(), "Cannot connect OpAMP client: %v", err) - return nil - } + agent.createAgentIdentity() + agent.loadLocalConfig() return agent } @@ -139,7 +172,13 @@ func withProxy(proxy *proxySettings) settingsOp { } func (agent *Agent) connect(ops ...settingsOp) error { - agent.opampClient = client.NewWebSocket(agent.logger) + if strings.HasPrefix(agent.agentConfig.Endpoint, "http") { + agent.client = client.NewHTTP(agent.logger) + } else if strings.HasPrefix(agent.agentConfig.Endpoint, "ws") { + agent.client = client.NewWebSocket(agent.logger) + } else { + return fmt.Errorf("server endpoint has unknown scheme: %s", agent.agentConfig.Endpoint) + } settings := types.StartSettings{ OpAMPServerURL: agent.agentConfig.Endpoint, @@ -175,7 +214,7 @@ func (agent *Agent) connect(ops ...settingsOp) error { headers: settings.ProxyHeaders, } - err := agent.opampClient.SetAgentDescription(agent.agentDescription) + err := agent.client.SetAgentDescription(agent.agentDescription) if err != nil { return err } @@ -186,7 +225,7 @@ func (agent *Agent) connect(ops ...settingsOp) error { 
protobufs.AgentCapabilities_AgentCapabilities_ReportsOwnMetrics | protobufs.AgentCapabilities_AgentCapabilities_AcceptsOpAMPConnectionSettings | protobufs.AgentCapabilities_AgentCapabilities_ReportsConnectionSettingsStatus - err = agent.opampClient.SetCapabilities(&supportedCapabilities) + err = agent.client.SetCapabilities(&supportedCapabilities) if err != nil { return err } @@ -201,7 +240,7 @@ func (agent *Agent) connect(ops ...settingsOp) error { agent.logger.Debugf(context.Background(), "Starting OpAMP client...") - err = agent.opampClient.Start(context.Background(), settings) + err = agent.client.Start(context.Background(), settings) if err != nil { return err } @@ -213,16 +252,19 @@ func (agent *Agent) connect(ops ...settingsOp) error { func (agent *Agent) disconnect(ctx context.Context) { agent.logger.Debugf(ctx, "Disconnecting from server...") - agent.opampClient.Stop(ctx) + agent.client.Stop(ctx) } +// createAgentIdentity sets the instanceId if it is not already set and populates agentDescription. func (agent *Agent) createAgentIdentity() { // Generate instance id. 
- uid, err := uuid.NewV7() - if err != nil { - panic(err) + if agent.instanceId == uuid.Nil { + uid, err := uuid.NewV7() + if err != nil { + panic(err) + } + agent.instanceId = uid } - agent.instanceId = uid hostname, _ := os.Hostname() @@ -275,23 +317,24 @@ func (agent *Agent) updateAgentIdentity(ctx context.Context, instanceId uuid.UUI } } +// loadLocalConfig sets effectiveConfig func (agent *Agent) loadLocalConfig() { k := koanf.New(".") - _ = k.Load(rawbytes.Provider([]byte(localConfig)), yaml.Parser()) + _ = k.Load(rawbytes.Provider(localConfig), yaml.Parser()) effectiveConfigBytes, err := k.Marshal(yaml.Parser()) if err != nil { panic(err) } - agent.effectiveConfig = string(effectiveConfigBytes) + agent.effectiveConfig = effectiveConfigBytes } func (agent *Agent) composeEffectiveConfig() *protobufs.EffectiveConfig { return &protobufs.EffectiveConfig{ ConfigMap: &protobufs.AgentConfigMap{ ConfigMap: map[string]*protobufs.AgentConfigFile{ - "": {Body: []byte(agent.effectiveConfig)}, + "": {Body: agent.effectiveConfig}, }, }, } @@ -345,7 +388,7 @@ func (agent *Agent) applyRemoteConfig(config *protobufs.AgentRemoteConfig) (conf // Begin with local config. We will later merge received configs on top of it. k := koanf.New(".") - if err := k.Load(rawbytes.Provider([]byte(localConfig)), yaml.Parser()); err != nil { + if err := k.Load(rawbytes.Provider(localConfig), yaml.Parser()); err != nil { return false, err } @@ -392,22 +435,41 @@ func (agent *Agent) applyRemoteConfig(config *protobufs.AgentRemoteConfig) (conf panic(err) } - newEffectiveConfig := string(effectiveConfigBytes) configChanged = false - if agent.effectiveConfig != newEffectiveConfig { + if !bytes.Equal(agent.effectiveConfig, effectiveConfigBytes) { agent.logger.Debugf(context.Background(), "Effective config changed. 
Need to report to server.") - agent.effectiveConfig = newEffectiveConfig + agent.effectiveConfig = effectiveConfigBytes configChanged = true } return configChanged, nil } +func (agent *Agent) Start() error { + tls, err := agent.agentConfig.GetTLSConfig(context.Background()) + if err != nil { + return err + } + agent.logger.Debugf(context.Background(), "Agent starting, id=%v, type=%s, version=%s.", + agent.instanceId, agent.agentType, agent.agentVersion) + agent.doneCh = make(chan struct{}) + return agent.connect(withTLSConfig(tls)) +} + func (agent *Agent) Shutdown() { + if agent.doneCh == nil { + agent.logger.Debugf(context.Background(), "Agent not running.") + return + } agent.logger.Debugf(context.Background(), "Agent shutting down...") - if agent.opampClient != nil { - _ = agent.opampClient.Stop(context.Background()) + if agent.client != nil { + _ = agent.client.Stop(context.Background()) } + close(agent.doneCh) +} + +func (agent *Agent) Wait() { + <-agent.doneCh } // requestClientCertificate sets a request to be sent to the Server to create @@ -470,7 +532,7 @@ func (agent *Agent) requestClientCertificate() { // Send the request to the Server (immediately if already connected // or upon next successful connection). 
- err = agent.opampClient.RequestConnectionSettings( + err = agent.client.RequestConnectionSettings( &protobufs.ConnectionSettingsRequest{ Opamp: &protobufs.OpAMPConnectionSettingsRequest{ CertificateRequest: &protobufs.CertificateRequest{ @@ -493,7 +555,7 @@ func (agent *Agent) onMessage(ctx context.Context, msg *types.MessageData) { var err error configChanged, err = agent.applyRemoteConfig(msg.RemoteConfig) if err != nil { - agent.opampClient.SetRemoteConfigStatus( + agent.client.SetRemoteConfigStatus( &protobufs.RemoteConfigStatus{ LastRemoteConfigHash: msg.RemoteConfig.ConfigHash, Status: protobufs.RemoteConfigStatuses_RemoteConfigStatuses_FAILED, @@ -501,7 +563,7 @@ func (agent *Agent) onMessage(ctx context.Context, msg *types.MessageData) { }, ) } else { - agent.opampClient.SetRemoteConfigStatus(&protobufs.RemoteConfigStatus{ + agent.client.SetRemoteConfigStatus(&protobufs.RemoteConfigStatus{ LastRemoteConfigHash: msg.RemoteConfig.ConfigHash, Status: protobufs.RemoteConfigStatuses_RemoteConfigStatuses_APPLIED, }) @@ -518,7 +580,7 @@ func (agent *Agent) onMessage(ctx context.Context, msg *types.MessageData) { } if configChanged { - err := agent.opampClient.UpdateEffectiveConfig(ctx) + err := agent.client.UpdateEffectiveConfig(ctx) if err != nil { agent.logger.Errorf(ctx, err.Error()) } diff --git a/internal/examples/agent/main.go b/internal/examples/agent/main.go index 7c6d9e88..84e5c7b0 100644 --- a/internal/examples/agent/main.go +++ b/internal/examples/agent/main.go @@ -51,7 +51,10 @@ func main() { }, } - agent := agent.NewAgent(&agent.Logger{Logger: log.Default()}, agentType, agentVersion, config) + agent := agent.NewAgent(config, agent.WithAgentType(agentType), agent.WithAgentVersion(agentVersion)) + if err := agent.Start(); err != nil { + log.Fatal("Agent encountered error when starting: %v", err) + } interrupt := make(chan os.Signal, 1) signal.Notify(interrupt, os.Interrupt) diff --git a/internal/examples/scale/main.go b/internal/examples/scale/main.go 
index ba768f89..6b03b7a7 100644 --- a/internal/examples/scale/main.go +++ b/internal/examples/scale/main.go @@ -9,6 +9,9 @@ import ( "os/signal" "time" + "github.com/google/uuid" + "github.com/open-telemetry/opamp-go/internal/examples/agent/agent" + "github.com/open-telemetry/opamp-go/internal/examples/config" "github.com/open-telemetry/opamp-go/internal/examples/scale/scale" "go.opentelemetry.io/collector/config/configtls" ) @@ -59,23 +62,21 @@ func main() { ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) defer cancel() - // Create tls.Config for clients - // client certs are not currently supported - tlsCFG, err := (&configtls.ClientConfig{ - Insecure: tlsInsecure, - InsecureSkipVerify: tlsInsecureSkipVerify, - Config: configtls.Config{ - CAFile: tlsCAFile, + cfg := &config.AgentConfig{ + Endpoint: serverURL, + TLSSetting: configtls.ClientConfig{ + Insecure: tlsInsecure, + InsecureSkipVerify: tlsInsecureSkipVerify, + Config: configtls.Config{ + CAFile: tlsCAFile, + }, }, - }).LoadTLSConfig(ctx) - if err != nil { - logger.Fatalf("Unable to create tls.Config: %v", err) } logger.Printf("Starting %d agents", agentCount) // Create a slice to track agents so we can safely stop them later. // Use of slice instead of a concurrent goroutine to reduce memory usage. - agents := make([]*scale.Agent, 0, agentCount) + agents := make([]*agent.Agent, 0, agentCount) for range agentCount { select { case <-ctx.Done(): // early termination @@ -83,9 +84,14 @@ func main() { default: } - agent := scale.NewAgent(serverURL, heartbeat, tlsCFG) - // Use context.Background instead of ctx so we can do a clean shutdown if SIGINT is recieved. - if err := agent.Start(context.Background()); err != nil { + id, err := uuid.NewV7() + if err != nil { + panic(err) + } + agentLogger := scale.NewLogger(id) + agent := agent.NewAgent(cfg, agent.WithNoClientCertRequest(), agent.WithInstanceID(id), agent.WithLogger(agentLogger)) // TODO heartbeat? 
+ + if err := agent.Start(); err != nil { logger.Printf("Error starting agent: %v\n", err) continue } @@ -94,9 +100,7 @@ func main() { logger.Printf("%d agents started", len(agents)) <-ctx.Done() for _, agent := range agents { - if err := agent.Stop(); err != nil { - logger.Printf("Error stopping agent: %v\n", err) - } + agent.Shutdown() } logger.Println("All agents stopped") From d10f4e3fe3e2b2d9f94a86d621541baff2d204e0 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Tue, 6 Jan 2026 13:37:25 -0600 Subject: [PATCH 03/22] Cleanup --- internal/examples/agent/agent/agent.go | 5 +- internal/examples/agent/agent/logger.go | 9 ++ internal/examples/agent/main.go | 2 +- internal/examples/config/agent.go | 6 +- internal/examples/scale/README.md | 2 +- internal/examples/scale/main.go | 20 ++--- internal/examples/scale/scale/agent.go | 108 ------------------------ internal/examples/scale/scale/logger.go | 30 ------- 8 files changed, 28 insertions(+), 154 deletions(-) delete mode 100644 internal/examples/scale/scale/agent.go delete mode 100644 internal/examples/scale/scale/logger.go diff --git a/internal/examples/agent/agent/agent.go b/internal/examples/agent/agent/agent.go index 3924a277..2c513dfa 100644 --- a/internal/examples/agent/agent/agent.go +++ b/internal/examples/agent/agent/agent.go @@ -181,8 +181,9 @@ func (agent *Agent) connect(ops ...settingsOp) error { } settings := types.StartSettings{ - OpAMPServerURL: agent.agentConfig.Endpoint, - InstanceUid: types.InstanceUid(agent.instanceId), + OpAMPServerURL: agent.agentConfig.Endpoint, + HeartbeatInterval: agent.agentConfig.HeartbeatInterval, + InstanceUid: types.InstanceUid(agent.instanceId), Callbacks: types.Callbacks{ OnConnect: func(ctx context.Context) { agent.logger.Debugf(ctx, "Connected to the server.") diff --git a/internal/examples/agent/agent/logger.go b/internal/examples/agent/agent/logger.go index 2e5c5ef5..44d746a2 100644 --- a/internal/examples/agent/agent/logger.go +++ 
b/internal/examples/agent/agent/logger.go @@ -3,7 +3,9 @@ package agent import ( "context" "log" + "os" + "github.com/google/uuid" "github.com/open-telemetry/opamp-go/client/types" ) @@ -13,6 +15,13 @@ type Logger struct { Logger *log.Logger } +// NewScaleLogger returns a logger that prints to stderr with passed uid as a part of the prefix. +func NewScaleLogger(uid uuid.UUID) *Logger { + return &Logger{ + Logger: log.New(os.Stderr, "agent-"+uid.String()+": ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix), + } +} + func (l *Logger) Debugf(_ context.Context, format string, v ...interface{}) { l.Logger.Printf(format, v...) } diff --git a/internal/examples/agent/main.go b/internal/examples/agent/main.go index 84e5c7b0..88c56bd6 100644 --- a/internal/examples/agent/main.go +++ b/internal/examples/agent/main.go @@ -53,7 +53,7 @@ func main() { agent := agent.NewAgent(config, agent.WithAgentType(agentType), agent.WithAgentVersion(agentVersion)) if err := agent.Start(); err != nil { - log.Fatal("Agent encountered error when starting: %v", err) + log.Fatalf("Agent encountered error when starting: %v", err) } interrupt := make(chan os.Signal, 1) diff --git a/internal/examples/config/agent.go b/internal/examples/config/agent.go index 3e3cf9d4..5f15766e 100644 --- a/internal/examples/config/agent.go +++ b/internal/examples/config/agent.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "fmt" "net/url" + "time" "github.com/open-telemetry/opamp-go/internal/examples/certs" "go.opentelemetry.io/collector/config/configopaque" @@ -12,8 +13,9 @@ import ( ) type AgentConfig struct { - Endpoint string `mapstructure:"endpoint"` - TLSSetting configtls.ClientConfig `mapstructure:"tls,omitempty"` + Endpoint string `mapstructure:"endpoint"` + HeartbeatInterval *time.Duration `mapstructure:"heartbeat_interval"` + TLSSetting configtls.ClientConfig `mapstructure:"tls,omitempty"` } func (a *AgentConfig) GetTLSConfig(ctx context.Context) (*tls.Config, error) { diff --git a/internal/examples/scale/README.md 
b/internal/examples/scale/README.md index 233df9d8..b817d10e 100644 --- a/internal/examples/scale/README.md +++ b/internal/examples/scale/README.md @@ -1,6 +1,6 @@ # scale -Scale provides dumb agents to scale test an OpAMP server. +Scale provides agents to scale test an OpAMP server. Websocket and HTTP servers are supported, but all agents must use the same connection type. Each agent uses it's own OpAMP agent client, and runs in a goroutine. diff --git a/internal/examples/scale/main.go b/internal/examples/scale/main.go index 6b03b7a7..4cdc4c00 100644 --- a/internal/examples/scale/main.go +++ b/internal/examples/scale/main.go @@ -12,7 +12,6 @@ import ( "github.com/google/uuid" "github.com/open-telemetry/opamp-go/internal/examples/agent/agent" "github.com/open-telemetry/opamp-go/internal/examples/config" - "github.com/open-telemetry/opamp-go/internal/examples/scale/scale" "go.opentelemetry.io/collector/config/configtls" ) @@ -63,7 +62,8 @@ func main() { defer cancel() cfg := &config.AgentConfig{ - Endpoint: serverURL, + Endpoint: serverURL, + HeartbeatInterval: &heartbeat, TLSSetting: configtls.ClientConfig{ Insecure: tlsInsecure, InsecureSkipVerify: tlsInsecureSkipVerify, @@ -88,24 +88,24 @@ func main() { if err != nil { panic(err) } - agentLogger := scale.NewLogger(id) - agent := agent.NewAgent(cfg, agent.WithNoClientCertRequest(), agent.WithInstanceID(id), agent.WithLogger(agentLogger)) // TODO heartbeat? 
+ agentLogger := agent.NewScaleLogger(id) + a := agent.NewAgent(cfg, agent.WithNoClientCertRequest(), agent.WithInstanceID(id), agent.WithLogger(agentLogger)) - if err := agent.Start(); err != nil { + if err := a.Start(); err != nil { logger.Printf("Error starting agent: %v\n", err) continue } - agents = append(agents, agent) + agents = append(agents, a) } logger.Printf("%d agents started", len(agents)) <-ctx.Done() - for _, agent := range agents { - agent.Shutdown() + for _, a := range agents { + a.Shutdown() } logger.Println("All agents stopped") - for _, agent := range agents { - agent.Wait() + for _, a := range agents { + a.Wait() } logger.Println("All agents terminated cleanly") } diff --git a/internal/examples/scale/scale/agent.go b/internal/examples/scale/scale/agent.go deleted file mode 100644 index 22082df9..00000000 --- a/internal/examples/scale/scale/agent.go +++ /dev/null @@ -1,108 +0,0 @@ -package scale - -import ( - "context" - "crypto/tls" - "fmt" - "strings" - "time" - - "github.com/google/uuid" - "github.com/open-telemetry/opamp-go/client" - "github.com/open-telemetry/opamp-go/client/types" - "github.com/open-telemetry/opamp-go/protobufs" -) - -const ( - serviceName = "io.opentelemetry.opamp.scale" - serviceVersion = "0.1.0" - scaleCapabilities = protobufs.AgentCapabilities_AgentCapabilities_ReportsStatus -) - -var agentDescription = protobufs.AgentDescription{ - IdentifyingAttributes: []*protobufs.KeyValue{{ - Key: "service.name", - Value: &protobufs.AnyValue{ - Value: &protobufs.AnyValue_StringValue{StringValue: serviceName}, - }, - }, { - Key: "service.version", - Value: &protobufs.AnyValue{ - Value: &protobufs.AnyValue_StringValue{StringValue: serviceVersion}, - }, - }}, -} - -type Agent struct { - logger types.Logger - client client.OpAMPClient - settings types.StartSettings - - runningCh chan struct{} // used to determine if the agent has is running -} - -func NewAgent(serverURL string, heartbeat time.Duration, tlsCFG *tls.Config) *Agent { - 
uid, err := uuid.NewV7() - if err != nil { - panic(err) - } - logger := NewLogger(uid) - - var opampClient client.OpAMPClient - if strings.HasPrefix(serverURL, "http") { // is http(s) connection - opampClient = client.NewHTTP(logger) - } else { // is a websockets connection - opampClient = client.NewWebSocket(logger) - } - - return &Agent{ - logger: logger, - client: opampClient, - settings: types.StartSettings{ - OpAMPServerURL: serverURL, - TLSConfig: tlsCFG, - InstanceUid: types.InstanceUid(uid), - HeartbeatInterval: &heartbeat, - }, - } -} - -// Start starts the OpAMP client with the passed context. -func (a *Agent) Start(ctx context.Context) error { - err := a.client.SetAgentDescription(&agentDescription) - if err != nil { - return fmt.Errorf("unable to set agent description: %w", err) - } - capabilities := scaleCapabilities - err = a.client.SetCapabilities(&capabilities) - if err != nil { - return fmt.Errorf("unable to set agent capabilities: %w", err) - } - - err = a.client.Start(ctx, a.settings) - if err != nil { - return fmt.Errorf("unable to start OpAMP client: %w", err) - } - a.runningCh = make(chan struct{}) - return nil -} - -// Stop stops the OpAMP client with a 5s timeout. -func (a *Agent) Stop() error { - if a.runningCh == nil { - return fmt.Errorf("agent has not started") - } - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - defer close(a.runningCh) - return a.client.Stop(ctx) -} - -// Wait will wait until a running agent stops. 
-func (a *Agent) Wait() error { - if a.runningCh == nil { - return fmt.Errorf("agent has not started") - } - <-a.runningCh - return nil -} diff --git a/internal/examples/scale/scale/logger.go b/internal/examples/scale/scale/logger.go deleted file mode 100644 index c8f4115f..00000000 --- a/internal/examples/scale/scale/logger.go +++ /dev/null @@ -1,30 +0,0 @@ -package scale - -import ( - "context" - "log" - "os" - - "github.com/google/uuid" - "github.com/open-telemetry/opamp-go/client/types" -) - -var _ types.Logger = &Logger{} - -type Logger struct { - logger *log.Logger -} - -func NewLogger(uid uuid.UUID) *Logger { - return &Logger{ - logger: log.New(os.Stderr, "agent-"+uid.String()+": ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix), - } -} - -func (l *Logger) Debugf(ctx context.Context, format string, v ...interface{}) { - l.logger.Printf(format, v...) -} - -func (l *Logger) Errorf(ctx context.Context, format string, v ...interface{}) { - l.logger.Printf(format, v...) -} From 6591f69f6a1b48d06e4b8f4ecbfa6d413a9e6d1c Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 7 Jan 2026 12:47:46 -0600 Subject: [PATCH 04/22] Server performance improvements --- server/serverimpl.go | 68 +++++++++++++++++++++++++++----------- server/serverimpl_test.go | 21 +++++++++--- server/types/connection.go | 4 +++ server/wsconnection.go | 7 ++++ 4 files changed, 75 insertions(+), 25 deletions(-) diff --git a/server/serverimpl.go b/server/serverimpl.go index 12048199..f1a4cc81 100644 --- a/server/serverimpl.go +++ b/server/serverimpl.go @@ -43,6 +43,9 @@ type server struct { httpServer *http.Server httpServerServeWg *sync.WaitGroup + // gwPool is a gzip.Writer pool. This is intended to lower the amount of writers that are created when responding to HTTP requests. + gwPool sync.Pool + // The network address Server is listening on. Nil if not started. 
addr net.Addr } @@ -65,7 +68,14 @@ func New(logger types.Logger) *server { logger = &internal.NopLogger{} } - return &server{logger: logger} + return &server{ + logger: logger, + gwPool: sync.Pool{ + New: func() any { + return gzip.NewWriter(io.Discard) + }, + }, + } } func (s *server) Attach(settings Settings) (HTTPHandlerFunc, ConnContext, error) { @@ -245,11 +255,20 @@ func (s *server) handleWSConnection(reqCtx context.Context, wsConn *websocket.Co connectionCallbacks.OnConnected(reqCtx, agentConn) - sentCustomCapabilities := false + var sentCustomCapabilities sync.Once - // Loop until fail to read from the WebSocket connection. + // Loop until fail to read from the WebSocket connection or reqCtx is cancelled. +LOOP: for { - msgContext := context.Background() + select { + case <-reqCtx.Done(): // signal connection shutdown, note that ReadMessage below is a blocking call, so this will not write as soon as the context is cancelled. + if err := agentConn.SendClose(); err != nil { + s.logger.Errorf(context.Background(), "error sending close frame for WebSocket connection: %v", err) + break LOOP + } + default: + } + msgContext := context.Background() // FIXME why not reqContext? request := protobufs.AgentToServer{} // Block until the next message can be read. 
@@ -294,12 +313,11 @@ func (s *server) handleWSConnection(reqCtx context.Context, wsConn *websocket.Co if len(response.InstanceUid) == 0 { response.InstanceUid = request.InstanceUid } - if !sentCustomCapabilities { + sentCustomCapabilities.Do(func() { response.CustomCapabilities = &protobufs.CustomCapabilities{ Capabilities: s.settings.CustomCapabilities, } - sentCustomCapabilities = true - } + }) err = agentConn.Send(msgContext, response) if err != nil { @@ -311,32 +329,42 @@ func (s *server) handleWSConnection(reqCtx context.Context, wsConn *websocket.Co } } -func decompressGzip(data []byte) ([]byte, error) { - r, err := gzip.NewReader(bytes.NewBuffer(data)) +func (s *server) readReqBody(req *http.Request) ([]byte, error) { + if req.Header.Get(headerContentEncoding) == contentEncodingGzip { + data, err := decompressGzip(req.Body) + if err != nil { + return nil, err + } + return data, nil + } + data, err := io.ReadAll(req.Body) if err != nil { return nil, err } - defer r.Close() - return io.ReadAll(r) + return data, nil } -func (s *server) readReqBody(req *http.Request) ([]byte, error) { - data, err := io.ReadAll(req.Body) +func (s *server) compressGzip(data []byte) ([]byte, error) { // FIXME should we pass the request writer instead of allocating a buffer? + var buf bytes.Buffer + w, _ := s.gwPool.Get().(*gzip.Writer) + defer s.gwPool.Put(w) + w.Reset(&buf) + + _, err := w.Write(data) if err != nil { return nil, err } - if req.Header.Get(headerContentEncoding) == contentEncodingGzip { - data, err = decompressGzip(data) - if err != nil { - return nil, err - } + err = w.Close() + if err != nil { + return nil, err } - return data, nil + return buf.Bytes(), nil } func compressGzip(data []byte) ([]byte, error) { var buf bytes.Buffer w := gzip.NewWriter(&buf) + _, err := w.Write(data) if err != nil { return nil, err @@ -405,7 +433,7 @@ func (s *server) handlePlainHTTPRequest(req *http.Request, w http.ResponseWriter // Send the response. 
w.Header().Set(headerContentType, contentTypeProtobuf) if req.Header.Get(headerAcceptEncoding) == contentEncodingGzip { - bodyBytes, err = compressGzip(bodyBytes) + bodyBytes, err = s.compressGzip(bodyBytes) if err != nil { s.logger.Errorf(req.Context(), "Cannot compress response: %v", err) w.WriteHeader(http.StatusInternalServerError) diff --git a/server/serverimpl_test.go b/server/serverimpl_test.go index 74b1f1cc..1d111946 100644 --- a/server/serverimpl_test.go +++ b/server/serverimpl_test.go @@ -964,11 +964,8 @@ func TestServerHonoursAcceptEncoding(t *testing.T) { // Verify the received message is what was sent. assert.True(t, proto.Equal(rcvMsg.Load().(proto.Message), &sendMsg)) - // Read Server's response. - b, err = io.ReadAll(resp.Body) - require.NoError(t, err) - // Decompress the gzip response - b, err = decompressGzip(b) + // Read and decompress the gzip response + b, err = decompressGzip(resp.Body) require.NoError(t, err) assert.EqualValues(t, http.StatusOK, resp.StatusCode) @@ -1398,3 +1395,17 @@ func TestServerTLS(t *testing.T) { eventually(t, func() bool { return atomic.LoadInt32(&onCloseCalled) == 1 }) } + +func BenchmarkCompressGzip(b *testing.B) { + input := []byte("Hello, World!") + s := New(nil) + b.ResetTimer() + b.ReportAllocs() + + for range b.N { + p, err := s.compressGzip(input) + if p == nil || err != nil { + b.Fatal(err) + } + } +} diff --git a/server/types/connection.go b/server/types/connection.go index 2b2bd814..b244a901 100644 --- a/server/types/connection.go +++ b/server/types/connection.go @@ -20,6 +20,10 @@ type Connection interface { // Should return as soon as possible if the ctx is cancelled. Send(ctx context.Context, message *protobufs.ServerToAgent) error + // SendClose sends a close control frame with the CloseCode set to going away. + // This should be used to signal a server shutdown. + SendClose() error + // Disconnect closes the network connection. 
// Any blocked Read or Write operations will be unblocked and return errors. Disconnect() error diff --git a/server/wsconnection.go b/server/wsconnection.go index c721a043..d4430437 100644 --- a/server/wsconnection.go +++ b/server/wsconnection.go @@ -40,6 +40,13 @@ func (c *wsConnection) Send(_ context.Context, message *protobufs.ServerToAgent) return internal.WriteWSMessage(c.wsConn, message) } +func (c *wsConnection) SendClose() error { + if c.closed.Load() { + return nil + } + return c.wsConn.WriteControl(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseGoingAway, "Server shutting down"), 0) +} + func (c *wsConnection) Disconnect() error { if !c.closed.CompareAndSwap(false, true) { return nil From b2e6edd392cb932d1b5c65058aea8a2e8aede412 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 7 Jan 2026 13:16:09 -0600 Subject: [PATCH 05/22] Fix issues --- server/serverimpl_test.go | 2 +- server/wsconnection.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/server/serverimpl_test.go b/server/serverimpl_test.go index 1d111946..ce99e598 100644 --- a/server/serverimpl_test.go +++ b/server/serverimpl_test.go @@ -965,7 +965,7 @@ func TestServerHonoursAcceptEncoding(t *testing.T) { assert.True(t, proto.Equal(rcvMsg.Load().(proto.Message), &sendMsg)) // Read and decompress the gzip response - b, err = decompressGzip(resp.Body) + b, err = srv.decompressGzip(resp.Body) require.NoError(t, err) assert.EqualValues(t, http.StatusOK, resp.StatusCode) diff --git a/server/wsconnection.go b/server/wsconnection.go index d4430437..c403acd2 100644 --- a/server/wsconnection.go +++ b/server/wsconnection.go @@ -5,6 +5,7 @@ import ( "net" "sync" "sync/atomic" + "time" "github.com/gorilla/websocket" @@ -44,7 +45,7 @@ func (c *wsConnection) SendClose() error { if c.closed.Load() { return nil } - return c.wsConn.WriteControl(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseGoingAway, "Server shutting down"), 0) + return 
c.wsConn.WriteControl(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseGoingAway, "Server shutting down"), time.Time{} } func (c *wsConnection) Disconnect() error { From 11e0b804d274f016ee1e73fbb989d4d8100056d6 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 7 Jan 2026 14:01:34 -0600 Subject: [PATCH 06/22] fix --- server/wsconnection.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/wsconnection.go b/server/wsconnection.go index c403acd2..160fb1e0 100644 --- a/server/wsconnection.go +++ b/server/wsconnection.go @@ -45,7 +45,7 @@ func (c *wsConnection) SendClose() error { if c.closed.Load() { return nil } - return c.wsConn.WriteControl(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseGoingAway, "Server shutting down"), time.Time{} + return c.wsConn.WriteControl(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseGoingAway, "Server shutting down"), time.Time{}) } func (c *wsConnection) Disconnect() error { From 5dea6253778735329f35a7ed98e37ba4567ea968 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 7 Jan 2026 14:12:48 -0600 Subject: [PATCH 07/22] Remove extra changes --- client/internal/clientcommon.go | 6 +-- internal/examples/server/data/agent.go | 19 +++---- server/serverimpl.go | 68 ++++++++------------------ server/serverimpl_test.go | 21 ++------ server/types/connection.go | 4 -- server/wsconnection.go | 8 --- 6 files changed, 34 insertions(+), 92 deletions(-) diff --git a/client/internal/clientcommon.go b/client/internal/clientcommon.go index 09600d8c..987e759b 100644 --- a/client/internal/clientcommon.go +++ b/client/internal/clientcommon.go @@ -142,10 +142,8 @@ func (c *ClientCommon) PrepareStart( } } - if c.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_ReportsRemoteConfig) { - if err := c.ClientSyncedState.SetRemoteConfigStatus(settings.RemoteConfigStatus); err != nil { - return err - } + if err := 
c.ClientSyncedState.SetRemoteConfigStatus(settings.RemoteConfigStatus); err != nil { + return err } var packageStatuses *protobufs.PackageStatuses diff --git a/internal/examples/server/data/agent.go b/internal/examples/server/data/agent.go index 1d5d1a5d..9e79312d 100644 --- a/internal/examples/server/data/agent.go +++ b/internal/examples/server/data/agent.go @@ -266,23 +266,18 @@ func (agent *Agent) processStatusUpdate( if agentDescrChanged { // Agent description is changed. - if agent.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_ReportsRemoteConfig) { - // We need to recalculate the config. - configChanged = agent.calcRemoteConfig() - if agent.Status.RemoteConfigStatus != nil { - configChanged = configChanged || bytes.Equal(agent.Status.RemoteConfigStatus.LastRemoteConfigHash, agent.remoteConfig.ConfigHash) - } - } + // We need to recalculate the config. + configChanged = agent.calcRemoteConfig() - if agent.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_AcceptsOtherConnectionSettings) { - // And set connection settings that are appropriate for the Agent description. - agent.calcConnectionSettings(response) - } + // And set connection settings that are appropriate for the Agent description. + agent.calcConnectionSettings(response) } // If remote config is changed and different from what the Agent has then // send the new remote config to the Agent. - if agent.hasCapability(protobufs.AgentCapabilities_AgentCapabilities_AcceptsRemoteConfig) && configChanged { + if configChanged || + (agent.Status.RemoteConfigStatus != nil && + bytes.Compare(agent.Status.RemoteConfigStatus.LastRemoteConfigHash, agent.remoteConfig.ConfigHash) != 0) { // The new status resulted in a change in the config of the Agent or the Agent // does not have this config (hash is different). Send the new config the Agent. 
response.RemoteConfig = agent.remoteConfig diff --git a/server/serverimpl.go b/server/serverimpl.go index f1a4cc81..12048199 100644 --- a/server/serverimpl.go +++ b/server/serverimpl.go @@ -43,9 +43,6 @@ type server struct { httpServer *http.Server httpServerServeWg *sync.WaitGroup - // gwPool is a gzip.Writer pool. This is intended to lower the amount of writers that are created when responding to HTTP requests. - gwPool sync.Pool - // The network address Server is listening on. Nil if not started. addr net.Addr } @@ -68,14 +65,7 @@ func New(logger types.Logger) *server { logger = &internal.NopLogger{} } - return &server{ - logger: logger, - gwPool: sync.Pool{ - New: func() any { - return gzip.NewWriter(io.Discard) - }, - }, - } + return &server{logger: logger} } func (s *server) Attach(settings Settings) (HTTPHandlerFunc, ConnContext, error) { @@ -255,20 +245,11 @@ func (s *server) handleWSConnection(reqCtx context.Context, wsConn *websocket.Co connectionCallbacks.OnConnected(reqCtx, agentConn) - var sentCustomCapabilities sync.Once + sentCustomCapabilities := false - // Loop until fail to read from the WebSocket connection or reqCtx is cancelled. -LOOP: + // Loop until fail to read from the WebSocket connection. for { - select { - case <-reqCtx.Done(): // signal connection shutdown, note that ReadMessage below is a blocking call, so this will not write as soon as the context is cancelled. - if err := agentConn.SendClose(); err != nil { - s.logger.Errorf(context.Background(), "error sending close frame for WebSocket connection: %v", err) - break LOOP - } - default: - } - msgContext := context.Background() // FIXME why not reqContext? + msgContext := context.Background() request := protobufs.AgentToServer{} // Block until the next message can be read. 
@@ -313,11 +294,12 @@ LOOP: if len(response.InstanceUid) == 0 { response.InstanceUid = request.InstanceUid } - sentCustomCapabilities.Do(func() { + if !sentCustomCapabilities { response.CustomCapabilities = &protobufs.CustomCapabilities{ Capabilities: s.settings.CustomCapabilities, } - }) + sentCustomCapabilities = true + } err = agentConn.Send(msgContext, response) if err != nil { @@ -329,42 +311,32 @@ LOOP: } } -func (s *server) readReqBody(req *http.Request) ([]byte, error) { - if req.Header.Get(headerContentEncoding) == contentEncodingGzip { - data, err := decompressGzip(req.Body) - if err != nil { - return nil, err - } - return data, nil - } - data, err := io.ReadAll(req.Body) +func decompressGzip(data []byte) ([]byte, error) { + r, err := gzip.NewReader(bytes.NewBuffer(data)) if err != nil { return nil, err } - return data, nil + defer r.Close() + return io.ReadAll(r) } -func (s *server) compressGzip(data []byte) ([]byte, error) { // FIXME should we pass the request writer instead of allocating a buffer? - var buf bytes.Buffer - w, _ := s.gwPool.Get().(*gzip.Writer) - defer s.gwPool.Put(w) - w.Reset(&buf) - - _, err := w.Write(data) +func (s *server) readReqBody(req *http.Request) ([]byte, error) { + data, err := io.ReadAll(req.Body) if err != nil { return nil, err } - err = w.Close() - if err != nil { - return nil, err + if req.Header.Get(headerContentEncoding) == contentEncodingGzip { + data, err = decompressGzip(data) + if err != nil { + return nil, err + } } - return buf.Bytes(), nil + return data, nil } func compressGzip(data []byte) ([]byte, error) { var buf bytes.Buffer w := gzip.NewWriter(&buf) - _, err := w.Write(data) if err != nil { return nil, err @@ -433,7 +405,7 @@ func (s *server) handlePlainHTTPRequest(req *http.Request, w http.ResponseWriter // Send the response. 
w.Header().Set(headerContentType, contentTypeProtobuf) if req.Header.Get(headerAcceptEncoding) == contentEncodingGzip { - bodyBytes, err = s.compressGzip(bodyBytes) + bodyBytes, err = compressGzip(bodyBytes) if err != nil { s.logger.Errorf(req.Context(), "Cannot compress response: %v", err) w.WriteHeader(http.StatusInternalServerError) diff --git a/server/serverimpl_test.go b/server/serverimpl_test.go index ce99e598..74b1f1cc 100644 --- a/server/serverimpl_test.go +++ b/server/serverimpl_test.go @@ -964,8 +964,11 @@ func TestServerHonoursAcceptEncoding(t *testing.T) { // Verify the received message is what was sent. assert.True(t, proto.Equal(rcvMsg.Load().(proto.Message), &sendMsg)) - // Read and decompress the gzip response - b, err = srv.decompressGzip(resp.Body) + // Read Server's response. + b, err = io.ReadAll(resp.Body) + require.NoError(t, err) + // Decompress the gzip response + b, err = decompressGzip(b) require.NoError(t, err) assert.EqualValues(t, http.StatusOK, resp.StatusCode) @@ -1395,17 +1398,3 @@ func TestServerTLS(t *testing.T) { eventually(t, func() bool { return atomic.LoadInt32(&onCloseCalled) == 1 }) } - -func BenchmarkCompressGzip(b *testing.B) { - input := []byte("Hello, World!") - s := New(nil) - b.ResetTimer() - b.ReportAllocs() - - for range b.N { - p, err := s.compressGzip(input) - if p == nil || err != nil { - b.Fatal(err) - } - } -} diff --git a/server/types/connection.go b/server/types/connection.go index b244a901..2b2bd814 100644 --- a/server/types/connection.go +++ b/server/types/connection.go @@ -20,10 +20,6 @@ type Connection interface { // Should return as soon as possible if the ctx is cancelled. Send(ctx context.Context, message *protobufs.ServerToAgent) error - // SendClose sends a close control frame with the CloseCode set to going away. - // This should be used to signal a server shutdown. - SendClose() error - // Disconnect closes the network connection. 
// Any blocked Read or Write operations will be unblocked and return errors. Disconnect() error diff --git a/server/wsconnection.go b/server/wsconnection.go index 160fb1e0..c721a043 100644 --- a/server/wsconnection.go +++ b/server/wsconnection.go @@ -5,7 +5,6 @@ import ( "net" "sync" "sync/atomic" - "time" "github.com/gorilla/websocket" @@ -41,13 +40,6 @@ func (c *wsConnection) Send(_ context.Context, message *protobufs.ServerToAgent) return internal.WriteWSMessage(c.wsConn, message) } -func (c *wsConnection) SendClose() error { - if c.closed.Load() { - return nil - } - return c.wsConn.WriteControl(websocket.CloseMessage, websocket.FormatCloseMessage(websocket.CloseGoingAway, "Server shutting down"), time.Time{}) -} - func (c *wsConnection) Disconnect() error { if !c.closed.CompareAndSwap(false, true) { return nil From f148d9a9dd8c27f436e55877077a4e0c8f9ecfbd Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 7 Jan 2026 14:18:00 -0600 Subject: [PATCH 08/22] Fix comments --- internal/examples/agent/agent/agent.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/examples/agent/agent/agent.go b/internal/examples/agent/agent/agent.go index 2c513dfa..8939e0eb 100644 --- a/internal/examples/agent/agent/agent.go +++ b/internal/examples/agent/agent/agent.go @@ -104,21 +104,21 @@ func WithLogger(l types.Logger) Option { } } -// WithLogger is used to set an Agent's type +// WithAgentType is used to set an Agent's type func WithAgentType(s string) Option { return func(agent *Agent) { agent.agentType = s } } -// WithLogger is used to set an Agent's version +// WithAgentVersion is used to set an Agent's version func WithAgentVersion(s string) Option { return func(agent *Agent) { agent.agentVersion = s } } -// WithLogger is used to set an Agent's id +// WithInstanceID is used to set an Agent's id func WithInstanceID(id uuid.UUID) Option { return func(agent *Agent) { agent.instanceId = id From 72aaae6a22a72692e49881e8c24a0b3d347f68d9 Mon Sep 
17 00:00:00 2001 From: michel-laterman Date: Thu, 8 Jan 2026 09:39:39 -0600 Subject: [PATCH 09/22] Add connection count intstrumentation to example server --- internal/examples/agent/agent/agent.go | 11 ----- internal/examples/go.mod | 23 ++++++---- internal/examples/go.sum | 46 +++++++++++-------- internal/examples/scale/main.go | 8 ++-- internal/examples/server/opampsrv/opampsrv.go | 38 ++++++++++++++- 5 files changed, 79 insertions(+), 47 deletions(-) diff --git a/internal/examples/agent/agent/agent.go b/internal/examples/agent/agent/agent.go index 8939e0eb..f5bd9b6f 100644 --- a/internal/examples/agent/agent/agent.go +++ b/internal/examples/agent/agent/agent.go @@ -56,7 +56,6 @@ const ( type Agent struct { client client.OpAMPClient logger types.Logger - doneCh chan struct{} agentType string agentVersion string @@ -453,24 +452,14 @@ func (agent *Agent) Start() error { } agent.logger.Debugf(context.Background(), "Agent starting, id=%v, type=%s, version=%s.", agent.instanceId, agent.agentType, agent.agentVersion) - agent.doneCh = make(chan struct{}) return agent.connect(withTLSConfig(tls)) } func (agent *Agent) Shutdown() { - if agent.doneCh == nil { - agent.logger.Debugf(context.Background(), "Agent not running.") - return - } agent.logger.Debugf(context.Background(), "Agent shutting down...") if agent.client != nil { _ = agent.client.Stop(context.Background()) } - close(agent.doneCh) -} - -func (agent *Agent) Wait() { - <-agent.doneCh } // requestClientCertificate sets a request to be sent to the Server to create diff --git a/internal/examples/go.mod b/internal/examples/go.mod index d0de902a..2ef8ded0 100644 --- a/internal/examples/go.mod +++ b/internal/examples/go.mod @@ -1,6 +1,8 @@ module github.com/open-telemetry/opamp-go/internal/examples -go 1.23.0 +go 1.24.0 + +toolchain go1.24.3 require ( github.com/cenkalti/backoff/v4 v4.3.0 @@ -9,24 +11,26 @@ require ( github.com/oklog/ulid/v2 v2.1.0 github.com/open-telemetry/opamp-go v0.1.0 github.com/shirou/gopsutil 
v3.21.11+incompatible - github.com/stretchr/testify v1.10.0 + github.com/stretchr/testify v1.11.1 go.opentelemetry.io/collector/config/configopaque v1.38.0 go.opentelemetry.io/collector/config/configtls v1.38.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 - go.opentelemetry.io/otel v1.24.0 + go.opentelemetry.io/otel v1.39.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 - go.opentelemetry.io/otel/metric v1.24.0 - go.opentelemetry.io/otel/sdk v1.24.0 - go.opentelemetry.io/otel/sdk/metric v1.24.0 + go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 + go.opentelemetry.io/otel/metric v1.39.0 + go.opentelemetry.io/otel/sdk v1.39.0 + go.opentelemetry.io/otel/sdk/metric v1.39.0 google.golang.org/protobuf v1.36.7 ) require ( + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/foxboron/go-tpm-keyfiles v0.0.0-20250323135004-b31fac66206e // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect - github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/golang/protobuf v1.5.3 // indirect @@ -41,11 +45,12 @@ require ( github.com/tklauser/go-sysconf v0.3.9 // indirect github.com/tklauser/numcpus v0.3.0 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - go.opentelemetry.io/otel/trace v1.24.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/trace v1.39.0 // indirect go.opentelemetry.io/proto/otlp v1.1.0 // indirect golang.org/x/crypto v0.35.0 // indirect golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.30.0 // indirect + golang.org/x/sys v0.39.0 // indirect golang.org/x/text v0.22.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect google.golang.org/genproto/googleapis/rpc 
v0.0.0-20240318140521-94a12d6c2237 // indirect diff --git a/internal/examples/go.sum b/internal/examples/go.sum index d325a70b..18833950 100644 --- a/internal/examples/go.sum +++ b/internal/examples/go.sum @@ -15,6 +15,8 @@ github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAm github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -32,8 +34,8 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/go-ldap/ldap v3.0.2+incompatible/go.mod h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= @@ -51,8 +53,8 @@ 
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-tpm v0.9.5 h1:ocUmnDebX54dnW+MQWGQRbdaAcJELsa6PqZhJ48KwVU= github.com/google/go-tpm v0.9.5/go.mod h1:h9jEsEECg7gtLis0upRBQU+GhYVH6jMjrFxI8u6bVUY= github.com/google/go-tpm-tools v0.4.4 h1:oiQfAIkc6xTy9Fl5NKTeTJkBTlXdHsxAofmQyxBKY98= @@ -128,8 +130,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/rhnvrm/simples3 v0.6.1/go.mod h1:Y+3vYm2V7Y4VijFoJHHTrja6OgPrJ2cBti8dPGkC3sA= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= @@ -142,32 +144,36 @@ 
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tklauser/go-sysconf v0.3.9 h1:JeUVdAOWhhxVcU6Eqr/ATFHgXk/mmiItdKeJPev3vTo= github.com/tklauser/go-sysconf v0.3.9/go.mod h1:11DU/5sG7UexIrp/O6g35hrWzu0JxlwQ3LSFUzyeuhs= github.com/tklauser/numcpus v0.3.0 h1:ILuRUQBtssgnxw0XXIjKUC56fgnOrFoQQ/4+DeU2biQ= github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/collector/config/configopaque v1.38.0 h1:qLefkP4XNCud1Dge6b6lOU1KptUfAHtVWNs9iGAYYqY= go.opentelemetry.io/collector/config/configopaque v1.38.0/go.mod h1:aAOmM/mSWE2F3A58x4MUw1bYW8TIjVxn5/WfgxRgMu0= go.opentelemetry.io/collector/config/configtls v1.38.0 h1:bn5/oCLpAI+0LVg9q7dySZXi2swNWn6qmvkoq7A8/84= go.opentelemetry.io/collector/config/configtls v1.38.0/go.mod h1:dkV33BhlveIfNTNUjBMYtRrVNVsRwnXpPLxkhLbZcPk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= 
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= -go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= -go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= +go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= +go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 h1:mM8nKi6/iFQ0iqst80wDHU2ge198Ye/TfN0WBS5U24Y= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0/go.mod h1:0PrIIzDteLSmNyxqcGYRL4mDIo8OTuBAOI/Bn1URxac= -go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= -go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= -go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw= -go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= -go.opentelemetry.io/otel/sdk/metric v1.24.0 h1:yyMQrPzF+k88/DbH7o4FMAs80puqd+9osbiBrJrz/w8= -go.opentelemetry.io/otel/sdk/metric v1.24.0/go.mod h1:I6Y5FjH6rvEnTTAYQz3Mmv2kl6Ek5IIrmwTLqMrrOE0= -go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= -go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 h1:5gn2urDL/FBnK8OkCfD1j3/ER79rUuTYmCvlXBKeYL8= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0/go.mod h1:0fBG6ZJxhqByfFZDwSwpZGzJU671HkwpWaNe2t4VUPI= +go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= +go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= +go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= +go.opentelemetry.io/otel/sdk 
v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= +go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= +go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= +go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= +go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI= go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= @@ -204,8 +210,8 @@ golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= diff --git a/internal/examples/scale/main.go b/internal/examples/scale/main.go index 4cdc4c00..19a0c863 100644 --- a/internal/examples/scale/main.go +++ b/internal/examples/scale/main.go @@ -92,20 +92,18 @@ func main() { a := agent.NewAgent(cfg, agent.WithNoClientCertRequest(), agent.WithInstanceID(id), 
agent.WithLogger(agentLogger)) if err := a.Start(); err != nil { + // start errors currently only occur if there is a TLS config error, or the URL scheme is incorrect + // If the server is unavailable, an agent will retry logger.Printf("Error starting agent: %v\n", err) continue } agents = append(agents, a) } logger.Printf("%d agents started", len(agents)) + <-ctx.Done() for _, a := range agents { a.Shutdown() } logger.Println("All agents stopped") - - for _, a := range agents { - a.Wait() - } - logger.Println("All agents terminated cleanly") } diff --git a/internal/examples/server/opampsrv/opampsrv.go b/internal/examples/server/opampsrv/opampsrv.go index b68eb433..5dc4b927 100644 --- a/internal/examples/server/opampsrv/opampsrv.go +++ b/internal/examples/server/opampsrv/opampsrv.go @@ -8,6 +8,12 @@ import ( "github.com/oklog/ulid/v2" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" + "go.opentelemetry.io/otel/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + otelresource "go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" "github.com/open-telemetry/opamp-go/internal/examples/certs" "github.com/open-telemetry/opamp-go/internal/examples/server/data" @@ -20,6 +26,7 @@ type Server struct { opampSrv server.OpAMPServer agents *data.Agents logger *Logger + counter metric.Int64UpDownCounter } func NewServer(agents *data.Agents) *Server { @@ -30,10 +37,35 @@ func NewServer(agents *data.Agents) *Server { log.Default().Flags()|log.Lmsgprefix|log.Lmicroseconds, ), } + resource, err := otelresource.New(context.Background(), + otelresource.WithAttributes( + semconv.ServiceNameKey.String("io.opentelemetry.opampserver"), + semconv.ServiceVersionKey.String("0.1.0"), + ), + ) + if err != nil { + panic(err) + } + exporter, err := stdoutmetric.New(stdoutmetric.WithPrettyPrint()) + if err != nil { + panic(err) + } + + meterProvider := 
sdkmetric.NewMeterProvider( + sdkmetric.WithResource(resource), + sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)), + ) + otel.SetMeterProvider(meterProvider) + meter := otel.Meter("opamp") + counter, err := meter.Int64UpDownCounter("connections.active.count") + if err != nil { + panic(err) + } srv := &Server{ - agents: agents, - logger: logger, + agents: agents, + logger: logger, + counter: counter, } srv.opampSrv = server.New(logger) @@ -49,6 +81,7 @@ func (srv *Server) Start() { return types.ConnectionResponse{ Accept: true, ConnectionCallbacks: types.ConnectionCallbacks{ + OnConnected: func(ctx context.Context, conn types.Connection) { srv.counter.Add(ctx, 1) }, OnMessage: srv.onMessage, OnConnectionClose: srv.onDisconnect, }, @@ -80,6 +113,7 @@ func (srv *Server) Stop() { } func (srv *Server) onDisconnect(conn types.Connection) { + srv.counter.Add(context.Background(), -1) srv.agents.RemoveConnection(conn) } From 0225f007b7980fc2fc8b4f99de12698a43b78906 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Thu, 8 Jan 2026 11:49:58 -0600 Subject: [PATCH 10/22] Control scale agent verbosity --- internal/examples/scale/logger.go | 14 ++++++++ internal/examples/scale/main.go | 29 +++++++++++++-- internal/examples/server/opampsrv/opampsrv.go | 35 ++++++++++++++----- 3 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 internal/examples/scale/logger.go diff --git a/internal/examples/scale/logger.go b/internal/examples/scale/logger.go new file mode 100644 index 00000000..bd3821a7 --- /dev/null +++ b/internal/examples/scale/logger.go @@ -0,0 +1,14 @@ +package main + +import ( + "context" + + "github.com/open-telemetry/opamp-go/client/types" +) + +var _ types.Logger = &NOPLogger{} + +type NOPLogger struct{} + +func (*NOPLogger) Debugf(_ context.Context, format string, v ...interface{}) {} +func (*NOPLogger) Errorf(_ context.Context, format string, v ...interface{}) {} diff --git a/internal/examples/scale/main.go 
b/internal/examples/scale/main.go index 19a0c863..6ffcefc7 100644 --- a/internal/examples/scale/main.go +++ b/internal/examples/scale/main.go @@ -13,8 +13,14 @@ import ( "github.com/open-telemetry/opamp-go/internal/examples/agent/agent" "github.com/open-telemetry/opamp-go/internal/examples/config" "go.opentelemetry.io/collector/config/configtls" + "go.opentelemetry.io/otel" ) +// nopErrorHandler is used to turn any otel errors generated by scale agents into a nop +type nopErrorHandler struct{} + +func (l *nopErrorHandler) Handle(err error) {} + func main() { logger := log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) @@ -27,6 +33,9 @@ func main() { var heartbeat time.Duration flag.DurationVar(&heartbeat, "heartbeat", time.Second*30, "Heartbeat duration") + var verboseAgents bool + flag.BoolVar(&verboseAgents, "verbose-agents", false, "Enable agent logging.") + var tlsInsecure bool flag.BoolVar(&tlsInsecure, "tls-insecure", false, "Disable the client transport security.") @@ -73,6 +82,14 @@ func main() { }, } + nopLogger := &NOPLogger{} + + // Silence the otel errors agents can generate. + // i.e.: failed to upload metrics: ... + if !verboseAgents { + otel.SetErrorHandler(&nopErrorHandler{}) + } + logger.Printf("Starting %d agents", agentCount) // Create a slice to track agents so we can safely stop them later. // Use of slice instead of a concurrent goroutine to reduce memory usage. @@ -88,8 +105,16 @@ func main() { if err != nil { panic(err) } - agentLogger := agent.NewScaleLogger(id) - a := agent.NewAgent(cfg, agent.WithNoClientCertRequest(), agent.WithInstanceID(id), agent.WithLogger(agentLogger)) + opts := []agent.Option{ + agent.WithNoClientCertRequest(), + agent.WithInstanceID(id), + } + if verboseAgents { + opts = append(opts, agent.WithLogger(agent.NewScaleLogger(id))) + } else { + opts = append(opts, agent.WithLogger(nopLogger)) + } + a := agent.NewAgent(cfg, opts...) 
if err := a.Start(); err != nil { // start errors currently only occur if there is a TLS config error, or the URL scheme is incorrect diff --git a/internal/examples/server/opampsrv/opampsrv.go b/internal/examples/server/opampsrv/opampsrv.go index 5dc4b927..f83b2d04 100644 --- a/internal/examples/server/opampsrv/opampsrv.go +++ b/internal/examples/server/opampsrv/opampsrv.go @@ -9,6 +9,7 @@ import ( "github.com/oklog/ulid/v2" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" "go.opentelemetry.io/otel/metric" sdkmetric "go.opentelemetry.io/otel/sdk/metric" @@ -22,11 +23,17 @@ import ( "github.com/open-telemetry/opamp-go/server/types" ) +var ( + opampReadErrAttr = attribute.String("error.type", "read") + opampRespErrAttr = attribute.String("error.type", "resp") +) + type Server struct { - opampSrv server.OpAMPServer - agents *data.Agents - logger *Logger - counter metric.Int64UpDownCounter + opampSrv server.OpAMPServer + agents *data.Agents + logger *Logger + counter metric.Int64UpDownCounter + errCounter metric.Int64Counter } func NewServer(agents *data.Agents) *Server { @@ -62,10 +69,16 @@ func NewServer(agents *data.Agents) *Server { panic(err) } + errCounter, err := meter.Int64Counter("connections.error.count") + if err != nil { + panic(err) + } + srv := &Server{ - agents: agents, - logger: logger, - counter: counter, + agents: agents, + logger: logger, + counter: counter, + errCounter: errCounter, } srv.opampSrv = server.New(logger) @@ -81,9 +94,15 @@ func (srv *Server) Start() { return types.ConnectionResponse{ Accept: true, ConnectionCallbacks: types.ConnectionCallbacks{ - OnConnected: func(ctx context.Context, conn types.Connection) { srv.counter.Add(ctx, 1) }, + OnConnected: func(ctx context.Context, _ types.Connection) { srv.counter.Add(ctx, 1) }, OnMessage: srv.onMessage, OnConnectionClose: srv.onDisconnect, + 
OnReadMessageError: func(_ types.Connection, _ int, _ []byte, _ error) { + srv.errCounter.Add(context.Background(), 1, metric.WithAttributes(opampReadErrAttr)) + }, + OnMessageResponseError: func(_ types.Connection, _ *protobufs.ServerToAgent, _ error) { + srv.errCounter.Add(context.Background(), 1, metric.WithAttributes(opampRespErrAttr)) + }, }, } }, From 1aef6c81e75868bebf249cf10d885fdf000c3e20 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Fri, 9 Jan 2026 12:39:30 -0600 Subject: [PATCH 11/22] Use internal NopLogger --- internal/examples/go.mod | 2 -- internal/examples/scale/logger.go | 14 -------------- internal/examples/scale/main.go | 3 ++- 3 files changed, 2 insertions(+), 17 deletions(-) delete mode 100644 internal/examples/scale/logger.go diff --git a/internal/examples/go.mod b/internal/examples/go.mod index 2ef8ded0..65237854 100644 --- a/internal/examples/go.mod +++ b/internal/examples/go.mod @@ -2,8 +2,6 @@ module github.com/open-telemetry/opamp-go/internal/examples go 1.24.0 -toolchain go1.24.3 - require ( github.com/cenkalti/backoff/v4 v4.3.0 github.com/google/uuid v1.6.0 diff --git a/internal/examples/scale/logger.go b/internal/examples/scale/logger.go deleted file mode 100644 index bd3821a7..00000000 --- a/internal/examples/scale/logger.go +++ /dev/null @@ -1,14 +0,0 @@ -package main - -import ( - "context" - - "github.com/open-telemetry/opamp-go/client/types" -) - -var _ types.Logger = &NOPLogger{} - -type NOPLogger struct{} - -func (*NOPLogger) Debugf(_ context.Context, format string, v ...interface{}) {} -func (*NOPLogger) Errorf(_ context.Context, format string, v ...interface{}) {} diff --git a/internal/examples/scale/main.go b/internal/examples/scale/main.go index 6ffcefc7..2c3023ea 100644 --- a/internal/examples/scale/main.go +++ b/internal/examples/scale/main.go @@ -10,6 +10,7 @@ import ( "time" "github.com/google/uuid" + opampinternal "github.com/open-telemetry/opamp-go/internal" 
"github.com/open-telemetry/opamp-go/internal/examples/agent/agent" "github.com/open-telemetry/opamp-go/internal/examples/config" "go.opentelemetry.io/collector/config/configtls" @@ -82,7 +83,7 @@ func main() { }, } - nopLogger := &NOPLogger{} + nopLogger := &opampinternal.NopLogger{} // Silence the otel errors agents can generate. // i.e.: failed to upload metrics: ... From 853d54e6cecaa11d1b6fcb66f4ce293be0412ac2 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Tue, 13 Jan 2026 12:29:40 -0800 Subject: [PATCH 12/22] Cleanup scale test driver, provide dockerfile, adjust makefile target --- internal/examples/Dockerfile.scale | 21 +++ internal/examples/docker-compose.yml | 22 ++- internal/examples/makefile | 18 ++- internal/examples/scale/README.md | 20 ++- internal/examples/scale/main.go | 205 ++++++++++++++++++--------- makefile | 4 + 6 files changed, 206 insertions(+), 84 deletions(-) create mode 100644 internal/examples/Dockerfile.scale diff --git a/internal/examples/Dockerfile.scale b/internal/examples/Dockerfile.scale new file mode 100644 index 00000000..63099f4b --- /dev/null +++ b/internal/examples/Dockerfile.scale @@ -0,0 +1,21 @@ +FROM golang:1.24-bookworm AS builder + +WORKDIR /src + +COPY ./ ./ + +WORKDIR /src/internal/examples + +ENV GOPROXY="https://proxy.golang.org,direct" + +RUN go mod download + +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -o scale/bin/scale scale/main.go + +FROM gcr.io/distroless/static-debian12:nonroot + +WORKDIR /app + +COPY --from=builder /src/internal/examples/scale/bin/scale ./ + +ENTRYPOINT ["./scale"] diff --git a/internal/examples/docker-compose.yml b/internal/examples/docker-compose.yml index 5018047d..1581fb55 100644 --- a/internal/examples/docker-compose.yml +++ b/internal/examples/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: opamp-server: build: @@ -17,7 +15,8 @@ services: build: context: ../.. 
dockerfile: internal/examples/Dockerfile.agent - entrypoint: + container_name: opamp-agent + entrypoint: - "./agent" - "--endpoint" - "wss://opamp-server:4320/v1/opamp" @@ -27,8 +26,21 @@ services: depends_on: - opamp-server restart: unless-stopped - deploy: - replicas: 1 + + opamp-scale: + build: + context: ../.. + dockerfile: internal/examples/Dockerfile.scale + container_name: opamp-scale + environment: + SCALE_AGENT_COUNT: "${SCALE_AGENT_COUNT:-1000}" + networks: + - opamp-network + profiles: + - scale + depends_on: + - opamp-server + restart: unless-stopped networks: opamp-network: diff --git a/internal/examples/makefile b/internal/examples/makefile index 78fe17fb..1d36c621 100644 --- a/internal/examples/makefile +++ b/internal/examples/makefile @@ -7,7 +7,7 @@ SRC_ROOT := $(shell git rev-parse --show-toplevel) # Build targets .PHONY: build-examples -build-examples: build-example-agent build-example-supervisor build-example-server +build-examples: build-example-agent build-example-supervisor build-example-server build-example-scale .PHONY: build-example-agent build-example-agent: @@ -21,6 +21,10 @@ build-example-supervisor: build-example-server: $(GOCMD) build -o server/bin/server server/main.go +.PHONY: build-example-scale +build-example-scale: + $(GOCMD) build -o scale/bin/scale scale/main.go + .PHONY: run-examples run-examples: build-examples server/bin/server & @@ -57,11 +61,11 @@ run-docker: build-docker-server build-docker-agent .PHONY: docker-compose-up docker-compose-up: - cd $(SRC_ROOT)/internal/examples && docker compose up --build + cd $(SRC_ROOT)/internal/examples && docker compose up --build -d .PHONY: docker-compose-down docker-compose-down: - cd $(SRC_ROOT)/internal/examples && docker compose down + cd $(SRC_ROOT)/internal/examples && docker compose --profile scale down .PHONY: docker-compose-logs docker-compose-logs: @@ -69,9 +73,9 @@ docker-compose-logs: .PHONY: docker-compose-scale docker-compose-scale: - @echo "Usage: make 
docker-compose-scale AGENTS=" + @echo "Usage: make docker-compose-scale [AGENTS=]" @if [ -z "$(AGENTS)" ]; then \ - echo "Error: AGENTS variable not set"; \ - exit 1; \ + cd $(SRC_ROOT)/internal/examples && docker compose --profile scale up -d --build; \ + else \ + cd $(SRC_ROOT)/internal/examples && SCALE_AGENT_COUNT=$(AGENTS) docker compose --profile scale up -d --build; \ fi - cd $(SRC_ROOT)/internal/examples && docker compose up --scale opamp-agent=$(AGENTS) -d diff --git a/internal/examples/scale/README.md b/internal/examples/scale/README.md index b817d10e..a67730c4 100644 --- a/internal/examples/scale/README.md +++ b/internal/examples/scale/README.md @@ -9,18 +9,24 @@ The main driver logs to stdout, and all agents log to stderr. ## Usage +Configuration may be specified through command line flags, or environment variables. + +Configuration load priority is: `environment variables > flags > defaults`. + ``` -scale \ +Usage of scale: -agent-count uint - The number of agents to start. (default 1000) + The number of agents to start (env var: SCALE_AGENT_COUNT). (default 1000) -heartbeat duration - Heartbeat duration (default 30s) + Heartbeat duration (env var: SCALE_HEARTBEAT). (default 30s) -server-url string - OpAMP server URL (default "wss://127.0.0.1:4320/v1/opamp") + OpAMP server URL (env var: SCALE_SERVER_URL). (default "wss://127.0.0.1:4320/v1/opamp") -tls-ca_file string - Path to the CA cert. It verifies the server certificate + Path to the OpAMP server CA cert (env var: SCALE_TLS_CA_FILE). -tls-insecure - Disable the client transport security. + Disable the client transport security (env var: SCALE_TLS_INSECURE). -tls-insecure_skip_verify - Will enable TLS but not verify the certificate. + Will enable TLS but not verify the certificate (env var: SCALE_TLS_INSECURE_SKIP_VERIFY). + -verbose-agents + Enable agent logging (env var: SCALE_VERBOSE_AGENTS). 
``` diff --git a/internal/examples/scale/main.go b/internal/examples/scale/main.go index 2c3023ea..89d41bcd 100644 --- a/internal/examples/scale/main.go +++ b/internal/examples/scale/main.go @@ -2,132 +2,207 @@ package main import ( "context" + "errors" "flag" + "fmt" "log" "net/url" "os" "os/signal" + "strconv" "time" - "github.com/google/uuid" opampinternal "github.com/open-telemetry/opamp-go/internal" "github.com/open-telemetry/opamp-go/internal/examples/agent/agent" "github.com/open-telemetry/opamp-go/internal/examples/config" + + "github.com/google/uuid" "go.opentelemetry.io/collector/config/configtls" "go.opentelemetry.io/otel" ) -// nopErrorHandler is used to turn any otel errors generated by scale agents into a nop +// nopErrorHandler is used to turn any otel errors generated by scale agents into a nop. type nopErrorHandler struct{} func (l *nopErrorHandler) Handle(err error) {} -func main() { - logger := log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) - - var agentCount uint64 - flag.Uint64Var(&agentCount, "agent-count", 1000, "The number of agents to start.") - - var serverURL string - flag.StringVar(&serverURL, "server-url", "wss://127.0.0.1:4320/v1/opamp", "OpAMP server URL") +// scaleConfig are all the flags/env vars that can be used to configure the scale test driver. +type scaleConfig struct { + agentCount uint64 + serverURL string + heartbeat time.Duration + verboseAgents bool + tlsInsecure bool + tlsInsecureSkipVerify bool + tlsCAFile string +} - var heartbeat time.Duration - flag.DurationVar(&heartbeat, "heartbeat", time.Second*30, "Heartbeat duration") +// verifyArgs checks that all scaleConfig options are valid. 
+func (cfg scaleConfig) verifyArgs() error { + if cfg.agentCount == 0 { + return errors.New("agent count must not be zero") + } + parsedURL, err := url.Parse(cfg.serverURL) + if err != nil { + return fmt.Errorf("server-url failed to parse: %w", err) + } + switch parsedURL.Scheme { + case "http", "https": + case "ws", "wss": + default: + return fmt.Errorf("server-url has an unknown scheme: %s", parsedURL.Scheme) + } - var verboseAgents bool - flag.BoolVar(&verboseAgents, "verbose-agents", false, "Enable agent logging.") + if cfg.heartbeat < 0 { + return fmt.Errorf("heartbeat must be non-negative, got: %s", cfg.heartbeat) + } - var tlsInsecure bool - flag.BoolVar(&tlsInsecure, "tls-insecure", false, "Disable the client transport security.") + if cfg.tlsCAFile != "" { + fi, err := os.Stat(cfg.tlsCAFile) + if err != nil { + return fmt.Errorf("tls-ca_file stat failed: %w", err) + } + if fi.IsDir() { + return fmt.Errorf("tls-ca_file: %s is a directory", cfg.tlsCAFile) + } + } - var tlsInsecureSkipVerify bool - flag.BoolVar(&tlsInsecureSkipVerify, "tls-insecure_skip_verify", false, "Will enable TLS but not verify the certificate.") + return nil +} - var tlsCAFile string - flag.StringVar(&tlsCAFile, "tls-ca_file", "", "Path to the CA cert. It verifies the server certificate") +// loadEnv will attempt to load scaleConfig options from environment variables. 
+func loadEnv(cfg *scaleConfig) { + if s, ok := os.LookupEnv("SCALE_AGENT_COUNT"); ok { + count, err := strconv.ParseUint(s, 10, 64) + if err == nil { + cfg.agentCount = count + } + } - flag.Parse() + if s, ok := os.LookupEnv("SCALE_SERVER_URL"); ok { + cfg.serverURL = s + } - // Verify args - if agentCount == 0 { - logger.Fatal("Arg: agent-count must not be zero") + if s, ok := os.LookupEnv("SCALE_HEARTBEAT"); ok { + dur, err := time.ParseDuration(s) + if err == nil { + cfg.heartbeat = dur + } } - parsedURL, err := url.Parse(serverURL) - if err != nil { - logger.Fatalf("Arg: server-url failed to parse: %v", err) + if s, ok := os.LookupEnv("SCALE_VERBOSE_AGENTS"); ok { + b, err := strconv.ParseBool(s) + if err == nil { + cfg.verboseAgents = b + } } - switch parsedURL.Scheme { - case "http", "https": - case "ws", "wss": - default: - logger.Fatalf("Arg: server-url has an unknown scheme: %v", parsedURL.Scheme) + + if s, ok := os.LookupEnv("SCALE_TLS_INSECURE"); ok { + b, err := strconv.ParseBool(s) + if err == nil { + cfg.tlsInsecure = b + } } - if heartbeat < 0 { - logger.Fatalf("Arg: heartbeat must be non-negative, got %s", heartbeat) + if s, ok := os.LookupEnv("SCALE_TLS_INSECURE_SKIP_VERIFY"); ok { + b, err := strconv.ParseBool(s) + if err == nil { + cfg.tlsInsecureSkipVerify = b + } } - ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) - defer cancel() + if s, ok := os.LookupEnv("SCALE_TLS_CA_FILE"); ok { + cfg.tlsCAFile = s + } +} - cfg := &config.AgentConfig{ - Endpoint: serverURL, - HeartbeatInterval: &heartbeat, +// runAgents starts and returns the configured amount of agents. +// If an error is encountered when starting an agent, it is return along with all started agents. 
+func runAgents(ctx context.Context, cfg scaleConfig) ([]*agent.Agent, error) { + nopLogger := &opampinternal.NopLogger{} + agentConfig := &config.AgentConfig{ + Endpoint: cfg.serverURL, + HeartbeatInterval: &cfg.heartbeat, TLSSetting: configtls.ClientConfig{ - Insecure: tlsInsecure, - InsecureSkipVerify: tlsInsecureSkipVerify, + Insecure: cfg.tlsInsecure, + InsecureSkipVerify: cfg.tlsInsecureSkipVerify, Config: configtls.Config{ - CAFile: tlsCAFile, + CAFile: cfg.tlsCAFile, }, }, } - nopLogger := &opampinternal.NopLogger{} - - // Silence the otel errors agents can generate. - // i.e.: failed to upload metrics: ... - if !verboseAgents { - otel.SetErrorHandler(&nopErrorHandler{}) - } - - logger.Printf("Starting %d agents", agentCount) - // Create a slice to track agents so we can safely stop them later. // Use of slice instead of a concurrent goroutine to reduce memory usage. - agents := make([]*agent.Agent, 0, agentCount) - for range agentCount { + agents := make([]*agent.Agent, 0, cfg.agentCount) + var err error + for range cfg.agentCount { select { - case <-ctx.Done(): // early termination - return + case <-ctx.Done(): + return agents, err default: } id, err := uuid.NewV7() if err != nil { - panic(err) + return nil, err } + opts := []agent.Option{ agent.WithNoClientCertRequest(), agent.WithInstanceID(id), } - if verboseAgents { + + if cfg.verboseAgents { opts = append(opts, agent.WithLogger(agent.NewScaleLogger(id))) } else { opts = append(opts, agent.WithLogger(nopLogger)) } - a := agent.NewAgent(cfg, opts...) - - if err := a.Start(); err != nil { - // start errors currently only occur if there is a TLS config error, or the URL scheme is incorrect - // If the server is unavailable, an agent will retry - logger.Printf("Error starting agent: %v\n", err) + a := agent.NewAgent(agentConfig, opts...) 
+ if startErr := a.Start(); err != nil { + err = errors.Join(err, startErr) continue } agents = append(agents, a) } + return agents, err +} + +func main() { + var cfg scaleConfig + flag.Uint64Var(&cfg.agentCount, "agent-count", 1000, "The number of agents to start (env var: SCALE_AGENT_COUNT).") + flag.StringVar(&cfg.serverURL, "server-url", "wss://127.0.0.1:4320/v1/opamp", "OpAMP server URL (env var: SCALE_SERVER_URL).") + flag.DurationVar(&cfg.heartbeat, "heartbeat", time.Second*30, "Heartbeat duration (env var: SCALE_HEARTBEAT).") + flag.BoolVar(&cfg.verboseAgents, "verbose-agents", false, "Enable agent logging (env var: SCALE_VERBOSE_AGENTS).") + flag.BoolVar(&cfg.tlsInsecure, "tls-insecure", false, "Disable the client transport security (env var: SCALE_TLS_INSECURE).") + flag.BoolVar(&cfg.tlsInsecureSkipVerify, "tls-insecure_skip_verify", false, "Will enable TLS but not verify the certificate (env var: SCALE_TLS_INSECURE_SKIP_VERIFY).") + flag.StringVar(&cfg.tlsCAFile, "tls-ca_file", "", "Path to the OpAMP server CA cert (env var: SCALE_TLS_CA_FILE).") + + flag.Parse() + loadEnv(&cfg) + + logger := log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) + if err := cfg.verifyArgs(); err != nil { + logger.Fatalf("Arg verification error: %v", err) + } + + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) + defer cancel() + + // Silence the otel errors agents can generate. + // i.e.: failed to upload metrics: ... 
+ if !cfg.verboseAgents { + otel.SetErrorHandler(&nopErrorHandler{}) + } + + logger.Printf("Starting %d agents", cfg.agentCount) + + agents, err := runAgents(ctx, cfg) + if err != nil { + logger.Printf("Error starting agents: %v", err) + } logger.Printf("%d agents started", len(agents)) - <-ctx.Done() + <-ctx.Done() // wait for SIGINT for _, a := range agents { a.Shutdown() } diff --git a/makefile b/makefile index c614a334..9962a406 100644 --- a/makefile +++ b/makefile @@ -50,6 +50,10 @@ build-example-supervisor: build-example-server: $(MAKE) -C internal/examples build-example-server +.PHONY: build-example-scale +build-example-scale: + $(MAKE) -C internal/examples build-example-scale + .PHONY: run-examples run-examples: $(MAKE) -C internal/examples run-examples From 3e3d5ac91ba6eb14ff67ae9ccb1086c8fa957670 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Fri, 16 Jan 2026 12:41:22 -0800 Subject: [PATCH 13/22] Allow example agent to be started in scale-test mode --- internal/examples/agent/README.md | 42 ++++ internal/examples/agent/main.go | 310 ++++++++++++++++++++++++--- internal/examples/docker-compose.yml | 18 +- internal/examples/makefile | 16 +- internal/examples/scale/README.md | 32 --- internal/examples/scale/main.go | 210 ------------------ makefile | 4 - 7 files changed, 329 insertions(+), 303 deletions(-) create mode 100644 internal/examples/agent/README.md delete mode 100644 internal/examples/scale/README.md delete mode 100644 internal/examples/scale/main.go diff --git a/internal/examples/agent/README.md b/internal/examples/agent/README.md new file mode 100644 index 00000000..4d1fa934 --- /dev/null +++ b/internal/examples/agent/README.md @@ -0,0 +1,42 @@ +# agent + +Agent provides an example agent implementation for the OpAMP protocol. + +Both HTTP and Websocket connections are supported.
+ +The example agent can run in normal mode, where the binary starts a single agent, or in scale mode (when `-run-scale` is passed or `AGENT_RUN_SCALE=true` is set). + +When in scale mode, the process will start multiple agents (up to `-scale-count`/`AGENT_SCALE_COUNT`) in the same process. +All agents will use the same scheme when connecting to the OpAMP server (HTTP/Websocket). + +In scale mode, the agent orchestrator will log to stdout, and the agents to stderr. + +## Usage + +``` +Usage of agent: + -endpoint string + OpAMP server endpoint URL (env var: AGENT_ENDPOINT). (default "wss://127.0.0.1:4320/v1/opamp") + -heartbeat duration + Heartbeat duration (env var: AGENT_HEARTBEAT). (default 30s) + -quite-agent + Disable agent logger (env var: AGENT_QUITE). + -run-scale + Run in scale-test mode (env var: AGENT_RUN_SCALE). + -scale-count uint + The number of agents to start in scale mode (env var: AGENT_SCALE_COUNT). (default 1000) + -t string + Agent Type String (env var: AGENT_TYPE). (default "io.opentelemetry.collector") + -tls-ca_file string + Path to the CA cert. It verifies the server certificate (env var: AGENT_TLS_CA_FILE). + -tls-cert_file string + Path to the TLS cert (env var: AGENT_TLS_CERT_FILE). + -tls-insecure + Disable the client transport security (env var: AGENT_TLS_INSECURE). + -tls-insecure_skip_verify + Will enable TLS but not verify the certificate (env var: AGENT_TLS_INSECURE_SKIP_VERIFY). + -tls-key_file string + Path to the TLS key (env var: AGENT_TLS_KEY_FILE). + -v string + Agent Version String (env var: AGENT_VERSION).
(default "1.0.0") +``` diff --git a/internal/examples/agent/main.go b/internal/examples/agent/main.go index 88c56bd6..b0b91de9 100644 --- a/internal/examples/agent/main.go +++ b/internal/examples/agent/main.go @@ -1,63 +1,311 @@ package main import ( + "context" + "errors" "flag" + "fmt" "log" + "net/url" "os" "os/signal" + "strconv" + "time" + opampinternal "github.com/open-telemetry/opamp-go/internal" "github.com/open-telemetry/opamp-go/internal/examples/agent/agent" "github.com/open-telemetry/opamp-go/internal/examples/config" + + "github.com/google/uuid" "go.opentelemetry.io/collector/config/configtls" + "go.opentelemetry.io/otel" ) -func main() { - var agentType string - flag.StringVar(&agentType, "t", "io.opentelemetry.collector", "Agent Type String") +// nopErrorHandler is used to turn any otel errors generated by scale agents into a nop. +type nopErrorHandler struct{} + +func (l *nopErrorHandler) Handle(err error) {} + +// flagConfig are all the flags/env vars that can be used to configure the agent process +type flagConfig struct { + // Agent config options + agentType string + agentVersion string + tlsInsecure bool + tlsInsecureSkipVerify bool + tlsCertFile string + tlsKeyFile string + tlsCAFile string + endpoint string + heartbeat time.Duration + quiteAgent bool + + // scale test options + runScale bool + scaleCount uint64 +} + +func (cfg flagConfig) verifyArgs() error { + if cfg.tlsCertFile != "" { + fi, err := os.Stat(cfg.tlsCertFile) + if err != nil { + return fmt.Errorf("tls-cert_file stat failed: %w", err) + } + if fi.IsDir() { + return fmt.Errorf("tls-cert_file: %s is a directory", cfg.tlsCertFile) + } + } + + if cfg.tlsKeyFile != "" { + fi, err := os.Stat(cfg.tlsKeyFile) + if err != nil { + return fmt.Errorf("tls-key_file stat failed: %w", err) + } + if fi.IsDir() { + return fmt.Errorf("tls-key_file: %s is a directory", cfg.tlsKeyFile) + } + } + + if cfg.tlsCAFile != "" { + fi, err := os.Stat(cfg.tlsCAFile) + if err != nil { + return 
fmt.Errorf("tls-ca_file stat failed: %w", err) + } + if fi.IsDir() { + return fmt.Errorf("tls-ca_file: %s is a directory", cfg.tlsCAFile) + } + } + + parsedURL, err := url.Parse(cfg.endpoint) + if err != nil { + return fmt.Errorf("endpoint failed to parse: %w", err) + } + switch parsedURL.Scheme { + case "http", "https": + case "ws", "wss": + default: + return fmt.Errorf("endpoint has an unknown scheme: %s", parsedURL.Scheme) + } + + if cfg.heartbeat < 0 { + return fmt.Errorf("heartbeat must be non-negative, got: %s", cfg.heartbeat) + } + + if cfg.runScale { + if cfg.scaleCount == 0 { + return errors.New("scale count must not be zero") + } + } + return nil +} + +// loadEnv will attempt to load config options from environment variables. +func loadEnv(cfg *flagConfig) { + if s, ok := os.LookupEnv("AGENT_TYPE"); ok { + cfg.agentType = s + } + + if s, ok := os.LookupEnv("AGENT_VERSION"); ok { + cfg.agentVersion = s + } - var agentVersion string - flag.StringVar(&agentVersion, "v", "1.0.0", "Agent Version String") + if s, ok := os.LookupEnv("AGENT_TLS_INSECURE"); ok { + b, err := strconv.ParseBool(s) + if err == nil { + cfg.tlsInsecure = b + } + } + + if s, ok := os.LookupEnv("AGENT_TLS_INSECURE_SKIP_VERIFY"); ok { + b, err := strconv.ParseBool(s) + if err == nil { + cfg.tlsInsecureSkipVerify = b + } + } + + if s, ok := os.LookupEnv("AGENT_TLS_CERT_FILE"); ok { + cfg.tlsCertFile = s + } + + if s, ok := os.LookupEnv("AGENT_TLS_KEY_FILE"); ok { + cfg.tlsKeyFile = s + } - var tlsInsecure bool - flag.BoolVar(&tlsInsecure, "tls-insecure", false, "Disable the client transport security.") + if s, ok := os.LookupEnv("AGENT_TLS_CA_FILE"); ok { + cfg.tlsCAFile = s + } - var tlsInsecureSkipVerify bool - flag.BoolVar(&tlsInsecureSkipVerify, "tls-insecure_skip_verify", false, "Will enable TLS but not verify the certificate.") + if s, ok := os.LookupEnv("AGENT_ENDPOINT"); ok { + cfg.endpoint = s + } - var tlsCertFile string - flag.StringVar(&tlsCertFile, "tls-cert_file", "", "Path to 
the TLS cert") + if s, ok := os.LookupEnv("AGENT_HEARTBEAT"); ok { + dur, err := time.ParseDuration(s) + if err == nil { + cfg.heartbeat = dur + } + } - var tlsKeyFile string - flag.StringVar(&tlsKeyFile, "tls-key_file", "", "Path to the TLS key") + if s, ok := os.LookupEnv("AGENT_QUITE"); ok { + b, err := strconv.ParseBool(s) + if err == nil { + cfg.quiteAgent = b + } + } - var tlsCAFile string - flag.StringVar(&tlsCAFile, "tls-ca_file", "", "Path to the CA cert. It verifies the server certificate") + if s, ok := os.LookupEnv("AGENT_RUN_SCALE"); ok { + b, err := strconv.ParseBool(s) + if err == nil { + cfg.runScale = b + } + } - var endpoint string - flag.StringVar(&endpoint, "endpoint", "wss://127.0.0.1:4320/v1/opamp", "OpAMP server endpoint URL") + if s, ok := os.LookupEnv("AGENT_SCALE_COUNT"); ok { + count, err := strconv.ParseUint(s, 10, 64) + if err == nil { + cfg.scaleCount = count + } + } +} +func main() { + var cfg flagConfig + flag.StringVar(&cfg.agentType, "t", "io.opentelemetry.collector", "Agent Type String (env var: AGENT_TYPE).") + flag.StringVar(&cfg.agentVersion, "v", "1.0.0", "Agent Version String (env var: AGENT_VERSION).") + flag.BoolVar(&cfg.tlsInsecure, "tls-insecure", false, "Disable the client transport security (env var: AGENT_TLS_INSECURE).") + flag.BoolVar(&cfg.tlsInsecureSkipVerify, "tls-insecure_skip_verify", false, "Will enable TLS but not verify the certificate (env var: AGENT_TLS_INSECURE_SKIP_VERIFY).") + flag.StringVar(&cfg.tlsCertFile, "tls-cert_file", "", "Path to the TLS cert (env var: AGENT_TLS_CERT_FILE).") + flag.StringVar(&cfg.tlsKeyFile, "tls-key_file", "", "Path to the TLS key (env var: AGENT_TLS_KEY_FILE).") + flag.StringVar(&cfg.tlsCAFile, "tls-ca_file", "", "Path to the CA cert. 
It verifies the server certificate (env var: AGENT_TLS_CA_FILE).") + flag.StringVar(&cfg.endpoint, "endpoint", "wss://127.0.0.1:4320/v1/opamp", "OpAMP server endpoint URL (env var: AGENT_ENDPOINT).") + flag.DurationVar(&cfg.heartbeat, "heartbeat", time.Second*30, "Heartbeat duration (env var: AGENT_HEARTBEAT).") + flag.BoolVar(&cfg.quiteAgent, "quite-agent", false, "Disable agent logger (env var: AGENT_QUITE).") + flag.BoolVar(&cfg.runScale, "run-scale", false, "Run in scale-test mode (env var: AGENT_RUN_SCALE).") + flag.Uint64Var(&cfg.scaleCount, "scale-count", 1000, "The number of agents to start in scale mode (env var: AGENT_SCALE_COUNT).") flag.Parse() + loadEnv(&cfg) + + logger := log.Default() + if cfg.runScale { + logger = log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) + } + + if err := cfg.verifyArgs(); err != nil { + logger.Fatalf("Arg verification error: %v", err) + } + + if cfg.quiteAgent { + // Silence the otel errors agents can generate. + // i.e.: failed to upload metrics: ... + otel.SetErrorHandler(&nopErrorHandler{}) + } + + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) + defer cancel() + + var agents []*agent.Agent + if cfg.runScale { + var err error + agents, err = runScale(ctx, cfg) + if err != nil { + logger.Printf("Error starting agents: %v", err) + } + logger.Printf("%d agents started", len(agents)) + } else { + a, err := runAgent(cfg) + if err != nil { + logger.Fatalf("Agent encountered error when starting: %v", err) + } + agents = []*agent.Agent{a} + } - config := &config.AgentConfig{ - Endpoint: endpoint, + <-ctx.Done() + for _, a := range agents { + a.Shutdown() + } + logger.Println("All agents stopped") +} + +// runScale starts and returns the configured number of agents for a scale test. +// If an error is encountered when starting an agent, it is returned along with all started agents.
+func runScale(ctx context.Context, cfg flagConfig) ([]*agent.Agent, error) {
+	nopLogger := &opampinternal.NopLogger{}
+	agentConfig := &config.AgentConfig{
+		Endpoint:          cfg.endpoint,
+		HeartbeatInterval: &cfg.heartbeat,
+		TLSSetting: configtls.ClientConfig{
+			Insecure:           cfg.tlsInsecure,
+			InsecureSkipVerify: cfg.tlsInsecureSkipVerify,
+			Config: configtls.Config{
+				KeyFile:  cfg.tlsKeyFile,
+				CertFile: cfg.tlsCertFile,
+				CAFile:   cfg.tlsCAFile,
+			},
+		},
+	}
+
+	// Use of slice instead of a concurrent goroutine to reduce memory usage.
+	agents := make([]*agent.Agent, 0, cfg.scaleCount)
+	var errs error // Accumulated failures; returned together with the agents that did start.
+	for range cfg.scaleCount {
+		select {
+		case <-ctx.Done(): // Interrupted: stop launching and hand back what is already running.
+			return agents, errs
+		default:
+		}
+
+		id, err := uuid.NewV7()
+		if err != nil {
+			return agents, errors.Join(errs, err) // Keep started agents so the caller can shut them down.
+		}
+
+		opts := []agent.Option{
+			agent.WithAgentType(cfg.agentType),
+			agent.WithAgentVersion(cfg.agentVersion),
+			agent.WithNoClientCertRequest(),
+			agent.WithInstanceID(id),
+		}
+
+		if cfg.quiteAgent {
+			opts = append(opts, agent.WithLogger(nopLogger))
+		} else {
+			opts = append(opts, agent.WithLogger(agent.NewScaleLogger(id)))
+		}
+		a := agent.NewAgent(agentConfig, opts...)
+		if startErr := a.Start(); startErr != nil {
+			errs = errors.Join(errs, startErr) // Record the failure and move on to the next agent.
+			continue
+		}
+		agents = append(agents, a)
+	}
+	return agents, errs
+}
+
+// runAgent starts and runs a single agent with the passed config.
+func runAgent(cfg flagConfig) (*agent.Agent, error) { + agentConfig := &config.AgentConfig{ + Endpoint: cfg.endpoint, + HeartbeatInterval: &cfg.heartbeat, TLSSetting: configtls.ClientConfig{ - Insecure: tlsInsecure, - InsecureSkipVerify: tlsInsecureSkipVerify, + Insecure: cfg.tlsInsecure, + InsecureSkipVerify: cfg.tlsInsecureSkipVerify, Config: configtls.Config{ - KeyFile: tlsKeyFile, - CertFile: tlsCertFile, - CAFile: tlsCAFile, + KeyFile: cfg.tlsKeyFile, + CertFile: cfg.tlsCertFile, + CAFile: cfg.tlsCAFile, }, }, } - agent := agent.NewAgent(config, agent.WithAgentType(agentType), agent.WithAgentVersion(agentVersion)) - if err := agent.Start(); err != nil { - log.Fatalf("Agent encountered error when starting: %v", err) + opts := []agent.Option{ + agent.WithAgentType(cfg.agentType), + agent.WithAgentVersion(cfg.agentVersion), + } + if cfg.quiteAgent { + opts = append(opts, agent.WithLogger(&opampinternal.NopLogger{})) } - interrupt := make(chan os.Signal, 1) - signal.Notify(interrupt, os.Interrupt) - <-interrupt - agent.Shutdown() + agent := agent.NewAgent(agentConfig, opts...) + err := agent.Start() + return agent, err } diff --git a/internal/examples/docker-compose.yml b/internal/examples/docker-compose.yml index 1581fb55..eed7724a 100644 --- a/internal/examples/docker-compose.yml +++ b/internal/examples/docker-compose.yml @@ -15,7 +15,6 @@ services: build: context: ../.. dockerfile: internal/examples/Dockerfile.agent - container_name: opamp-agent entrypoint: - "./agent" - "--endpoint" @@ -26,21 +25,8 @@ services: depends_on: - opamp-server restart: unless-stopped - - opamp-scale: - build: - context: ../.. 
- dockerfile: internal/examples/Dockerfile.scale - container_name: opamp-scale - environment: - SCALE_AGENT_COUNT: "${SCALE_AGENT_COUNT:-1000}" - networks: - - opamp-network - profiles: - - scale - depends_on: - - opamp-server - restart: unless-stopped + deploy: + replicas: 1 networks: opamp-network: diff --git a/internal/examples/makefile b/internal/examples/makefile index 1d36c621..20815b1d 100644 --- a/internal/examples/makefile +++ b/internal/examples/makefile @@ -7,7 +7,7 @@ SRC_ROOT := $(shell git rev-parse --show-toplevel) # Build targets .PHONY: build-examples -build-examples: build-example-agent build-example-supervisor build-example-server build-example-scale +build-examples: build-example-agent build-example-supervisor build-example-server .PHONY: build-example-agent build-example-agent: @@ -21,10 +21,6 @@ build-example-supervisor: build-example-server: $(GOCMD) build -o server/bin/server server/main.go -.PHONY: build-example-scale -build-example-scale: - $(GOCMD) build -o scale/bin/scale scale/main.go - .PHONY: run-examples run-examples: build-examples server/bin/server & @@ -65,7 +61,7 @@ docker-compose-up: .PHONY: docker-compose-down docker-compose-down: - cd $(SRC_ROOT)/internal/examples && docker compose --profile scale down + cd $(SRC_ROOT)/internal/examples && docker compose down .PHONY: docker-compose-logs docker-compose-logs: @@ -73,9 +69,9 @@ docker-compose-logs: .PHONY: docker-compose-scale docker-compose-scale: - @echo "Usage: make docker-compose-scale [AGENTS=]" + @echo "Usage: make docker-compose-scale AGENTS=" @if [ -z "$(AGENTS)" ]; then \ - cd $(SRC_ROOT)/internal/examples && docker compose --profile scale up -d --build; \ - else \ - cd $(SRC_ROOT)/internal/examples && SCALE_AGENT_COUNT=$(AGENTS) docker compose --profile scale up -d --build; \ + echo "Error: AGENTS variable not set"; \ + exit 1; \ fi + cd $(SRC_ROOT)/internal/examples && docker compose up --scale opamp-agent=$(AGENTS) -d diff --git a/internal/examples/scale/README.md 
b/internal/examples/scale/README.md deleted file mode 100644 index a67730c4..00000000 --- a/internal/examples/scale/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# scale - -Scale provides agents to scale test an OpAMP server. - -Websocket and HTTP servers are supported, but all agents must use the same connection type. -Each agent uses it's own OpAMP agent client, and runs in a goroutine. - -The main driver logs to stdout, and all agents log to stderr. - -## Usage - -Configuration may be specified through command line flags, or environment variables. - -Configuration load priority is: `environment variables > flags > defaults`. - -``` -Usage of scale: - -agent-count uint - The number of agents to start (env var: SCALE_AGENT_COUNT). (default 1000) - -heartbeat duration - Heartbeat duration (env var: SCALE_HEARTBEAT). (default 30s) - -server-url string - OpAMP server URL (env var: SCALE_SERVER_URL). (default "wss://127.0.0.1:4320/v1/opamp") - -tls-ca_file string - Path to the OpAMP server CA cert (env var: SCALE_TLS_CA_FILE). - -tls-insecure - Disable the client transport security (env var: SCALE_TLS_INSECURE). - -tls-insecure_skip_verify - Will enable TLS but not verify the certificate (env var: SCALE_TLS_INSECURE_SKIP_VERIFY). - -verbose-agents - Enable agent logging (env var: SCALE_VERBOSE_AGENTS). 
-``` diff --git a/internal/examples/scale/main.go b/internal/examples/scale/main.go deleted file mode 100644 index 89d41bcd..00000000 --- a/internal/examples/scale/main.go +++ /dev/null @@ -1,210 +0,0 @@ -package main - -import ( - "context" - "errors" - "flag" - "fmt" - "log" - "net/url" - "os" - "os/signal" - "strconv" - "time" - - opampinternal "github.com/open-telemetry/opamp-go/internal" - "github.com/open-telemetry/opamp-go/internal/examples/agent/agent" - "github.com/open-telemetry/opamp-go/internal/examples/config" - - "github.com/google/uuid" - "go.opentelemetry.io/collector/config/configtls" - "go.opentelemetry.io/otel" -) - -// nopErrorHandler is used to turn any otel errors generated by scale agents into a nop. -type nopErrorHandler struct{} - -func (l *nopErrorHandler) Handle(err error) {} - -// scaleConfig are all the flags/env vars that can be used to configure the scale test driver. -type scaleConfig struct { - agentCount uint64 - serverURL string - heartbeat time.Duration - verboseAgents bool - tlsInsecure bool - tlsInsecureSkipVerify bool - tlsCAFile string -} - -// verifyArgs checks that all scaleConfig options are valid. 
-func (cfg scaleConfig) verifyArgs() error { - if cfg.agentCount == 0 { - return errors.New("agent count must not be zero") - } - parsedURL, err := url.Parse(cfg.serverURL) - if err != nil { - return fmt.Errorf("server-url failed to parse: %w", err) - } - switch parsedURL.Scheme { - case "http", "https": - case "ws", "wss": - default: - return fmt.Errorf("server-url has an unknown scheme: %s", parsedURL.Scheme) - } - - if cfg.heartbeat < 0 { - return fmt.Errorf("heartbeat must be non-negative, got: %s", cfg.heartbeat) - } - - if cfg.tlsCAFile != "" { - fi, err := os.Stat(cfg.tlsCAFile) - if err != nil { - return fmt.Errorf("tls-ca_file stat failed: %w", err) - } - if fi.IsDir() { - return fmt.Errorf("tls-ca_file: %s is a directory", cfg.tlsCAFile) - } - } - - return nil -} - -// loadEnv will attempt to load scaleConfig options from environment variables. -func loadEnv(cfg *scaleConfig) { - if s, ok := os.LookupEnv("SCALE_AGENT_COUNT"); ok { - count, err := strconv.ParseUint(s, 10, 64) - if err == nil { - cfg.agentCount = count - } - } - - if s, ok := os.LookupEnv("SCALE_SERVER_URL"); ok { - cfg.serverURL = s - } - - if s, ok := os.LookupEnv("SCALE_HEARTBEAT"); ok { - dur, err := time.ParseDuration(s) - if err == nil { - cfg.heartbeat = dur - } - } - - if s, ok := os.LookupEnv("SCALE_VERBOSE_AGENTS"); ok { - b, err := strconv.ParseBool(s) - if err == nil { - cfg.verboseAgents = b - } - } - - if s, ok := os.LookupEnv("SCALE_TLS_INSECURE"); ok { - b, err := strconv.ParseBool(s) - if err == nil { - cfg.tlsInsecure = b - } - } - - if s, ok := os.LookupEnv("SCALE_TLS_INSECURE_SKIP_VERIFY"); ok { - b, err := strconv.ParseBool(s) - if err == nil { - cfg.tlsInsecureSkipVerify = b - } - } - - if s, ok := os.LookupEnv("SCALE_TLS_CA_FILE"); ok { - cfg.tlsCAFile = s - } -} - -// runAgents starts and returns the configured amount of agents. -// If an error is encountered when starting an agent, it is return along with all started agents. 
-func runAgents(ctx context.Context, cfg scaleConfig) ([]*agent.Agent, error) { - nopLogger := &opampinternal.NopLogger{} - agentConfig := &config.AgentConfig{ - Endpoint: cfg.serverURL, - HeartbeatInterval: &cfg.heartbeat, - TLSSetting: configtls.ClientConfig{ - Insecure: cfg.tlsInsecure, - InsecureSkipVerify: cfg.tlsInsecureSkipVerify, - Config: configtls.Config{ - CAFile: cfg.tlsCAFile, - }, - }, - } - - // Use of slice instead of a concurrent goroutine to reduce memory usage. - agents := make([]*agent.Agent, 0, cfg.agentCount) - var err error - for range cfg.agentCount { - select { - case <-ctx.Done(): - return agents, err - default: - } - - id, err := uuid.NewV7() - if err != nil { - return nil, err - } - - opts := []agent.Option{ - agent.WithNoClientCertRequest(), - agent.WithInstanceID(id), - } - - if cfg.verboseAgents { - opts = append(opts, agent.WithLogger(agent.NewScaleLogger(id))) - } else { - opts = append(opts, agent.WithLogger(nopLogger)) - } - a := agent.NewAgent(agentConfig, opts...) 
- if startErr := a.Start(); err != nil { - err = errors.Join(err, startErr) - continue - } - agents = append(agents, a) - } - return agents, err -} - -func main() { - var cfg scaleConfig - flag.Uint64Var(&cfg.agentCount, "agent-count", 1000, "The number of agents to start (env var: SCALE_AGENT_COUNT).") - flag.StringVar(&cfg.serverURL, "server-url", "wss://127.0.0.1:4320/v1/opamp", "OpAMP server URL (env var: SCALE_SERVER_URL).") - flag.DurationVar(&cfg.heartbeat, "heartbeat", time.Second*30, "Heartbeat duration (env var: SCALE_HEARTBEAT).") - flag.BoolVar(&cfg.verboseAgents, "verbose-agents", false, "Enable agent logging (env var: SCALE_VERBOSE_AGENTS).") - flag.BoolVar(&cfg.tlsInsecure, "tls-insecure", false, "Disable the client transport security (env var: SCALE_TLS_INSECURE).") - flag.BoolVar(&cfg.tlsInsecureSkipVerify, "tls-insecure_skip_verify", false, "Will enable TLS but not verify the certificate (env var: SCALE_TLS_INSECURE_SKIP_VERIFY).") - flag.StringVar(&cfg.tlsCAFile, "tls-ca_file", "", "Path to the OpAMP server CA cert (env var: SCALE_TLS_CA_FILE).") - - flag.Parse() - loadEnv(&cfg) - - logger := log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) - if err := cfg.verifyArgs(); err != nil { - logger.Fatalf("Arg verification error: %v", err) - } - - ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) - defer cancel() - - // Silence the otel errors agents can generate. - // i.e.: failed to upload metrics: ... 
- if !cfg.verboseAgents { - otel.SetErrorHandler(&nopErrorHandler{}) - } - - logger.Printf("Starting %d agents", cfg.agentCount) - - agents, err := runAgents(ctx, cfg) - if err != nil { - logger.Printf("Error starting agents: %v", err) - } - logger.Printf("%d agents started", len(agents)) - - <-ctx.Done() // wait for SIGINT - for _, a := range agents { - a.Shutdown() - } - logger.Println("All agents stopped") -} diff --git a/makefile b/makefile index 9962a406..c614a334 100644 --- a/makefile +++ b/makefile @@ -50,10 +50,6 @@ build-example-supervisor: build-example-server: $(MAKE) -C internal/examples build-example-server -.PHONY: build-example-scale -build-example-scale: - $(MAKE) -C internal/examples build-example-scale - .PHONY: run-examples run-examples: $(MAKE) -C internal/examples run-examples From 0fa4c8672009014edb9ef7a2b0cca6339c6bc6d2 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Fri, 16 Jan 2026 12:48:28 -0800 Subject: [PATCH 14/22] Remove unused dockerfile --- internal/examples/Dockerfile.scale | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 internal/examples/Dockerfile.scale diff --git a/internal/examples/Dockerfile.scale b/internal/examples/Dockerfile.scale deleted file mode 100644 index 63099f4b..00000000 --- a/internal/examples/Dockerfile.scale +++ /dev/null @@ -1,21 +0,0 @@ -FROM golang:1.24-bookworm AS builder - -WORKDIR /src - -COPY ./ ./ - -WORKDIR /src/internal/examples - -ENV GOPROXY="https://proxy.golang.org,direct" - -RUN go mod download - -RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -o scale/bin/scale scale/main.go - -FROM gcr.io/distroless/static-debian12:nonroot - -WORKDIR /app - -COPY --from=builder /src/internal/examples/scale/bin/scale ./ - -ENTRYPOINT ["./scale"] From 0fd21e7ce93b369494464020ff139b0270c27ced Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Fri, 16 Jan 2026 12:51:50 -0800 Subject: [PATCH 15/22] fix fmt --- internal/examples/agent/main.go | 1 + 1 file changed, 1 
insertion(+) diff --git a/internal/examples/agent/main.go b/internal/examples/agent/main.go index b0b91de9..26e7bdbb 100644 --- a/internal/examples/agent/main.go +++ b/internal/examples/agent/main.go @@ -167,6 +167,7 @@ func loadEnv(cfg *flagConfig) { } } } + func main() { var cfg flagConfig flag.StringVar(&cfg.agentType, "t", "io.opentelemetry.collector", "Agent Type String (env var: AGENT_TYPE).") From 3d7d253a6fb0345a3db6ca433a85e7fc61a55a5e Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Mon, 19 Jan 2026 10:34:41 -0800 Subject: [PATCH 16/22] Review feedback --- internal/examples/agent/README.md | 2 +- internal/examples/server/opampsrv/metrics.go | 84 +++++++++++++++++++ internal/examples/server/opampsrv/opampsrv.go | 62 +++----------- 3 files changed, 97 insertions(+), 51 deletions(-) create mode 100644 internal/examples/server/opampsrv/metrics.go diff --git a/internal/examples/agent/README.md b/internal/examples/agent/README.md index 4d1fa934..9fed2e41 100644 --- a/internal/examples/agent/README.md +++ b/internal/examples/agent/README.md @@ -2,7 +2,7 @@ Agent provides provides and example agent implementation for the OpAMP protocol. -Both HTTP and Websocket connections are supported. +Both HTTP and Websocket connections are supported by the agent, however the [example/server](../example/server) only supports Websocket.. The example agent can be in a normal mode; where the binary starts a single agent, or in scale mode (when `-run-scale` is passed or `AGENT_RUN_SCALE=true` is set). 
diff --git a/internal/examples/server/opampsrv/metrics.go b/internal/examples/server/opampsrv/metrics.go new file mode 100644 index 00000000..934d93ed --- /dev/null +++ b/internal/examples/server/opampsrv/metrics.go @@ -0,0 +1,84 @@ +package opampsrv + +import ( + "context" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" + "go.opentelemetry.io/otel/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + otelresource "go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" +) + +var ( + opampReadErrAttr = attribute.String("error.type", "read") + opampRespErrAttr = attribute.String("error.type", "resp") +) + +// metricsTracker is a struct to encasulate all metrics the OpAMP server tracks. +type metricsTracker struct { + connCounter metric.Int64UpDownCounter + errCounter metric.Int64Counter + meter metric.Meter + + resource *otelresource.Resource + exporter sdkmetric.Exporter +} + +func NewMetricsTracker() (*metricsTracker, error) { + resource, err := otelresource.New(context.Background(), + otelresource.WithAttributes( + semconv.ServiceNameKey.String("io.opentelemetry.opampserver"), + semconv.ServiceVersionKey.String("0.1.0"), + ), + ) + if err != nil { + return nil, err + } + exporter, err := stdoutmetric.New(stdoutmetric.WithPrettyPrint()) + if err != nil { + return nil, err + } + + meterProvider := sdkmetric.NewMeterProvider( + sdkmetric.WithResource(resource), + sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)), + ) + otel.SetMeterProvider(meterProvider) + meter := otel.Meter("opamp") + connCounter, err := meter.Int64UpDownCounter("connections.active.count") + if err != nil { + return nil, err + } + + errCounter, err := meter.Int64Counter("connections.error.count") + if err != nil { + return nil, err + } + + return &metricsTracker{ + connCounter: connCounter, + errCounter: errCounter, + meter: meter, + resource: resource, + exporter: 
exporter, + }, nil +} + +func (m *metricsTracker) OnConnected(ctx context.Context) { + m.connCounter.Add(ctx, 1) +} + +func (m *metricsTracker) OnDisconnect(ctx context.Context) { + m.connCounter.Add(ctx, -1) +} + +func (m *metricsTracker) OnReadMessageError(ctx context.Context) { + m.errCounter.Add(ctx, 1, metric.WithAttributes(opampReadErrAttr)) +} + +func (m *metricsTracker) OnMessageResponseError(ctx context.Context) { + m.errCounter.Add(ctx, 1, metric.WithAttributes(opampRespErrAttr)) +} diff --git a/internal/examples/server/opampsrv/opampsrv.go b/internal/examples/server/opampsrv/opampsrv.go index f83b2d04..427ef968 100644 --- a/internal/examples/server/opampsrv/opampsrv.go +++ b/internal/examples/server/opampsrv/opampsrv.go @@ -8,13 +8,6 @@ import ( "github.com/oklog/ulid/v2" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" - "go.opentelemetry.io/otel/metric" - sdkmetric "go.opentelemetry.io/otel/sdk/metric" - otelresource "go.opentelemetry.io/otel/sdk/resource" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" "github.com/open-telemetry/opamp-go/internal/examples/certs" "github.com/open-telemetry/opamp-go/internal/examples/server/data" @@ -23,17 +16,11 @@ import ( "github.com/open-telemetry/opamp-go/server/types" ) -var ( - opampReadErrAttr = attribute.String("error.type", "read") - opampRespErrAttr = attribute.String("error.type", "resp") -) - type Server struct { - opampSrv server.OpAMPServer - agents *data.Agents - logger *Logger - counter metric.Int64UpDownCounter - errCounter metric.Int64Counter + opampSrv server.OpAMPServer + agents *data.Agents + logger *Logger + metrics *metricsTracker } func NewServer(agents *data.Agents) *Server { @@ -44,41 +31,16 @@ func NewServer(agents *data.Agents) *Server { log.Default().Flags()|log.Lmsgprefix|log.Lmicroseconds, ), } - resource, err := 
otelresource.New(context.Background(), - otelresource.WithAttributes( - semconv.ServiceNameKey.String("io.opentelemetry.opampserver"), - semconv.ServiceVersionKey.String("0.1.0"), - ), - ) - if err != nil { - panic(err) - } - exporter, err := stdoutmetric.New(stdoutmetric.WithPrettyPrint()) - if err != nil { - panic(err) - } - - meterProvider := sdkmetric.NewMeterProvider( - sdkmetric.WithResource(resource), - sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)), - ) - otel.SetMeterProvider(meterProvider) - meter := otel.Meter("opamp") - counter, err := meter.Int64UpDownCounter("connections.active.count") - if err != nil { - panic(err) - } - errCounter, err := meter.Int64Counter("connections.error.count") + metrics, err := NewMetricsTracker() if err != nil { panic(err) } srv := &Server{ - agents: agents, - logger: logger, - counter: counter, - errCounter: errCounter, + agents: agents, + logger: logger, + metrics: metrics, } srv.opampSrv = server.New(logger) @@ -94,14 +56,14 @@ func (srv *Server) Start() { return types.ConnectionResponse{ Accept: true, ConnectionCallbacks: types.ConnectionCallbacks{ - OnConnected: func(ctx context.Context, _ types.Connection) { srv.counter.Add(ctx, 1) }, + OnConnected: func(ctx context.Context, _ types.Connection) { srv.metrics.OnConnected(ctx) }, OnMessage: srv.onMessage, OnConnectionClose: srv.onDisconnect, OnReadMessageError: func(_ types.Connection, _ int, _ []byte, _ error) { - srv.errCounter.Add(context.Background(), 1, metric.WithAttributes(opampReadErrAttr)) + srv.metrics.OnReadMessageError(context.Background()) }, OnMessageResponseError: func(_ types.Connection, _ *protobufs.ServerToAgent, _ error) { - srv.errCounter.Add(context.Background(), 1, metric.WithAttributes(opampRespErrAttr)) + srv.metrics.OnMessageResponseError(context.Background()) }, }, } @@ -132,7 +94,7 @@ func (srv *Server) Stop() { } func (srv *Server) onDisconnect(conn types.Connection) { - srv.counter.Add(context.Background(), -1) + 
srv.metrics.OnDisconnect(context.Background()) srv.agents.RemoveConnection(conn) } From 1797daef4e833e1c3346b38a094cfbc56e1c4f5e Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Mon, 19 Jan 2026 10:36:12 -0800 Subject: [PATCH 17/22] Fix README typos --- internal/examples/agent/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/examples/agent/README.md b/internal/examples/agent/README.md index 9fed2e41..ebf7bef9 100644 --- a/internal/examples/agent/README.md +++ b/internal/examples/agent/README.md @@ -2,7 +2,7 @@ Agent provides provides and example agent implementation for the OpAMP protocol. -Both HTTP and Websocket connections are supported by the agent, however the [example/server](../example/server) only supports Websocket.. +Both HTTP and Websocket connections are supported by the agent, however the [examples/server](../examples/server) only supports Websocket. The example agent can be in a normal mode; where the binary starts a single agent, or in scale mode (when `-run-scale` is passed or `AGENT_RUN_SCALE=true` is set). 
From 53be1ed2145dc61c89e92e61852ceb311dac3dbe Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 21 Jan 2026 09:17:40 -0800 Subject: [PATCH 18/22] Remove metrics from example server --- internal/examples/go.mod | 1 - internal/examples/go.sum | 2 - internal/examples/server/opampsrv/metrics.go | 84 ------------------- internal/examples/server/opampsrv/opampsrv.go | 19 +---- 4 files changed, 2 insertions(+), 104 deletions(-) delete mode 100644 internal/examples/server/opampsrv/metrics.go diff --git a/internal/examples/go.mod b/internal/examples/go.mod index f5d220e0..0d9e1ec1 100644 --- a/internal/examples/go.mod +++ b/internal/examples/go.mod @@ -15,7 +15,6 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 go.opentelemetry.io/otel v1.39.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 - go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 go.opentelemetry.io/otel/metric v1.39.0 go.opentelemetry.io/otel/sdk v1.39.0 go.opentelemetry.io/otel/sdk/metric v1.39.0 diff --git a/internal/examples/go.sum b/internal/examples/go.sum index d70a26d7..5c391bbb 100644 --- a/internal/examples/go.sum +++ b/internal/examples/go.sum @@ -164,8 +164,6 @@ go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 h1:mM8nKi6/iFQ0iqst80wDHU2ge198Ye/TfN0WBS5U24Y= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0/go.mod h1:0PrIIzDteLSmNyxqcGYRL4mDIo8OTuBAOI/Bn1URxac= -go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 h1:5gn2urDL/FBnK8OkCfD1j3/ER79rUuTYmCvlXBKeYL8= -go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0/go.mod h1:0fBG6ZJxhqByfFZDwSwpZGzJU671HkwpWaNe2t4VUPI= go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= go.opentelemetry.io/otel/metric 
v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= diff --git a/internal/examples/server/opampsrv/metrics.go b/internal/examples/server/opampsrv/metrics.go deleted file mode 100644 index 934d93ed..00000000 --- a/internal/examples/server/opampsrv/metrics.go +++ /dev/null @@ -1,84 +0,0 @@ -package opampsrv - -import ( - "context" - - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/exporters/stdout/stdoutmetric" - "go.opentelemetry.io/otel/metric" - sdkmetric "go.opentelemetry.io/otel/sdk/metric" - otelresource "go.opentelemetry.io/otel/sdk/resource" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" -) - -var ( - opampReadErrAttr = attribute.String("error.type", "read") - opampRespErrAttr = attribute.String("error.type", "resp") -) - -// metricsTracker is a struct to encasulate all metrics the OpAMP server tracks. -type metricsTracker struct { - connCounter metric.Int64UpDownCounter - errCounter metric.Int64Counter - meter metric.Meter - - resource *otelresource.Resource - exporter sdkmetric.Exporter -} - -func NewMetricsTracker() (*metricsTracker, error) { - resource, err := otelresource.New(context.Background(), - otelresource.WithAttributes( - semconv.ServiceNameKey.String("io.opentelemetry.opampserver"), - semconv.ServiceVersionKey.String("0.1.0"), - ), - ) - if err != nil { - return nil, err - } - exporter, err := stdoutmetric.New(stdoutmetric.WithPrettyPrint()) - if err != nil { - return nil, err - } - - meterProvider := sdkmetric.NewMeterProvider( - sdkmetric.WithResource(resource), - sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exporter)), - ) - otel.SetMeterProvider(meterProvider) - meter := otel.Meter("opamp") - connCounter, err := meter.Int64UpDownCounter("connections.active.count") - if err != nil { - return nil, err - } - - errCounter, err := meter.Int64Counter("connections.error.count") - if err != nil { - 
return nil, err - } - - return &metricsTracker{ - connCounter: connCounter, - errCounter: errCounter, - meter: meter, - resource: resource, - exporter: exporter, - }, nil -} - -func (m *metricsTracker) OnConnected(ctx context.Context) { - m.connCounter.Add(ctx, 1) -} - -func (m *metricsTracker) OnDisconnect(ctx context.Context) { - m.connCounter.Add(ctx, -1) -} - -func (m *metricsTracker) OnReadMessageError(ctx context.Context) { - m.errCounter.Add(ctx, 1, metric.WithAttributes(opampReadErrAttr)) -} - -func (m *metricsTracker) OnMessageResponseError(ctx context.Context) { - m.errCounter.Add(ctx, 1, metric.WithAttributes(opampRespErrAttr)) -} diff --git a/internal/examples/server/opampsrv/opampsrv.go b/internal/examples/server/opampsrv/opampsrv.go index 427ef968..b68eb433 100644 --- a/internal/examples/server/opampsrv/opampsrv.go +++ b/internal/examples/server/opampsrv/opampsrv.go @@ -20,7 +20,6 @@ type Server struct { opampSrv server.OpAMPServer agents *data.Agents logger *Logger - metrics *metricsTracker } func NewServer(agents *data.Agents) *Server { @@ -32,15 +31,9 @@ func NewServer(agents *data.Agents) *Server { ), } - metrics, err := NewMetricsTracker() - if err != nil { - panic(err) - } - srv := &Server{ - agents: agents, - logger: logger, - metrics: metrics, + agents: agents, + logger: logger, } srv.opampSrv = server.New(logger) @@ -56,15 +49,8 @@ func (srv *Server) Start() { return types.ConnectionResponse{ Accept: true, ConnectionCallbacks: types.ConnectionCallbacks{ - OnConnected: func(ctx context.Context, _ types.Connection) { srv.metrics.OnConnected(ctx) }, OnMessage: srv.onMessage, OnConnectionClose: srv.onDisconnect, - OnReadMessageError: func(_ types.Connection, _ int, _ []byte, _ error) { - srv.metrics.OnReadMessageError(context.Background()) - }, - OnMessageResponseError: func(_ types.Connection, _ *protobufs.ServerToAgent, _ error) { - srv.metrics.OnMessageResponseError(context.Background()) - }, }, } }, @@ -94,7 +80,6 @@ func (srv *Server) Stop() 
{ } func (srv *Server) onDisconnect(conn types.Connection) { - srv.metrics.OnDisconnect(context.Background()) srv.agents.RemoveConnection(conn) } From 182cdd3ce3b11e2cf523a0af2ac10355bc26c9ba Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Wed, 21 Jan 2026 09:19:06 -0800 Subject: [PATCH 19/22] fix go.mod --- internal/examples/go.mod | 18 ++++++++---------- internal/examples/go.sum | 40 ++++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/internal/examples/go.mod b/internal/examples/go.mod index 0d9e1ec1..6d6fe4ce 100644 --- a/internal/examples/go.mod +++ b/internal/examples/go.mod @@ -9,25 +9,24 @@ require ( github.com/oklog/ulid/v2 v2.1.0 github.com/open-telemetry/opamp-go v0.1.0 github.com/shirou/gopsutil v3.21.11+incompatible - github.com/stretchr/testify v1.11.1 + github.com/stretchr/testify v1.10.0 go.opentelemetry.io/collector/config/configopaque v1.38.0 go.opentelemetry.io/collector/config/configtls v1.38.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 - go.opentelemetry.io/otel v1.39.0 + go.opentelemetry.io/otel v1.24.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 - go.opentelemetry.io/otel/metric v1.39.0 - go.opentelemetry.io/otel/sdk v1.39.0 - go.opentelemetry.io/otel/sdk/metric v1.39.0 + go.opentelemetry.io/otel/metric v1.24.0 + go.opentelemetry.io/otel/sdk v1.24.0 + go.opentelemetry.io/otel/sdk/metric v1.24.0 google.golang.org/protobuf v1.36.11 ) require ( - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/foxboron/go-tpm-keyfiles v0.0.0-20250323135004-b31fac66206e // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect - github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/golang/protobuf v1.5.3 // indirect 
@@ -42,12 +41,11 @@ require ( github.com/tklauser/go-sysconf v0.3.9 // indirect github.com/tklauser/numcpus v0.3.0 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/otel/trace v1.39.0 // indirect + go.opentelemetry.io/otel/trace v1.24.0 // indirect go.opentelemetry.io/proto/otlp v1.1.0 // indirect golang.org/x/crypto v0.35.0 // indirect golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.39.0 // indirect + golang.org/x/sys v0.30.0 // indirect golang.org/x/text v0.22.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect diff --git a/internal/examples/go.sum b/internal/examples/go.sum index 5c391bbb..3f1d263e 100644 --- a/internal/examples/go.sum +++ b/internal/examples/go.sum @@ -15,8 +15,6 @@ github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAm github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= -github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -34,8 +32,8 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/go-ldap/ldap v3.0.2+incompatible/go.mod 
h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= @@ -130,8 +128,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/rhnvrm/simples3 v0.6.1/go.mod h1:Y+3vYm2V7Y4VijFoJHHTrja6OgPrJ2cBti8dPGkC3sA= -github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= -github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= @@ -144,34 +142,32 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod 
h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= -github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tklauser/go-sysconf v0.3.9 h1:JeUVdAOWhhxVcU6Eqr/ATFHgXk/mmiItdKeJPev3vTo= github.com/tklauser/go-sysconf v0.3.9/go.mod h1:11DU/5sG7UexIrp/O6g35hrWzu0JxlwQ3LSFUzyeuhs= github.com/tklauser/numcpus v0.3.0 h1:ILuRUQBtssgnxw0XXIjKUC56fgnOrFoQQ/4+DeU2biQ= github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= -go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/collector/config/configopaque v1.38.0 h1:qLefkP4XNCud1Dge6b6lOU1KptUfAHtVWNs9iGAYYqY= go.opentelemetry.io/collector/config/configopaque v1.38.0/go.mod h1:aAOmM/mSWE2F3A58x4MUw1bYW8TIjVxn5/WfgxRgMu0= go.opentelemetry.io/collector/config/configtls v1.38.0 h1:bn5/oCLpAI+0LVg9q7dySZXi2swNWn6qmvkoq7A8/84= go.opentelemetry.io/collector/config/configtls v1.38.0/go.mod h1:dkV33BhlveIfNTNUjBMYtRrVNVsRwnXpPLxkhLbZcPk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= -go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= -go.opentelemetry.io/otel v1.39.0/go.mod 
h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= +go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= +go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 h1:mM8nKi6/iFQ0iqst80wDHU2ge198Ye/TfN0WBS5U24Y= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0/go.mod h1:0PrIIzDteLSmNyxqcGYRL4mDIo8OTuBAOI/Bn1URxac= -go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= -go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= -go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= -go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= -go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= -go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= -go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= -go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= +go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= +go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw= +go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg= +go.opentelemetry.io/otel/sdk/metric v1.24.0 h1:yyMQrPzF+k88/DbH7o4FMAs80puqd+9osbiBrJrz/w8= +go.opentelemetry.io/otel/sdk/metric v1.24.0/go.mod h1:I6Y5FjH6rvEnTTAYQz3Mmv2kl6Ek5IIrmwTLqMrrOE0= +go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= +go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= go.opentelemetry.io/proto/otlp v1.1.0 
h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI= go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= @@ -208,8 +204,8 @@ golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= From 16f7ad11797b13e4cad5f35e093e3d63325a4e49 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Fri, 6 Feb 2026 10:23:48 -0800 Subject: [PATCH 20/22] Review feedback --- internal/examples/agent/main.go | 117 ++++++++++---------------------- 1 file changed, 37 insertions(+), 80 deletions(-) diff --git a/internal/examples/agent/main.go b/internal/examples/agent/main.go index 26e7bdbb..924e4a05 100644 --- a/internal/examples/agent/main.go +++ b/internal/examples/agent/main.go @@ -38,10 +38,9 @@ type flagConfig struct { tlsCAFile string endpoint string heartbeat time.Duration - quiteAgent bool - - // scale test options - runScale bool + quietAgent bool + // scaleCount = 1 runs a normal agent + // scaleCount > 1 runs scale test agents (pre-assigned IDs, no initial cert request) 
scaleCount uint64 } @@ -91,15 +90,14 @@ func (cfg flagConfig) verifyArgs() error { return fmt.Errorf("heartbeat must be non-negative, got: %s", cfg.heartbeat) } - if cfg.runScale { - if cfg.scaleCount == 0 { - return errors.New("scale count must not be zero") - } + if cfg.scaleCount == 0 { + return errors.New("scale count must not be zero") } return nil } // loadEnv will attempt to load config options from environment variables. +// used to specify options when running the agent in a container. func loadEnv(cfg *flagConfig) { if s, ok := os.LookupEnv("AGENT_TYPE"); ok { cfg.agentType = s @@ -146,17 +144,10 @@ func loadEnv(cfg *flagConfig) { } } - if s, ok := os.LookupEnv("AGENT_QUITE"); ok { + if s, ok := os.LookupEnv("AGENT_QUIET"); ok { b, err := strconv.ParseBool(s) if err == nil { - cfg.quiteAgent = b - } - } - - if s, ok := os.LookupEnv("AGENT_RUN_SCALE"); ok { - b, err := strconv.ParseBool(s) - if err == nil { - cfg.runScale = b + cfg.quietAgent = b } } @@ -179,23 +170,22 @@ func main() { flag.StringVar(&cfg.tlsCAFile, "tls-ca_file", "", "Path to the CA cert. 
It verifies the server certificate (env var: AGENT_TLS_CA_FILE).") flag.StringVar(&cfg.endpoint, "endpoint", "wss://127.0.0.1:4320/v1/opamp", "OpAMP server endpoint URL (env var: AGENT_ENDPOINT).") flag.DurationVar(&cfg.heartbeat, "heartbeat", time.Second*30, "Heartbeat duration (env var: AGENT_HEARTBEAT).") - flag.BoolVar(&cfg.quiteAgent, "quite-agent", false, "Disable agent logger (env var: AGENT_QUITE).") - flag.BoolVar(&cfg.runScale, "run-scale", false, "Run in scale-test mode (env var: AGENT_RUN_SCALE).") - flag.Uint64Var(&cfg.scaleCount, "scale-count", 1000, "The number of agents to start in scale mode (env var: AGENT_SCALE_COUNT).") + flag.BoolVar(&cfg.quietAgent, "quite-agent", false, "Disable agent logger (env var: AGENT_QUIET).") + flag.Uint64Var(&cfg.scaleCount, "scale-count", 1, "The number of agents to start in scale mode (env var: AGENT_SCALE_COUNT).") flag.Parse() loadEnv(&cfg) logger := log.Default() - if cfg.runScale { - logger = log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) - } - if err := cfg.verifyArgs(); err != nil { logger.Fatalf("Arg verification error: %v", err) } - if cfg.quiteAgent { + if cfg.scaleCount > 1 { + logger = log.New(os.Stdout, "scale-test: ", log.Ldate|log.Lmicroseconds|log.Lmsgprefix) + } + + if cfg.quietAgent { // Silence the otel errors agents can generate. // i.e.: failed to upload metrics: ... 
otel.SetErrorHandler(&nopErrorHandler{}) @@ -204,21 +194,11 @@ func main() { ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) defer cancel() - var agents []*agent.Agent - if cfg.runScale { - var err error - agents, err = runScale(ctx, cfg) - if err != nil { - logger.Printf("Error starting agents: %v", err) - } - logger.Printf("%d agents started", len(agents)) - } else { - a, err := runAgent(cfg) - if err != nil { - logger.Fatalf("Agent encountered error when starting: %v", err) - } - agents = []*agent.Agent{a} + agents, err := runScale(ctx, cfg) + if err != nil { + logger.Printf("Error starting agents: %v", err) } + logger.Printf("%d agents started", len(agents)) <-ctx.Done() for _, a := range agents { @@ -227,7 +207,7 @@ func main() { logger.Println("All agents stopped") } -// runScale starts and returns the configured amount of agents for a scale test. +// runScale starts and returns the configured amount of agents. // If an error is encountered when starting an agent, it is return along with all started agents. func runScale(ctx context.Context, cfg flagConfig) ([]*agent.Agent, error) { nopLogger := &opampinternal.NopLogger{} @@ -255,23 +235,29 @@ func runScale(ctx context.Context, cfg flagConfig) ([]*agent.Agent, error) { default: } - id, err := uuid.NewV7() - if err != nil { - return nil, err - } - opts := []agent.Option{ agent.WithAgentType(cfg.agentType), agent.WithAgentVersion(cfg.agentVersion), - agent.WithNoClientCertRequest(), - agent.WithInstanceID(id), } - - if cfg.quiteAgent { + if cfg.quietAgent { opts = append(opts, agent.WithLogger(nopLogger)) - } else { - opts = append(opts, agent.WithLogger(agent.NewScaleLogger(id))) } + + // Only pass in id and logger associated with id when running more than one agent. 
+ if cfg.scaleCount > 1 { + id, err := uuid.NewV7() + if err != nil { + return nil, err + } + opts = append(opts, + agent.WithNoClientCertRequest(), + agent.WithInstanceID(id), + ) + if !cfg.quiteAgent { + opts = append(opts, agent.WithLogger(agent.NewScaleLogger(id))) + } + } + a := agent.NewAgent(agentConfig, opts...) if startErr := a.Start(); err != nil { err = errors.Join(err, startErr) @@ -281,32 +267,3 @@ func runScale(ctx context.Context, cfg flagConfig) ([]*agent.Agent, error) { } return agents, err } - -// runAgent starts and runs a single agent with the passed config. -func runAgent(cfg flagConfig) (*agent.Agent, error) { - agentConfig := &config.AgentConfig{ - Endpoint: cfg.endpoint, - HeartbeatInterval: &cfg.heartbeat, - TLSSetting: configtls.ClientConfig{ - Insecure: cfg.tlsInsecure, - InsecureSkipVerify: cfg.tlsInsecureSkipVerify, - Config: configtls.Config{ - KeyFile: cfg.tlsKeyFile, - CertFile: cfg.tlsCertFile, - CAFile: cfg.tlsCAFile, - }, - }, - } - - opts := []agent.Option{ - agent.WithAgentType(cfg.agentType), - agent.WithAgentVersion(cfg.agentVersion), - } - if cfg.quiteAgent { - opts = append(opts, agent.WithLogger(&opampinternal.NopLogger{})) - } - - agent := agent.NewAgent(agentConfig, opts...) 
- err := agent.Start() - return agent, err -} From b3a3dedb4ef3ed023b79ed5b8d6fd5e21c429f42 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Fri, 6 Feb 2026 11:11:16 -0800 Subject: [PATCH 21/22] Fix typo --- internal/examples/agent/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/examples/agent/main.go b/internal/examples/agent/main.go index 924e4a05..9a221a05 100644 --- a/internal/examples/agent/main.go +++ b/internal/examples/agent/main.go @@ -253,7 +253,7 @@ func runScale(ctx context.Context, cfg flagConfig) ([]*agent.Agent, error) { agent.WithNoClientCertRequest(), agent.WithInstanceID(id), ) - if !cfg.quiteAgent { + if !cfg.quietAgent { opts = append(opts, agent.WithLogger(agent.NewScaleLogger(id))) } } From ded57abe89708b68da875a0752607e2974c9f647 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Fri, 6 Feb 2026 11:33:52 -0800 Subject: [PATCH 22/22] Fix merge --- internal/examples/agent/agent/agent.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/examples/agent/agent/agent.go b/internal/examples/agent/agent/agent.go index cfa17b82..c830b36d 100644 --- a/internal/examples/agent/agent/agent.go +++ b/internal/examples/agent/agent/agent.go @@ -238,7 +238,7 @@ func (agent *Agent) connect(ops ...settingsOp) error { customCapability_Health, }, } - err = agent.opampClient.SetCustomCapabilities(customCapabilities) + err = agent.client.SetCustomCapabilities(customCapabilities) if err != nil { return err } @@ -635,7 +635,7 @@ func (agent *Agent) processCustomMessage(ctx context.Context, customMessage *pro func (agent *Agent) sendCustomMessage(ctx context.Context, message *protobufs.CustomMessage) error { for { - sendingChan, err := agent.opampClient.SendCustomMessage(message) + sendingChan, err := agent.client.SendCustomMessage(message) switch { case err == nil: