Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/release/5.19'
Browse files Browse the repository at this point in the history
  • Loading branch information
Nikki Attea committed Apr 9, 2020
2 parents 41a04e3 + 59a4cde commit 1787cd7
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 6 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ and sensu-agent start.
- Keepalives can now be published via the HTTP API.
- Token substitution templates can now express escape-quoted strings.
- [Web] Fixes issue where labels with links could lead to a crash.
- sensu-agent will no longer allow configuring keepalive timeouts less than
the keepalive interval.
- Eventd can no longer mistake keepalive events for checks with TTL.
- Keepalives now generate a new event UUID for each keepalive failure event.
- Agents now correctly reset keepalive switches on reconnect, fixing a bug
where old keepalive timeout settings would persist too long.
- The system's libc_type attribute is now populated on alpine containers.

## [5.19.0] - 2020-03-26

Expand Down
7 changes: 7 additions & 0 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package agent
import (
"context"
"encoding/base64"
"errors"
"fmt"
"io/ioutil"
"log"
Expand Down Expand Up @@ -76,6 +77,12 @@ func NewAgent(config *Config) (*Agent, error) {
// NewAgentContext is like NewAgent, but allows threading a context through
// the system.
func NewAgentContext(ctx context.Context, config *Config) (*Agent, error) {
if to := config.KeepaliveCriticalTimeout; to > 0 && to <= config.KeepaliveInterval {
return nil, errors.New("keepalive critical timeout must be greater than keepalive interval")
}
if to := config.KeepaliveWarningTimeout; to > 0 && to <= config.KeepaliveInterval {
return nil, errors.New("keepalive warning timeout must be greater than keepalive interval")
}
agent := &Agent{
backendSelector: &RandomBackendSelector{Backends: config.BackendURLs},
connected: false,
Expand Down
17 changes: 17 additions & 0 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,20 @@ func TestInvalidAgentName_GH2022(t *testing.T) {
err = ta.Run(context.Background())
require.Error(t, err)
}

// TestInvalidKeepaliveTimeout verifies that NewAgent rejects keepalive
// warning and critical timeouts that are below the keepalive interval, and
// accepts a warning timeout that is above it.
func TestInvalidKeepaliveTimeout(t *testing.T) {
	cfg, cleanup := FixtureConfig()
	defer cleanup()

	// The steps run in order against the same config, so each mutation
	// builds on the state left behind by the previous one.
	steps := []struct {
		mutate  func()
		wantErr bool
	}{
		{
			// Warning timeout below the interval: must be rejected.
			mutate:  func() { cfg.KeepaliveWarningTimeout = cfg.KeepaliveInterval - 1 },
			wantErr: true,
		},
		{
			// Warning timeout above the interval: must be accepted.
			mutate:  func() { cfg.KeepaliveWarningTimeout = cfg.KeepaliveInterval + 1 },
			wantErr: false,
		},
		{
			// Critical timeout below the interval: must be rejected.
			mutate:  func() { cfg.KeepaliveCriticalTimeout = cfg.KeepaliveInterval - 1 },
			wantErr: true,
		},
	}

	for _, step := range steps {
		step.mutate()
		_, err := NewAgent(cfg)
		switch {
		case step.wantErr && err == nil:
			t.Error("expected non-nil error")
		case !step.wantErr && err != nil:
			// A valid configuration failing makes the remaining steps
			// meaningless, so stop immediately.
			t.Fatal(err)
		}
	}
}
34 changes: 34 additions & 0 deletions backend/agentd/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"github.com/sirupsen/logrus"
)

const deletedEventSentinel = -1

var (
sessionCounter = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand Down Expand Up @@ -122,10 +124,42 @@ func NewSession(ctx context.Context, cfg SessionConfig, conn transport.Transport
unmarshal: unmarshal,
marshal: marshal,
}
if err := s.bus.Publish(messaging.TopicKeepalive, makeEntitySwitchBurialEvent(cfg)); err != nil {
return nil, err
}
s.handler = newSessionHandler(s)
return s, nil
}

// makeEntitySwitchBurialEvent builds the event that NewSession publishes on
// the keepalive topic when a session is created, so that any previously
// existing switch for the entity is buried. Burying it ensures the switch is
// properly recreated if the timeouts have changed.
//
// The event's Timestamp is set to deletedEventSentinel. Keepalived checks for
// that value, so that other components can message to it that a particular
// entity's switch can be buried.
func makeEntitySwitchBurialEvent(cfg SessionConfig) *corev2.Event {
	namespace := cfg.Namespace

	// The agent entity the switch belongs to.
	entity := &corev2.Entity{
		ObjectMeta: corev2.ObjectMeta{
			Namespace: namespace,
			Name:      cfg.AgentName,
		},
		Subscriptions: cfg.Subscriptions,
		EntityClass:   corev2.EntityAgentClass,
	}

	// A bare keepalive check; only its name and namespace are populated.
	check := &corev2.Check{
		ObjectMeta: corev2.ObjectMeta{
			Namespace: namespace,
			Name:      corev2.KeepaliveCheckName,
		},
	}

	return &corev2.Event{
		ObjectMeta: corev2.ObjectMeta{Namespace: namespace},
		Entity:     entity,
		Check:      check,
		Timestamp:  deletedEventSentinel,
	}
}

// Receiver returns the check channel for the session.
func (s *Session) Receiver() chan<- interface{} {
return s.checkChannel
Expand Down
18 changes: 18 additions & 0 deletions backend/agentd/session_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,21 @@ func TestSessionTerminateOnSendError(t *testing.T) {
t.Fatal("broken session never stopped")
}
}

func TestMakeEntitySwitchBurialEvent(t *testing.T) {
cfg := SessionConfig{
Namespace: "default",
AgentName: "entity",
Subscriptions: []string{"default"},
}
event := makeEntitySwitchBurialEvent(cfg)
if err := event.Validate(); err != nil {
t.Fatal(err)
}
if err := event.Entity.Validate(); err != nil {
t.Fatal(err)
}
if got, want := event.Timestamp, int64(deletedEventSentinel); got != want {
t.Errorf("bad timestamp: got %d, want %d", got, want)
}
}
7 changes: 7 additions & 0 deletions backend/eventd/eventd.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ func (e *Eventd) handleMessage(msg interface{}) error {
switches := e.livenessFactory("eventd", e.dead, e.alive, logger)
switchKey := eventKey(event)

if event.Check.Name == corev2.KeepaliveCheckName {
goto NOTTL
}

if event.Check.Ttl > 0 {
// Reset the switch
timeout := int64(event.Check.Ttl)
Expand All @@ -274,13 +278,16 @@ func (e *Eventd) handleMessage(msg interface{}) error {
}
} else if (prevEvent != nil && prevEvent.Check.Ttl > 0) || event.Check.Ttl == deletedEventSentinel {
// The check TTL has been disabled, there is no longer a need to track it
logger.Debug("check ttl disabled")
if err := switches.Bury(context.TODO(), switchKey); err != nil {
// It's better to publish the event even if this fails, so
// don't return the error here.
logger.WithError(err).Error("error burying switch")
}
}

NOTTL:

EventsProcessed.WithLabelValues(EventsProcessedLabelSuccess).Inc()

return e.bus.Publish(messaging.TopicEvent, event)
Expand Down
7 changes: 2 additions & 5 deletions backend/keepalived/keepalived.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,10 @@ func createKeepaliveEvent(rawEvent *corev2.Event) *corev2.Event {
Timestamp: time.Now().Unix(),
Entity: rawEvent.Entity,
Check: keepaliveCheck,
ID: rawEvent.ID,
}

if len(keepaliveEvent.ID) == 0 {
uid, _ := uuid.NewRandom()
keepaliveEvent.ID = uid[:]
}
uid, _ := uuid.NewRandom()
keepaliveEvent.ID = uid[:]

return keepaliveEvent
}
Expand Down
5 changes: 4 additions & 1 deletion system/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ func Info() (types.System, error) {

func getLibCType() (string, error) {
output, err := exec.Command("ldd", "--version").CombinedOutput()
if err != nil {
// The command above will return an exit code of 1 on alpine, but still output
// the relevant information. Therefore, as a workaround, we can inspect stderr
// and ignore the error if it contains pertinent data about the C library
if err != nil && !strings.Contains(strings.ToLower(string(output)), "libc") {
return "", err
}
text := strings.ToLower(string(output))
Expand Down

0 comments on commit 1787cd7

Please sign in to comment.