diff --git a/cmd/spire-server/cli/healthcheck/healthcheck.go b/cmd/spire-server/cli/healthcheck/healthcheck.go index 44e3fd19a6..fec5d9a855 100644 --- a/cmd/spire-server/cli/healthcheck/healthcheck.go +++ b/cmd/spire-server/cli/healthcheck/healthcheck.go @@ -83,6 +83,8 @@ func (c *healthCheckCommand) run() error { } return errors.New("cannot create registration client") } + defer client.Release() + bundleClient := client.NewBundleClient() // Currently using the ability to fetch a bundle as the health check. This diff --git a/conf/agent/agent_full.conf b/conf/agent/agent_full.conf index 65d6b2acb0..d776f7ac9d 100644 --- a/conf/agent/agent_full.conf +++ b/conf/agent/agent_full.conf @@ -1,5 +1,5 @@ # This is the SPIRE Agent configuration file including all possible configuration -# options. +# options. # agent: Contains core configuration parameters. agent { @@ -24,16 +24,16 @@ agent { # server_address: DNS name or IP address of the SPIRE server. server_address = "127.0.0.1" - + # server_port: Port number of the SPIRE server. server_port = "8081" - + # socket_path: Location to bind the workload API socket. Default: /tmp/agent.sock. socket_path = "/tmp/agent.sock" - + # trust_bundle_path: Path to the SPIRE server CA bundle. trust_bundle_path = "./conf/agent/dummy_root_ca.crt" - + # trust_bundle_url: URL to download the initial SPIRE server trust bundle. # trust_bundle_url = "" @@ -56,9 +56,9 @@ agent { # Each nested object has the following format: # # PluginType "plugin_name" { -# +# # # plugin_cmd: Path to the plugin implementation binary (optional, not -# # needed for built-ins) +# # needed for built-ins) # plugin_cmd = # # # plugin_checksum: An optional sha256 of the plugin binary (optional, @@ -149,7 +149,7 @@ plugins { # cluster: Name of the cluster. It must correspond to a cluster # configured in the server plugin. # cluster = "" - + # token_path: Path to the service account token on disk. # Default: /run/secrets/kubernetes.io/serviceaccount/token. # token_path = "/run/secrets/kubernetes.io/serviceaccount/token" @@ -180,7 +180,7 @@ plugins { # certificate_path: The path to the certificate bundle on disk. The # file must contain one or more PEM blocks, starting with the identity # certificate followed by any intermediate certificates necessary for - # chain-of-trust validation. + # chain-of-trust validation. # certificate_path = "" # intermediates_path: Optional. The path to a chain of intermediate @@ -204,7 +204,7 @@ plugins { # docker_version = "" } } - + # WorkloadAttestor "k8s": A workload attestor which allows selectors based # on Kubernetes constructs such ns (namespace) and sa (service account). WorkloadAttestor "k8s" { @@ -244,7 +244,7 @@ plugins { # node_name_env = "MY_NODE_NAME" # node_name: The name of the node. Overrides the value obtained by - # the environment variable specified by node_name_env. + # the environment variable specified by node_name_env. # node_name = "" } } @@ -276,7 +276,7 @@ plugins { # port = 9988 # } -# DogStatsd = [ +# DogStatsd = [ # # List of DogStatsd addresses. # { address = "localhost:8125" }, # { address = "collector.example.org:1337" }, @@ -301,7 +301,7 @@ plugins { # } # health_checks: If health checking is desired use this section to configure -# and expose an additional server endpoint for such purpose. +# and expose an additional agent endpoint for such purpose. # health_checks { # # listener_enabled: Enables health checks endpoint. # listener_enabled = true @@ -312,9 +312,9 @@ plugins { # # bind_port: HTTP Port number of the health checks endpoint. Default: 80. # # bind_port = "80" -# # live_path: HTTP resource path for checking server liveness. Default: /live. +# # live_path: HTTP resource path for checking agent liveness. Default: /live. # # live_path = "/live" -# # ready_path: HTTP resource path for checking server readiness. Default: /ready. +# # ready_path: HTTP resource path for checking agent readiness. Default: /ready. # # ready_path = "/ready" # } diff --git a/doc/spire_agent.md b/doc/spire_agent.md index 5864609d92..04021160e1 100644 --- a/doc/spire_agent.md +++ b/doc/spire_agent.md @@ -109,7 +109,7 @@ The agent can expose additional endpoint that can be used for health checking. I health_checks { listener_enabled = true bind_address = "localhost" - bind_port = "80" + bind_port = "8080" live_path = "/live" ready_path = "/ready" } diff --git a/doc/spire_server.md b/doc/spire_server.md index 50a80f9f88..0f162b6cd5 100644 --- a/doc/spire_server.md +++ b/doc/spire_server.md @@ -185,7 +185,7 @@ The server can expose an additional endpoint that can be used for health checkin health_checks { listener_enabled = true bind_address = "localhost" - bind_port = "80" + bind_port = "8080" live_path = "/live" ready_path = "/ready" } diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 171953f8ed..d1610a95b0 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -2,6 +2,7 @@ package agent import ( "context" + "errors" "fmt" "net/http" _ "net/http/pprof" //nolint: gosec // import registers routes on DefaultServeMux @@ -9,9 +10,9 @@ import ( "path" "runtime" "sync" - "time" "github.com/spiffe/go-spiffe/v2/spiffeid" + api_workload "github.com/spiffe/spire/api/workload" admin_api "github.com/spiffe/spire/pkg/agent/api" node_attestor "github.com/spiffe/spire/pkg/agent/attestor/node" workload_attestor "github.com/spiffe/spire/pkg/agent/attestor/workload" @@ -30,6 +31,8 @@ import ( "github.com/spiffe/spire/proto/spire/api/server/bundle/v1" _ "golang.org/x/net/trace" // registers handlers on the DefaultServeMux "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" ) type Agent struct { @@ -97,7 +100,7 @@ func (a *Agent) Run(ctx context.Context) error { endpoints := a.newEndpoints(cat, metrics, manager) - if err := healthChecks.AddCheck("agent", a, time.Minute); err != nil { + if err := healthChecks.AddCheck("agent", a); err != nil { return fmt.Errorf("failed adding healthcheck: %v", err) } @@ -261,5 +264,23 @@ func (a *Agent) agentSVIDPath() string { // Status is used as a top-level health check for the Agent. func (a *Agent) Status() (interface{}, error) { - return nil, nil + client := api_workload.NewX509Client(&api_workload.X509ClientConfig{ + Addr: a.c.BindAddress, + FailOnError: true, + }) + defer client.Stop() + + errCh := make(chan error, 1) + go func() { + errCh <- client.Start() + }() + + err := <-errCh + if status.Code(err) == codes.Unavailable { + return nil, errors.New("workload api is unavailable") //nolint: golint // error is (ab)used for CLI output + } + + return health.Details{ + Message: "successfully created a workload api client to fetch x509 svid", + }, nil } diff --git a/pkg/common/health/config.go b/pkg/common/health/config.go index 1c2ce2db5c..af3eea05b0 100644 --- a/pkg/common/health/config.go +++ b/pkg/common/health/config.go @@ -18,6 +18,21 @@ type Config struct { UnusedKeys []string `hcl:",unusedKeys"` } +// getAddress returns an address suitable for use as http.Server.Addr. +func (c *Config) getAddress() string { + host := "localhost" + if c.BindAddress != "" { + host = c.BindAddress + } + + port := "80" + if c.BindPort != "" { + port = c.BindPort + } + + return fmt.Sprintf("%s:%s", host, port) +} + // getReadyPath returns the configured value or a default func (c *Config) getReadyPath() string { if c.ReadyPath == "" { @@ -36,17 +51,7 @@ func (c *Config) getLivePath() string { return c.LivePath } -// getAddress returns an address suitable for use as http.Server.Addr. -func (c *Config) getAddress() string { - host := "localhost" - if c.BindAddress != "" { - host = c.BindAddress - } - - port := "80" - if c.BindPort != "" { - port = c.BindPort - } - - return fmt.Sprintf("%s:%s", host, port) +// Details are additional data to be used when the system is ready +type Details struct { + Message string `json:"message,omitempty"` } diff --git a/pkg/common/health/health.go b/pkg/common/health/health.go index 8b8ecd663c..7201bb0c01 100644 --- a/pkg/common/health/health.go +++ b/pkg/common/health/health.go @@ -12,6 +12,8 @@ import ( "github.com/spiffe/spire/pkg/common/telemetry" ) +const readyCheckInterval = time.Minute + // health.Checker is responsible for running health checks and serving the healthcheck HTTP paths type Checker struct { config Config @@ -46,20 +48,21 @@ func NewChecker(config Config, log logrus.FieldLogger) *Checker { } } - hc.StatusListener = &statusListener{} - hc.Logger = &logadapter{FieldLogger: log.WithField(telemetry.SubsystemName, "health")} + l := log.WithField(telemetry.SubsystemName, "health") + hc.StatusListener = &statusListener{log: l} + hc.Logger = &logadapter{FieldLogger: l} return &Checker{config: config, server: server, hc: hc, log: log} } -func (c *Checker) AddCheck(name string, checker health.ICheckable, interval time.Duration) error { +func (c *Checker) AddCheck(name string, checker health.ICheckable) error { c.mutex.Lock() defer c.mutex.Unlock() return c.hc.AddCheck(&health.Config{ Name: name, Checker: checker, - Interval: interval, + Interval: readyCheckInterval, Fatal: true, }) } diff --git a/pkg/server/server.go b/pkg/server/server.go index 4d3062d294..ce54e5208d 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -2,6 +2,7 @@ package server import ( "context" + "errors" "fmt" "net/http" _ "net/http/pprof" //nolint: gosec // import registers routes on DefaultServeMux @@ -9,9 +10,9 @@ import ( "os" "runtime" "sync" - "time" "github.com/andres-erbsen/clock" + server_util "github.com/spiffe/spire/cmd/spire-server/util" "github.com/spiffe/spire/pkg/common/health" "github.com/spiffe/spire/pkg/common/hostservices/metricsservice" common_services "github.com/spiffe/spire/pkg/common/plugin/hostservices" @@ -29,6 +30,7 @@ import ( "github.com/spiffe/spire/pkg/server/plugin/hostservices" "github.com/spiffe/spire/pkg/server/registration" "github.com/spiffe/spire/pkg/server/svid" + "github.com/spiffe/spire/proto/spire/api/server/bundle/v1" "google.golang.org/grpc" ) @@ -158,7 +160,7 @@ func (s *Server) run(ctx context.Context) (err error) { registrationManager := s.newRegistrationManager(cat, metrics) - if err := healthChecks.AddCheck("server", s, time.Minute); err != nil { + if err := healthChecks.AddCheck("server", s); err != nil { return fmt.Errorf("failed adding healthcheck: %v", err) } @@ -384,5 +386,23 @@ func (s *Server) validateTrustDomain(ctx context.Context, ds datastore.DataStore // Status is used as a top-level health check for the Server. func (s *Server) Status() (interface{}, error) { - return nil, nil + client, err := server_util.NewServerClient(s.config.BindUDSAddress.Name) + if err != nil { + return nil, errors.New("cannot create registration client") + } + defer client.Release() + + bundleClient := client.NewBundleClient() + + // Currently using the ability to fetch a bundle as the health check. This + // **could** be problematic if the Upstream CA signing process is lengthy. + // As currently coded however, the API isn't served until after + // the server CA has been signed by upstream. + if _, err := bundleClient.GetBundle(context.Background(), &bundle.GetBundleRequest{}); err != nil { + return nil, errors.New("unable to fetch bundle") + } + + return health.Details{ + Message: "successfully fetched bundle", + }, nil } diff --git a/test/integration/suites/k8s-reconcile/conf/agent/spire-agent.yaml b/test/integration/suites/k8s-reconcile/conf/agent/spire-agent.yaml index eea78dcce1..a0992f27b9 100644 --- a/test/integration/suites/k8s-reconcile/conf/agent/spire-agent.yaml +++ b/test/integration/suites/k8s-reconcile/conf/agent/spire-agent.yaml @@ -78,6 +78,14 @@ data: } } + health_checks { + listener_enabled = true + bind_address = "0.0.0.0" + bind_port = "8080" + live_path = "/live" + ready_path = "/ready" + } + --- apiVersion: apps/v1 @@ -129,13 +137,15 @@ spec: - name: spire-token mountPath: /var/run/secrets/tokens livenessProbe: - exec: - command: ["/opt/spire/bin/spire-agent", "healthcheck", "-socketPath", "/run/spire/sockets/agent.sock"] + httpGet: + path: /live + port: 8080 initialDelaySeconds: 10 periodSeconds: 10 readinessProbe: - exec: - command: ["/opt/spire/bin/spire-agent", "healthcheck", "-socketPath", "/run/spire/sockets/agent.sock", "--shallow"] + httpGet: + path: /ready + port: 8080 initialDelaySeconds: 10 periodSeconds: 10 volumes: @@ -151,8 +161,8 @@ spec: type: DirectoryOrCreate - name: spire-token projected: - sources: - - serviceAccountToken: - path: spire-agent - expirationSeconds: 7200 - audience: spire-server + sources: + - serviceAccountToken: + path: spire-agent + expirationSeconds: 7200 + audience: spire-server diff --git a/test/integration/suites/k8s-reconcile/conf/server/spire-server.yaml b/test/integration/suites/k8s-reconcile/conf/server/spire-server.yaml index 0dce35b550..9adad1704d 100644 --- a/test/integration/suites/k8s-reconcile/conf/server/spire-server.yaml +++ b/test/integration/suites/k8s-reconcile/conf/server/spire-server.yaml @@ -157,6 +157,14 @@ data: } } + health_checks { + listener_enabled = true + bind_address = "0.0.0.0" + bind_port = "8080" + live_path = "/live" + ready_path = "/ready" + } + --- apiVersion: v1 @@ -171,6 +179,7 @@ data: cluster = "example-cluster" server_socket_path = "/run/spire/sockets/registration.sock" leader_election = true + metrics_addr = "0.0.0.0:18080" --- @@ -212,13 +221,15 @@ spec: mountPath: /run/spire/sockets readOnly: false livenessProbe: - exec: - command: ["/opt/spire/bin/spire-server", "healthcheck", "-registrationUDSPath", "/run/spire/sockets/registration.sock"] + httpGet: + path: /live + port: 8080 initialDelaySeconds: 5 periodSeconds: 5 readinessProbe: - exec: - command: ["/opt/spire/bin/spire-server", "healthcheck", "-registrationUDSPath", "/run/spire/sockets/registration.sock", "--shallow"] + httpGet: + path: /ready + port: 8080 initialDelaySeconds: 5 periodSeconds: 5 - name: k8s-workload-registrar @@ -279,5 +290,3 @@ spec: ports: - port: 443 targetPort: registrar-port - - diff --git a/test/integration/suites/k8s/conf/agent/spire-agent.yaml b/test/integration/suites/k8s/conf/agent/spire-agent.yaml index eea78dcce1..a0992f27b9 100644 --- a/test/integration/suites/k8s/conf/agent/spire-agent.yaml +++ b/test/integration/suites/k8s/conf/agent/spire-agent.yaml @@ -78,6 +78,14 @@ data: } } + health_checks { + listener_enabled = true + bind_address = "0.0.0.0" + bind_port = "8080" + live_path = "/live" + ready_path = "/ready" + } + --- apiVersion: apps/v1 @@ -129,13 +137,15 @@ spec: - name: spire-token mountPath: /var/run/secrets/tokens livenessProbe: - exec: - command: ["/opt/spire/bin/spire-agent", "healthcheck", "-socketPath", "/run/spire/sockets/agent.sock"] + httpGet: + path: /live + port: 8080 initialDelaySeconds: 10 periodSeconds: 10 readinessProbe: - exec: - command: ["/opt/spire/bin/spire-agent", "healthcheck", "-socketPath", "/run/spire/sockets/agent.sock", "--shallow"] + httpGet: + path: /ready + port: 8080 initialDelaySeconds: 10 periodSeconds: 10 volumes: @@ -151,8 +161,8 @@ spec: type: DirectoryOrCreate - name: spire-token projected: - sources: - - serviceAccountToken: - path: spire-agent - expirationSeconds: 7200 - audience: spire-server + sources: + - serviceAccountToken: + path: spire-agent + expirationSeconds: 7200 + audience: spire-server diff --git a/test/integration/suites/k8s/conf/server/spire-server.yaml b/test/integration/suites/k8s/conf/server/spire-server.yaml index 2bba5e93f4..cbc804cd66 100644 --- a/test/integration/suites/k8s/conf/server/spire-server.yaml +++ b/test/integration/suites/k8s/conf/server/spire-server.yaml @@ -147,6 +147,14 @@ data: } } + health_checks { + listener_enabled = true + bind_address = "0.0.0.0" + bind_port = "8080" + live_path = "/live" + ready_path = "/ready" + } + --- apiVersion: v1 @@ -156,9 +164,9 @@ metadata: namespace: spire data: k8s-workload-registrar.conf: | - cert_path = "/run/spire/k8s-workload-registrar/certs/server-cert.pem" - key_path = "/run/spire/k8s-workload-registrar/secret/server-key.pem" - cacert_path = "/run/spire/k8s-workload-registrar/certs/cacert.pem" + cert_path = "/run/spire/k8s-workload-registrar/certs/server-cert.pem" + key_path = "/run/spire/k8s-workload-registrar/secret/server-key.pem" + cacert_path = "/run/spire/k8s-workload-registrar/certs/cacert.pem" trust_domain = "example.org" cluster = "example-cluster" server_socket_path = "/run/spire/sockets/registration.sock" @@ -242,13 +250,15 @@ spec: mountPath: /run/spire/sockets readOnly: false livenessProbe: - exec: - command: ["/opt/spire/bin/spire-server", "healthcheck", "-registrationUDSPath", "/run/spire/sockets/registration.sock"] + httpGet: + path: /live + port: 8080 initialDelaySeconds: 5 periodSeconds: 5 readinessProbe: - exec: - command: ["/opt/spire/bin/spire-server", "healthcheck", "-registrationUDSPath", "/run/spire/sockets/registration.sock", "--shallow"] + httpGet: + path: /ready + port: 8080 initialDelaySeconds: 5 periodSeconds: 5 - name: k8s-workload-registrar @@ -321,5 +331,3 @@ spec: ports: - port: 443 targetPort: registrar-port - -