11package static
22
33import (
4- "context"
54 "errors"
6- "fmt"
7- "net"
85 "net/http"
96 "sync"
10- "time"
11-
12- "sigs.k8s.io/controller-runtime/pkg/manager"
13-
14- "github.com/nginx/nginx-gateway-fabric/internal/mode/static/config"
157)
168
179// newGraphBuiltHealthChecker creates a new graphBuiltHealthChecker.
@@ -21,94 +13,37 @@ func newGraphBuiltHealthChecker() *graphBuiltHealthChecker {
2113 }
2214}
2315
// graphBuiltHealthChecker is used to check if the initial graph is built and the NGF Pod is ready.
//
// The zero value is not usable: readyCh must be non-nil before setAsReady is called, because closing a
// nil channel panics.
type graphBuiltHealthChecker struct {
	// readyCh is a channel that is initialized in newGraphBuiltHealthChecker and represents if the NGF Pod is ready.
	// It is closed by setAsReady, so receivers unblock once the Pod becomes ready.
	readyCh chan struct{}
	// lock guards ready and serializes the close of readyCh.
	lock sync.RWMutex
	// ready is set to true by setAsReady and is never reset.
	ready bool
}

// readyCheck returns the ready-state of the Pod. It satisfies the controller-runtime Checker type.
// We are considered ready after the first graph is built.
//
// The request argument is ignored. A nil error means ready; otherwise a non-nil error is returned.
func (h *graphBuiltHealthChecker) readyCheck(_ *http.Request) error {
	h.lock.RLock()
	defer h.lock.RUnlock()

	if !h.ready {
		return errors.New("control plane is not yet ready")
	}

	return nil
}

// setAsReady marks the health check as ready and closes readyCh.
//
// It is idempotent: only the first call flips the flag and closes the channel. Later calls return
// without doing anything, because closing an already-closed channel would panic.
func (h *graphBuiltHealthChecker) setAsReady() {
	h.lock.Lock()
	defer h.lock.Unlock()

	if h.ready {
		return
	}

	h.ready = true
	close(h.readyCh)
}

// getReadyCh returns a read-only channel, which determines if the NGF Pod is ready.
// The channel is closed once setAsReady has been called.
func (h *graphBuiltHealthChecker) getReadyCh() <-chan struct{} {
	return h.readyCh
}
0 commit comments