diff --git a/.github/actions/nightly-release/action.yaml b/.github/actions/nightly-release/action.yaml index 5309383703e..26cafcd870b 100644 --- a/.github/actions/nightly-release/action.yaml +++ b/.github/actions/nightly-release/action.yaml @@ -22,7 +22,7 @@ runs: run: docker login -u "${{ inputs.hub_username }}" -p "${{ inputs.hub_password }}" - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable diff --git a/.github/workflows/cov.yaml b/.github/workflows/cov.yaml index 70cb46eedc9..e90e3757958 100644 --- a/.github/workflows/cov.yaml +++ b/.github/workflows/cov.yaml @@ -23,7 +23,7 @@ jobs: path: src/github.com/nats-io/nats-server - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable cache-dependency-path: src/github.com/nats-io/nats-server/go.sum diff --git a/.github/workflows/long-tests.yaml b/.github/workflows/long-tests.yaml index b5ed18534c7..df8496850d1 100644 --- a/.github/workflows/long-tests.yaml +++ b/.github/workflows/long-tests.yaml @@ -25,7 +25,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable diff --git a/.github/workflows/mqtt-test.yaml b/.github/workflows/mqtt-test.yaml index b693cc62fb7..27a7465a9aa 100644 --- a/.github/workflows/mqtt-test.yaml +++ b/.github/workflows/mqtt-test.yaml @@ -17,7 +17,7 @@ jobs: path: src/github.com/nats-io/nats-server - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable cache-dependency-path: src/github.com/nats-io/nats-server/go.sum diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 9783e415b65..f026aac123d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -21,7 +21,7 @@ jobs: fetch-tags: true - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: "stable" diff --git a/.github/workflows/stale-issues.yaml b/.github/workflows/stale-issues.yaml index ce8aa687f03..4880a50430a 100644 --- a/.github/workflows/stale-issues.yaml +++ b/.github/workflows/stale-issues.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/stale@v9 + - uses: actions/stale@v10 with: stale-issue-label: stale stale-pr-label: stale diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 69bff6525e0..807391cf289 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -70,7 +70,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version-file: "go.mod" @@ -92,7 +92,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -107,7 +107,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version-file: "go.mod" @@ -129,7 +129,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version-file: "go.mod" @@ -146,7 +146,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -163,7 +163,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -180,7 +180,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: 
actions/setup-go@v6 with: go-version: stable @@ -197,7 +197,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -214,7 +214,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -231,7 +231,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -248,7 +248,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -265,7 +265,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -282,7 +282,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -299,7 +299,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -316,7 +316,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -333,7 +333,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -350,7 +350,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -367,7 +367,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -384,7 +384,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable @@ -401,7 +401,7 @@ jobs: uses: actions/checkout@v4 - name: Install Go - uses: actions/setup-go@v5 + uses: actions/setup-go@v6 with: go-version: stable diff --git a/.goreleaser.yml b/.goreleaser.yml index 53daaddfa02..0914d075e71 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -21,7 +21,7 @@ builds: env: # This is the toolchain version we use for releases. 
To override, set the env var, e.g.: # GORELEASER_TOOLCHAIN="go1.22.8" TARGET='linux_amd64' goreleaser build --snapshot --clean --single-target - - GOTOOLCHAIN={{ envOrDefault "GORELEASER_TOOLCHAIN" "go1.24.6" }} + - GOTOOLCHAIN={{ envOrDefault "GORELEASER_TOOLCHAIN" "go1.24.7" }} - GO111MODULE=on - CGO_ENABLED=0 goos: diff --git a/go.mod b/go.mod index 4916ddfc3e3..1f394db5b95 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,6 @@ module github.com/nats-io/nats-server/v2 -go 1.23.0 - -toolchain go1.23.12 +go 1.24.0 require ( github.com/antithesishq/antithesis-sdk-go v0.4.3-default-no-op @@ -10,11 +8,11 @@ require ( github.com/klauspost/compress v1.18.0 github.com/minio/highwayhash v1.0.3 github.com/nats-io/jwt/v2 v2.7.4 - github.com/nats-io/nats.go v1.44.0 + github.com/nats-io/nats.go v1.45.0 github.com/nats-io/nkeys v0.4.11 github.com/nats-io/nuid v1.0.1 go.uber.org/automaxprocs v1.6.0 golang.org/x/crypto v0.41.0 - golang.org/x/sys v0.35.0 - golang.org/x/time v0.12.0 + golang.org/x/sys v0.36.0 + golang.org/x/time v0.13.0 ) diff --git a/go.sum b/go.sum index f7585ac927b..97c3d8b8906 100644 --- a/go.sum +++ b/go.sum @@ -10,8 +10,8 @@ github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ= github.com/nats-io/jwt/v2 v2.7.4 h1:jXFuDDxs/GQjGDZGhNgH4tXzSUK6WQi2rsj4xmsNOtI= github.com/nats-io/jwt/v2 v2.7.4/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA= -github.com/nats-io/nats.go v1.44.0 h1:ECKVrDLdh/kDPV1g0gAQ+2+m2KprqZK5O/eJAyAnH2M= -github.com/nats-io/nats.go v1.44.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= +github.com/nats-io/nats.go v1.45.0 h1:/wGPbnYXDM0pLKFjZTX+2JOw9TQPoIgTFrUaH97giwA= +github.com/nats-io/nats.go v1.45.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g= github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0= github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= @@ -27,9 +27,9 @@ go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwE golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= -golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= +golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/server/accounts.go b/server/accounts.go index 7baabbb6646..55bd0774746 100644 --- a/server/accounts.go +++ b/server/accounts.go @@ -378,6 +378,21 @@ func (a *Account) getClients() []*client { return clients } +// Returns a slice of external (non-internal) clients stored in the account, or nil if none is present. +// Lock is held on entry. 
+func (a *Account) getExternalClientsLocked() []*client { + if len(a.clients) == 0 { + return nil + } + var clients []*client + for c := range a.clients { + if !isInternalClient(c.kind) { + clients = append(clients, c) + } + } + return clients +} + // Called to track a remote server and connections and leafnodes it // has for this account. func (a *Account) updateRemoteServer(m *AccountNumConns) []*client { @@ -398,7 +413,8 @@ func (a *Account) updateRemoteServer(m *AccountNumConns) []*client { // conservative and bit harsh here. Clients will reconnect if we over compensate. var clients []*client if mtce { - clients = a.getClientsLocked() + clients = a.getExternalClientsLocked() + // Sort in reverse chronological. slices.SortFunc(clients, func(i, j *client) int { return -i.start.Compare(j.start) }) over := (len(a.clients) - int(a.sysclients) + int(a.nrclients)) - int(a.mconns) diff --git a/server/accounts_test.go b/server/accounts_test.go index 3b564fe2668..c64f6c773d8 100644 --- a/server/accounts_test.go +++ b/server/accounts_test.go @@ -3563,9 +3563,14 @@ func TestAccountMaxConnectionsDisconnectsNewestFirst(t *testing.T) { disconnects := make([]chan error, 0) for i := 1; i <= 3; i++ { disconnectCh := make(chan error) - c, err := nats.Connect(s.ClientURL(), nats.UserInfo(fmt.Sprintf("user%d", i), "foo"), nats.DisconnectErrHandler(func(_ *nats.Conn, err error) { - disconnectCh <- err - })) + c, err := nats.Connect( + s.ClientURL(), + nats.UserInfo(fmt.Sprintf("user%d", i), "foo"), + nats.DisconnectErrHandler(func(_ *nats.Conn, err error) { + disconnectCh <- err + }), + nats.NoReconnect(), + ) require_NoError(t, err) defer c.Close() conns = append(conns, c) diff --git a/server/auth_callout.go b/server/auth_callout.go index cc9e8db8117..038d53bb3f3 100644 --- a/server/auth_callout.go +++ b/server/auth_callout.go @@ -403,7 +403,7 @@ func (s *Server) processClientOrLeafCallout(c *client, opts *Options) (authorize return false, errStr } req := []byte(b) - var hdr map[string]string + var hdr []byte // Check if we have been asked to encrypt. if xkp != nil { @@ -413,7 +413,7 @@ func (s *Server) processClientOrLeafCallout(c *client, opts *Options) (authorize s.Warnf(errStr) return false, errStr } - hdr = map[string]string{AuthRequestXKeyHeader: xkey} + hdr = genHeader(hdr, AuthRequestXKeyHeader, xkey) } // Send out our request. diff --git a/server/client.go b/server/client.go index c12834bc0a8..bad079d345f 100644 --- a/server/client.go +++ b/server/client.go @@ -152,6 +152,7 @@ const ( compressionNegotiated // Marks if this connection has negotiated compression level with remote. didTLSFirst // Marks if this connection requested and was accepted doing the TLS handshake first (prior to INFO). isSlowConsumer // Marks connection as a slow consumer. + firstPong // Marks if this is the first PONG received ) // set the flag (would be equivalent to set the boolean to true) @@ -2563,6 +2564,14 @@ func (c *client) processPong() { c.rtt = computeRTT(c.rttStart) srv := c.srv reorderGWs := c.kind == GATEWAY && c.gw.outbound + firstPong := c.flags.setIfNotSet(firstPong) + var ri *routeInfo + // When receiving the first PONG, for a route with pooling, we may be + // instructed to start a new route. + if firstPong && c.kind == ROUTER && c.route != nil { + ri = c.route.startNewRoute + c.route.startNewRoute = nil + } // If compression is currently active for a route/leaf connection, if the // compression configuration is s2_auto, check if we should change // the compression level. 
@@ -2581,6 +2590,11 @@ func (c *client) processPong() { if reorderGWs { srv.gateway.orderOutboundConnections() } + if ri != nil { + srv.startGoRoutine(func() { + srv.connectToRoute(ri.url, ri.rtype, true, ri.gossipMode, _EMPTY_) + }) + } } // Select the s2 compression level based on the client's current RTT and the configured @@ -3084,6 +3098,13 @@ func (c *client) addShadowSub(sub *subscription, ime *ime, enact bool) (*subscri // Update our route map here. But only if we are not a leaf node or a hub leafnode. if c.kind != LEAF || c.isHubLeafNode() { c.srv.updateRemoteSubscription(im.acc, &nsub, 1) + } else if c.kind == LEAF { + // Update all leafnodes that connect to this server. Note that we could have + // used the updateLeafNodes() function since when it does invoke updateSmap() + // this function already takes care of not sending to a spoke leafnode since + // the `nsub` here is already from a spoke leafnode, but to be explicit, we + // use this version that updates only leafnodes that connect to this server. + im.acc.updateLeafNodesEx(&nsub, 1, true) } return &nsub, nil @@ -3192,14 +3213,12 @@ func (c *client) unsubscribe(acc *Account, sub *subscription, force, remove bool // Check to see if we have shadow subscriptions. var updateRoute bool - var updateGWs bool + var isSpokeLeaf bool shadowSubs := sub.shadow sub.shadow = nil if len(shadowSubs) > 0 { - updateRoute = (c.kind == CLIENT || c.kind == SYSTEM || c.kind == LEAF) && c.srv != nil - if updateRoute { - updateGWs = c.srv.gateway.enabled - } + isSpokeLeaf = c.isSpokeLeafNode() + updateRoute = !isSpokeLeaf && (c.kind == CLIENT || c.kind == SYSTEM || c.kind == LEAF) && c.srv != nil } sub.close() c.mu.Unlock() @@ -3208,16 +3227,12 @@ func (c *client) unsubscribe(acc *Account, sub *subscription, force, remove bool for _, nsub := range shadowSubs { if err := nsub.im.acc.sl.Remove(nsub); err != nil { c.Debugf("Could not remove shadow import subscription for account %q", nsub.im.acc.Name) - } else { - if updateRoute { - c.srv.updateRouteSubscriptionMap(nsub.im.acc, nsub, -1) - } - if updateGWs { - c.srv.gatewayUpdateSubInterest(nsub.im.acc.Name, nsub, -1) - } } - // Now check on leafnode updates. - nsub.im.acc.updateLeafNodes(nsub, -1) + if updateRoute { + c.srv.updateRemoteSubscription(nsub.im.acc, nsub, -1) + } else if isSpokeLeaf { + nsub.im.acc.updateLeafNodesEx(nsub, -1, true) + } } // Now check to see if this was part of a respMap entry for service imports. diff --git a/server/consumer.go b/server/consumer.go index 7e7977a5b94..e945bb517f5 100644 --- a/server/consumer.go +++ b/server/consumer.go @@ -20,6 +20,8 @@ import ( "errors" "fmt" "math/rand" + "os" + "path/filepath" "reflect" "regexp" "slices" @@ -70,6 +72,13 @@ type ConsumerInfo struct { PriorityGroups []PriorityGroupState `json:"priority_groups,omitempty"` } +// consumerInfoClusterResponse is a response used in a cluster to communicate the consumer info +// back to the meta leader as part of a consumer list request. +type consumerInfoClusterResponse struct { + ConsumerInfo + OfflineReason string `json:"offline_reason,omitempty"` // Reporting when a consumer is offline. +} + type PriorityGroupState struct { Group string `json:"group"` PinnedClientID string `json:"pinned_client_id,omitempty"` @@ -452,6 +461,7 @@ type consumer struct { dthresh time.Duration mch chan struct{} // Message channel qch chan struct{} // Quit channel + mqch chan struct{} // The monitor's quit channel. 
inch chan bool // Interest change channel sfreq int32 ackEventT string @@ -497,6 +507,10 @@ type consumer struct { /// pinnedTtl is the remaining time before the current PinId expires. pinnedTtl *time.Timer pinnedTS time.Time + + // If standalone/single-server, the offline reason needs to be stored directly in the consumer. + // Otherwise, if clustered it will be part of the consumer assignment. + offlineReason string } // A single subject filter. @@ -1021,10 +1035,11 @@ func (mset *stream) addConsumerWithAssignment(config *ConsumerConfig, oname stri outq: mset.outq, active: true, qch: make(chan struct{}), + mqch: make(chan struct{}), uch: make(chan struct{}, 1), mch: make(chan struct{}, 1), sfreq: int32(sampleFreq), - maxdc: uint64(config.MaxDeliver), + maxdc: uint64(max(config.MaxDeliver, 0)), // MaxDeliver is negative (-1) when infinite. maxp: config.MaxAckPending, retention: cfg.Retention, created: time.Now().UTC(), @@ -1285,6 +1300,26 @@ func (o *consumer) setConsumerAssignment(ca *consumerAssignment) { } } +func (o *consumer) monitorQuitC() <-chan struct{} { + if o == nil { + return nil + } + o.mu.RLock() + defer o.mu.RUnlock() + return o.mqch +} + +// signalMonitorQuit signals to exit the monitor loop. If there's no Raft node, +// this will be the only way to stop the monitor goroutine. +func (o *consumer) signalMonitorQuit() { + o.mu.Lock() + defer o.mu.Unlock() + if o.mqch != nil { + close(o.mqch) + o.mqch = nil + } +} + func (o *consumer) updateC() <-chan struct{} { o.mu.RLock() defer o.mu.RUnlock() @@ -2239,7 +2274,8 @@ func (o *consumer) updateConfig(cfg *ConsumerConfig) error { } // Set MaxDeliver if changed if cfg.MaxDeliver != o.cfg.MaxDeliver { - o.maxdc = uint64(cfg.MaxDeliver) + // MaxDeliver is negative (-1) when infinite. + o.maxdc = uint64(max(cfg.MaxDeliver, 0)) } // Set InactiveThreshold if changed. if val := cfg.InactiveThreshold; val != o.cfg.InactiveThreshold { @@ -4836,7 +4872,7 @@ func (o *consumer) setMaxPendingBytes(limit int) { // This does some quick sanity checks to see if we should re-calculate num pending. // Lock should be held. func (o *consumer) checkNumPending() uint64 { - if o.mset != nil { + if o.mset != nil && o.mset.store != nil { var state StreamState o.mset.store.FastState(&state) npc := o.numPending() @@ -5758,6 +5794,13 @@ func (o *consumer) stopWithFlags(dflag, sdflag, doSignal, advisory bool) error { } o.closed = true + // Signal to the monitor loop. + // Can't use only qch here, since that's used when stepping down as a leader. + if o.mqch != nil { + close(o.mqch) + o.mqch = nil + } + // Check if we are the leader and are being deleted (as a node). if dflag && o.isLeader() { // If we are clustered and node leader (probable from above), stepdown. @@ -5880,6 +5923,14 @@ func (o *consumer) stopWithFlags(dflag, sdflag, doSignal, advisory bool) error { } else { err = store.Stop() } + } else if dflag { + // If there's no store (for example, when it's offline), manually delete the directories. 
+ o.mu.RLock() + stream, consumer := o.stream, o.name + o.mu.RUnlock() + accDir := filepath.Join(js.config.StoreDir, a.GetName()) + consumersDir := filepath.Join(accDir, streamsDir, stream, consumerDir) + os.RemoveAll(filepath.Join(consumersDir, consumer)) } return err diff --git a/server/errors.json b/server/errors.json index 3a80cc4d642..a9eaf3be685 100644 --- a/server/errors.json +++ b/server/errors.json @@ -1668,5 +1668,25 @@ "help": "", "url": "", "deprecates": "" + }, + { + "constant": "JSStreamOfflineReasonErrF", + "code": 500, + "error_code": 10194, + "description": "stream is offline: {err}", + "comment": "", + "help": "", + "url": "", + "deprecates": "" + }, + { + "constant": "JSConsumerOfflineReasonErrF", + "code": 500, + "error_code": 10195, + "description": "consumer is offline: {err}", + "comment": "", + "help": "", + "url": "", + "deprecates": "" } ] diff --git a/server/events.go b/server/events.go index 53400b9f5c6..c22cbe3a57e 100644 --- a/server/events.go +++ b/server/events.go @@ -419,7 +419,7 @@ type pubMsg struct { sub string rply string si *ServerInfo - hdr map[string]string + hdr []byte msg any oct compressionType echo bool @@ -428,7 +428,7 @@ type pubMsg struct { var pubMsgPool sync.Pool -func newPubMsg(c *client, sub, rply string, si *ServerInfo, hdr map[string]string, +func newPubMsg(c *client, sub, rply string, si *ServerInfo, hdr []byte, msg any, oct compressionType, echo, last bool) *pubMsg { var m *pubMsg @@ -601,17 +601,28 @@ RESET: // Add in NL b = append(b, _CRLF_...) + // Optional raw header addition. + if pm.hdr != nil { + b = append(pm.hdr, b...) + nhdr := len(pm.hdr) + nsize := len(b) - LEN_CR_LF + // MQTT producers don't have CRLF, so add it back. + if c.isMqtt() { + nsize += LEN_CR_LF + } + // Update pubArgs + // If others will use this later we need to save and restore original. + c.pa.hdr = nhdr + c.pa.size = nsize + c.pa.hdb = []byte(strconv.Itoa(nhdr)) + c.pa.szb = []byte(strconv.Itoa(nsize)) + } + // Check if we should set content-encoding if contentHeader != _EMPTY_ { b = c.setHeader(contentEncodingHeader, contentHeader, b) } - // Optional header processing. - if pm.hdr != nil { - for k, v := range pm.hdr { - b = c.setHeader(k, v, b) - } - } // Tracing if trace { c.traceInOp(fmt.Sprintf("PUB %s %s %d", c.pa.subject, c.pa.reply, c.pa.size), nil) @@ -688,7 +699,7 @@ func (s *Server) sendInternalAccountMsg(a *Account, subject string, msg any) err } // Used to send an internal message with an optional reply to an arbitrary account. 
-func (s *Server) sendInternalAccountMsgWithReply(a *Account, subject, reply string, hdr map[string]string, msg any, echo bool) error { +func (s *Server) sendInternalAccountMsgWithReply(a *Account, subject, reply string, hdr []byte, msg any, echo bool) error { s.mu.RLock() if s.sys == nil || s.sys.sendq == nil { s.mu.RUnlock() diff --git a/server/jetstream.go b/server/jetstream.go index c119da63337..d676f64022e 100644 --- a/server/jetstream.go +++ b/server/jetstream.go @@ -14,6 +14,7 @@ package server import ( + "bytes" "crypto/hmac" "crypto/sha256" "encoding/binary" @@ -1333,8 +1334,54 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro } var cfg FileStreamInfo - if err := json.Unmarshal(buf, &cfg); err != nil { - s.Warnf(" Error unmarshalling stream metafile %q: %v", metafile, err) + decoder := json.NewDecoder(bytes.NewReader(buf)) + decoder.DisallowUnknownFields() + strictErr := decoder.Decode(&cfg) + if strictErr != nil { + cfg = FileStreamInfo{} + if err := json.Unmarshal(buf, &cfg); err != nil { + s.Warnf(" Error unmarshalling stream metafile %q: %v", metafile, err) + continue + } + } + if supported := supportsRequiredApiLevel(cfg.Metadata); !supported || strictErr != nil { + var offlineReason string + if !supported { + apiLevel := getRequiredApiLevel(cfg.Metadata) + offlineReason = fmt.Sprintf("unsupported - required API level: %s, current API level: %d", apiLevel, JSApiLevel) + s.Warnf(" Detected unsupported stream '%s > %s', delete the stream or upgrade the server to API level %s", a.Name, cfg.StreamConfig.Name, apiLevel) + } else { + offlineReason = fmt.Sprintf("decoding error: %v", strictErr) + s.Warnf(" Error unmarshalling stream metafile %q: %v", metafile, strictErr) + } + singleServerMode := !s.JetStreamIsClustered() && s.standAloneMode() + if singleServerMode { + // Fake a stream, so we can respond to API requests as single-server. + mset := &stream{ + acc: a, + jsa: jsa, + cfg: cfg.StreamConfig, + js: js, + srv: s, + stype: cfg.Storage, + consumers: make(map[string]*consumer), + active: false, + created: time.Now().UTC(), + offlineReason: offlineReason, + } + if !cfg.Created.IsZero() { + mset.created = cfg.Created + } + mset.closed.Store(true) + + jsa.mu.Lock() + jsa.streams[cfg.Name] = mset + jsa.mu.Unlock() + + // Now do the consumers. 
+ odir := filepath.Join(sdir, fi.Name(), consumerDir) + consumers = append(consumers, &ce{mset, odir}) + } continue } @@ -1455,13 +1502,66 @@ func (a *Account) EnableJetStream(limits map[string]JetStreamAccountLimits) erro } var cfg FileConsumerInfo - if err := json.Unmarshal(buf, &cfg); err != nil { - s.Warnf(" Error unmarshalling consumer metafile %q: %v", metafile, err) + decoder := json.NewDecoder(bytes.NewReader(buf)) + decoder.DisallowUnknownFields() + strictErr := decoder.Decode(&cfg) + if strictErr != nil { + cfg = FileConsumerInfo{} + if err := json.Unmarshal(buf, &cfg); err != nil { + s.Warnf(" Error unmarshalling consumer metafile %q: %v", metafile, err) + continue + } + } + if supported := supportsRequiredApiLevel(cfg.Metadata); !supported || strictErr != nil { + var offlineReason string + if !supported { + apiLevel := getRequiredApiLevel(cfg.Metadata) + offlineReason = fmt.Sprintf("unsupported - required API level: %s, current API level: %d", apiLevel, JSApiLevel) + s.Warnf(" Detected unsupported consumer '%s > %s > %s', delete the consumer or upgrade the server to API level %s", a.Name, e.mset.name(), cfg.Name, apiLevel) + } else { + offlineReason = fmt.Sprintf("decoding error: %v", strictErr) + s.Warnf(" Error unmarshalling consumer metafile %q: %v", metafile, strictErr) + } + singleServerMode := !s.JetStreamIsClustered() && s.standAloneMode() + if singleServerMode { + if !e.mset.closed.Load() { + s.Warnf(" Stopping unsupported stream '%s > %s'", a.Name, e.mset.name()) + e.mset.mu.Lock() + e.mset.offlineReason = "stopped" + e.mset.mu.Unlock() + e.mset.stop(false, false) + } + + // Fake a consumer, so we can respond to API requests as single-server. + o := &consumer{ + mset: e.mset, + js: s.getJetStream(), + acc: a, + srv: s, + cfg: cfg.ConsumerConfig, + active: false, + stream: e.mset.name(), + name: cfg.Name, + dseq: 1, + sseq: 1, + created: time.Now().UTC(), + closed: true, + offlineReason: offlineReason, + } + if !cfg.Created.IsZero() { + o.created = cfg.Created + } + + e.mset.mu.Lock() + e.mset.setConsumer(o) + e.mset.mu.Unlock() + } continue } + isEphemeral := !isDurableConsumer(&cfg.ConsumerConfig) if isEphemeral { - // This is an ephermal consumer and this could fail on restart until + // This is an ephemeral consumer and this could fail on restart until // the consumer can reconnect. We will create it as a durable and switch it. 
cfg.ConsumerConfig.Durable = ofi.Name() } diff --git a/server/jetstream_api.go b/server/jetstream_api.go index 555678a8f0d..66145181d37 100644 --- a/server/jetstream_api.go +++ b/server/jetstream_api.go @@ -485,8 +485,9 @@ type JSApiStreamListRequest struct { type JSApiStreamListResponse struct { ApiResponse ApiPaged - Streams []*StreamInfo `json:"streams"` - Missing []string `json:"missing,omitempty"` + Streams []*StreamInfo `json:"streams"` + Missing []string `json:"missing,omitempty"` + Offline map[string]string `json:"offline,omitempty"` } const JSApiStreamListResponseType = "io.nats.jetstream.api.v1.stream_list_response" @@ -747,8 +748,9 @@ const JSApiConsumerNamesResponseType = "io.nats.jetstream.api.v1.consumer_names_ type JSApiConsumerListResponse struct { ApiResponse ApiPaged - Consumers []*ConsumerInfo `json:"consumers"` - Missing []string `json:"missing,omitempty"` + Consumers []*ConsumerInfo `json:"consumers"` + Missing []string `json:"missing,omitempty"` + Offline map[string]string `json:"offline,omitempty"` } const JSApiConsumerListResponseType = "io.nats.jetstream.api.v1.consumer_list_response" @@ -1042,9 +1044,11 @@ type delayedAPIResponse struct { subject string reply string request string + hdr []byte response string rg *raftGroup deadline time.Time + noJs bool next *delayedAPIResponse } @@ -1147,7 +1151,12 @@ func (s *Server) delayedAPIResponder() { next() case <-tm.C: if r != nil { - s.sendAPIErrResponse(r.ci, r.acc, r.subject, r.reply, r.request, r.response) + // If it's not a JS API error, send it as a raw response without additional API/audit tracking. + if r.noJs { + s.sendInternalAccountMsgWithReply(r.acc, r.subject, _EMPTY_, r.hdr, r.response, false) + } else { + s.sendAPIErrResponse(r.ci, r.acc, r.subject, r.reply, r.request, r.response) + } pop() } next() @@ -1157,7 +1166,13 @@ func (s *Server) delayedAPIResponder() { func (s *Server) sendDelayedAPIErrResponse(ci *ClientInfo, acc *Account, subject, reply, request, response string, rg *raftGroup, duration time.Duration) { s.delayedAPIResponses.push(&delayedAPIResponse{ - ci, acc, subject, reply, request, response, rg, time.Now().Add(duration), nil, + ci, acc, subject, reply, request, nil, response, rg, time.Now().Add(duration), false, nil, + }) +} + +func (s *Server) sendDelayedErrResponse(acc *Account, subject string, hdr []byte, response string, duration time.Duration) { + s.delayedAPIResponses.push(&delayedAPIResponse{ + nil, acc, subject, _EMPTY_, _EMPTY_, hdr, response, nil, time.Now().Add(duration), true, nil, }) } @@ -1727,6 +1742,11 @@ func (s *Server) jsStreamUpdateRequest(sub *subscription, c *client, _ *Account, s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if mset.offlineReason != _EMPTY_ { + resp.Error = NewJSStreamOfflineReasonError(errors.New(mset.offlineReason)) + s.sendDelayedAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp), nil, errRespDelay) + return + } // Update asset version metadata. 
setStaticStreamMetadata(&cfg) @@ -1958,7 +1978,17 @@ func (s *Server) jsStreamListRequest(sub *subscription, c *client, _ *Account, s offset = scnt } + var missingNames []string for _, mset := range msets[offset:] { + if mset.offlineReason != _EMPTY_ { + if resp.Offline == nil { + resp.Offline = make(map[string]string, 1) + } + resp.Offline[mset.getCfgName()] = mset.offlineReason + missingNames = append(missingNames, mset.getCfgName()) + continue + } + config := mset.config() resp.Streams = append(resp.Streams, &StreamInfo{ Created: mset.createdTime(), @@ -1976,6 +2006,7 @@ func (s *Server) jsStreamListRequest(sub *subscription, c *client, _ *Account, s resp.Total = scnt resp.Limit = JSApiListLimit resp.Offset = offset + resp.Missing = missingNames s.sendAPIResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(resp)) } @@ -2015,6 +2046,13 @@ func (s *Server) jsStreamInfoRequest(sub *subscription, c *client, a *Account, s if sa != nil { clusterWideConsCount = len(sa.consumers) offline = s.allPeersOffline(sa.Group) + if sa.unsupported != nil && sa.Group != nil && cc.meta != nil && sa.Group.isMember(cc.meta.ID()) { + // If we're a member for this stream, and it's not supported, report it as offline. + resp.Error = NewJSStreamOfflineReasonError(errors.New(sa.unsupported.reason)) + s.sendDelayedAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp), nil, errRespDelay) + js.mu.RUnlock() + return + } } js.mu.RUnlock() @@ -2120,6 +2158,12 @@ func (s *Server) jsStreamInfoRequest(sub *subscription, c *client, a *Account, s } } + if mset.offlineReason != _EMPTY_ { + resp.Error = NewJSStreamOfflineReasonError(errors.New(mset.offlineReason)) + s.sendDelayedAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp), nil, errRespDelay) + return + } + config := mset.config() resp.StreamInfo = &StreamInfo{ Created: mset.createdTime(), @@ -3447,6 +3491,10 @@ func (s *Server) jsMsgGetRequest(sub *subscription, c *client, _ *Account, subje s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if mset.offlineReason != _EMPTY_ { + // Just let the request time out. + return + } var svp StoreMsg var sm *StoreMsg @@ -3533,6 +3581,11 @@ func (s *Server) jsConsumerUnpinRequest(sub *subscription, c *client, _ *Account s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if sa.unsupported != nil { + js.mu.RUnlock() + // Just let the request time out. + return + } ca, ok := sa.consumers[consumer] if !ok || ca == nil { @@ -3541,6 +3594,11 @@ func (s *Server) jsConsumerUnpinRequest(sub *subscription, c *client, _ *Account s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if ca.unsupported != nil { + js.mu.RUnlock() + // Just let the request time out. + return + } js.mu.RUnlock() // Then check if we are the leader. @@ -3572,12 +3630,20 @@ func (s *Server) jsConsumerUnpinRequest(sub *subscription, c *client, _ *Account s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if mset.offlineReason != _EMPTY_ { + // Just let the request time out. + return + } o := mset.lookupConsumer(consumer) if o == nil { resp.Error = NewJSConsumerNotFoundError() s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if o.offlineReason != _EMPTY_ { + // Just let the request time out. 
+ return + } var foundPriority bool for _, group := range o.config().PriorityGroups { @@ -4437,11 +4503,23 @@ func (s *Server) jsConsumerCreateRequest(sub *subscription, c *client, a *Accoun s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if stream.offlineReason != _EMPTY_ { + resp.Error = NewJSStreamOfflineReasonError(errors.New(stream.offlineReason)) + s.sendDelayedAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp), nil, errRespDelay) + return + } if o := stream.lookupConsumer(consumerName); o != nil { + if o.offlineReason != _EMPTY_ { + resp.Error = NewJSConsumerOfflineReasonError(errors.New(o.offlineReason)) + s.sendDelayedAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp), nil, errRespDelay) + return + } // If the consumer already exists then don't allow updating the PauseUntil, just set // it back to whatever the current configured value is. + o.mu.RLock() req.Config.PauseUntil = o.cfg.PauseUntil + o.mu.RUnlock() } // Initialize/update asset version metadata. @@ -4462,9 +4540,11 @@ func (s *Server) jsConsumerCreateRequest(sub *subscription, c *client, a *Accoun resp.ConsumerInfo = setDynamicConsumerInfoMetadata(o.initialInfo()) s.sendAPIResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(resp)) + o.mu.RLock() if o.cfg.PauseUntil != nil && !o.cfg.PauseUntil.IsZero() && time.Now().Before(*o.cfg.PauseUntil) { o.sendPauseAdvisoryLocked(&o.cfg) } + o.mu.RUnlock() } // Request for the list of all consumer names. @@ -4668,7 +4748,16 @@ func (s *Server) jsConsumerListRequest(sub *subscription, c *client, _ *Account, offset = ocnt } + var missingNames []string for _, o := range obs[offset:] { + if o.offlineReason != _EMPTY_ { + if resp.Offline == nil { + resp.Offline = make(map[string]string, 1) + } + resp.Offline[o.name] = o.offlineReason + missingNames = append(missingNames, o.name) + continue + } if cinfo := o.info(); cinfo != nil { resp.Consumers = append(resp.Consumers, cinfo) } @@ -4679,6 +4768,7 @@ func (s *Server) jsConsumerListRequest(sub *subscription, c *client, _ *Account, resp.Total = ocnt resp.Limit = JSApiListLimit resp.Offset = offset + resp.Missing = missingNames s.sendAPIResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(resp)) } @@ -4730,6 +4820,13 @@ func (s *Server) jsConsumerInfoRequest(sub *subscription, c *client, _ *Account, offline = s.allPeersOffline(rg) isMember = rg.isMember(ourID) } + if ca.unsupported != nil && isMember { + // If we're a member for this consumer, and it's not supported, report it as offline. + resp.Error = NewJSConsumerOfflineReasonError(errors.New(ca.unsupported.reason)) + s.sendDelayedAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp), nil, errRespDelay) + js.mu.RUnlock() + return + } } // Capture consumer leader here. isConsumerLeader := cc.isConsumerLeader(acc.Name, streamName, consumerName) @@ -4856,6 +4953,12 @@ func (s *Server) jsConsumerInfoRequest(sub *subscription, c *client, _ *Account, return } + if obs.offlineReason != _EMPTY_ { + resp.Error = NewJSConsumerOfflineReasonError(errors.New(obs.offlineReason)) + s.sendDelayedAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp), nil, errRespDelay) + return + } + if resp.ConsumerInfo = setDynamicConsumerInfoMetadata(obs.info()); resp.ConsumerInfo == nil { // This consumer returned nil which means it's closed. Respond with not found. 
resp.Error = NewJSConsumerNotFoundError() @@ -4997,6 +5100,11 @@ func (s *Server) jsConsumerPauseRequest(sub *subscription, c *client, _ *Account s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if sa.unsupported != nil { + js.mu.RUnlock() + // Just let the request time out. + return + } ca, ok := sa.consumers[consumer] if !ok || ca == nil { @@ -5005,6 +5113,11 @@ func (s *Server) jsConsumerPauseRequest(sub *subscription, c *client, _ *Account s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if ca.unsupported != nil { + js.mu.RUnlock() + // Just let the request time out. + return + } nca := *ca ncfg := *ca.Config @@ -5038,6 +5151,10 @@ func (s *Server) jsConsumerPauseRequest(sub *subscription, c *client, _ *Account s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if mset.offlineReason != _EMPTY_ { + // Just let the request time out. + return + } obs := mset.lookupConsumer(consumer) if obs == nil { @@ -5045,6 +5162,10 @@ func (s *Server) jsConsumerPauseRequest(sub *subscription, c *client, _ *Account s.sendAPIErrResponse(ci, acc, subject, reply, string(msg), s.jsonResponse(&resp)) return } + if obs.offlineReason != _EMPTY_ { + // Just let the request time out. + return + } ncfg := obs.cfg pauseUTC := req.PauseUntil.UTC() diff --git a/server/jetstream_cluster.go b/server/jetstream_cluster.go index be927b38cf2..7d1c2063966 100644 --- a/server/jetstream_cluster.go +++ b/server/jetstream_cluster.go @@ -21,6 +21,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "math" "math/rand" "os" @@ -75,6 +76,7 @@ type jetStreamCluster struct { type inflightInfo struct { rg *raftGroup sync string + cfg *StreamConfig } // Used to guide placement of streams and meta controllers in clustered JetStream. @@ -145,6 +147,66 @@ type streamAssignment struct { reassigning bool // i.e. due to placement issues, lack of resources, etc. resetting bool // i.e. there was an error, and we're stopping and starting the stream err error + unsupported *unsupportedStreamAssignment +} + +type unsupportedStreamAssignment struct { + json []byte // The raw JSON content of the assignment, if it's unsupported due to the required API level. + reason string + info StreamInfo + sysc *client + infoSub *subscription +} + +func newUnsupportedStreamAssignment(s *Server, sa *streamAssignment, json []byte) *unsupportedStreamAssignment { + reason := "stopped" + if sa.Config != nil && !supportsRequiredApiLevel(sa.Config.Metadata) { + if req := getRequiredApiLevel(sa.Config.Metadata); req != _EMPTY_ { + reason = fmt.Sprintf("unsupported - required API level: %s, current API level: %d", req, JSApiLevel) + } + } + return &unsupportedStreamAssignment{ + json: json, + reason: reason, + info: StreamInfo{ + Created: sa.Created, + Config: *setDynamicStreamMetadata(sa.Config), + Domain: s.getOpts().JetStreamDomain, + TimeStamp: time.Now().UTC(), + }, + } +} + +func (usa *unsupportedStreamAssignment) setupInfoSub(s *Server, sa *streamAssignment) { + if usa.infoSub != nil { + return + } + + // Bind to the system account. + ic := s.createInternalJetStreamClient() + ic.registerWithAccount(s.SystemAccount()) + usa.sysc = ic + + // Note below the way we subscribe here is so that we can send requests to ourselves. 
+ isubj := fmt.Sprintf(clusterStreamInfoT, sa.Client.serviceAccount(), sa.Config.Name) + usa.infoSub, _ = s.systemSubscribe(isubj, _EMPTY_, false, ic, usa.handleClusterStreamInfoRequest) +} + +func (usa *unsupportedStreamAssignment) handleClusterStreamInfoRequest(_ *subscription, c *client, _ *Account, _, reply string, _ []byte) { + s, acc := c.srv, c.acc + info := streamInfoClusterResponse{OfflineReason: usa.reason, StreamInfo: usa.info} + s.sendDelayedErrResponse(acc, reply, nil, s.jsonResponse(&info), errRespDelay) +} + +func (usa *unsupportedStreamAssignment) closeInfoSub(s *Server) { + if usa.infoSub != nil { + s.sysUnsubscribe(usa.infoSub) + usa.infoSub = nil + } + if usa.sysc != nil { + usa.sysc.closeConnection(ClientClosed) + usa.sysc = nil + } } // consumerAssignment is what the meta controller uses to assign consumers to streams. @@ -159,11 +221,104 @@ type consumerAssignment struct { Reply string `json:"reply,omitempty"` State *ConsumerState `json:"state,omitempty"` // Internal - responded bool - recovering bool - pending bool - deleted bool - err error + responded bool + recovering bool + pending bool + deleted bool + err error + unsupported *unsupportedConsumerAssignment +} + +type unsupportedConsumerAssignment struct { + json []byte // The raw JSON content of the assignment, if it's unsupported due to the required API level. + reason string + info ConsumerInfo + sysc *client + infoSub *subscription +} + +func newUnsupportedConsumerAssignment(ca *consumerAssignment, json []byte) *unsupportedConsumerAssignment { + reason := "stopped" + if ca.Config != nil && !supportsRequiredApiLevel(ca.Config.Metadata) { + if req := getRequiredApiLevel(ca.Config.Metadata); req != _EMPTY_ { + reason = fmt.Sprintf("unsupported - required API level: %s, current API level: %d", getRequiredApiLevel(ca.Config.Metadata), JSApiLevel) + } + } + return &unsupportedConsumerAssignment{ + json: json, + reason: reason, + info: ConsumerInfo{ + Stream: ca.Stream, + Name: ca.Name, + Created: ca.Created, + Config: setDynamicConsumerMetadata(ca.Config), + TimeStamp: time.Now().UTC(), + }, + } +} + +func (uca *unsupportedConsumerAssignment) setupInfoSub(s *Server, ca *consumerAssignment) { + if uca.infoSub != nil { + return + } + + // Bind to the system account. + ic := s.createInternalJetStreamClient() + ic.registerWithAccount(s.SystemAccount()) + uca.sysc = ic + + // Note below the way we subscribe here is so that we can send requests to ourselves. + isubj := fmt.Sprintf(clusterConsumerInfoT, ca.Client.serviceAccount(), ca.Stream, ca.Name) + uca.infoSub, _ = s.systemSubscribe(isubj, _EMPTY_, false, ic, uca.handleClusterConsumerInfoRequest) +} + +func (uca *unsupportedConsumerAssignment) handleClusterConsumerInfoRequest(_ *subscription, c *client, _ *Account, _, reply string, _ []byte) { + s, acc := c.srv, c.acc + info := consumerInfoClusterResponse{OfflineReason: uca.reason, ConsumerInfo: uca.info} + s.sendDelayedErrResponse(acc, reply, nil, s.jsonResponse(&info), errRespDelay) +} + +func (uca *unsupportedConsumerAssignment) closeInfoSub(s *Server) { + if uca.infoSub != nil { + s.sysUnsubscribe(uca.infoSub) + uca.infoSub = nil + } + if uca.sysc != nil { + uca.sysc.closeConnection(ClientClosed) + uca.sysc = nil + } +} + +type writeableConsumerAssignment struct { + consumerAssignment + // Internal + unsupportedJson []byte // The raw JSON content of the assignment, if it's unsupported due to the required API level. 
+} + +func (wca *writeableConsumerAssignment) MarshalJSON() ([]byte, error) { + if wca.unsupportedJson != nil { + return wca.unsupportedJson, nil + } + return json.Marshal(wca.consumerAssignment) +} + +func (wca *writeableConsumerAssignment) UnmarshalJSON(data []byte) error { + var unsupported bool + var ca consumerAssignment + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.DisallowUnknownFields() + if err := decoder.Decode(&ca); err != nil { + unsupported = true + ca = consumerAssignment{} + if err = json.Unmarshal(data, &ca); err != nil { + return err + } + } + wca.consumerAssignment = ca + if unsupported || (wca.Config != nil && !supportsRequiredApiLevel(wca.Config.Metadata)) { + wca.unsupportedJson = data + } + return nil } // streamPurge is what the stream leader will replicate when purging a stream. @@ -448,6 +603,10 @@ func (cc *jetStreamCluster) isStreamCurrent(account, stream string) bool { // For R1 it will make sure the stream is present on this server. func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) error { js.mu.RLock() + if sa != nil && sa.unsupported != nil { + js.mu.RUnlock() + return nil + } s, cc := js.srv, js.cluster if cc == nil { // Non-clustered mode @@ -501,10 +660,14 @@ func (js *jetStream) isStreamHealthy(acc *Account, sa *streamAssignment) error { // isConsumerHealthy will determine if the consumer is up to date. // For R1 it will make sure the consunmer is present on this server. func (js *jetStream) isConsumerHealthy(mset *stream, consumer string, ca *consumerAssignment) error { + js.mu.RLock() + if ca != nil && ca.unsupported != nil { + js.mu.RUnlock() + return nil + } if mset == nil { return errors.New("stream missing") } - js.mu.RLock() s, cc := js.srv, js.cluster if cc == nil { // Non-clustered mode @@ -1386,12 +1549,44 @@ func (js *jetStream) checkClusterSize() { // Represents our stable meta state that we can write out. type writeableStreamAssignment struct { + backingStreamAssignment + // Internal + unsupportedJson []byte // The raw JSON content of the assignment, if it's unsupported due to the required API level. 
+} + +type backingStreamAssignment struct { Client *ClientInfo `json:"client,omitempty"` Created time.Time `json:"created"` Config *StreamConfig `json:"stream"` Group *raftGroup `json:"group"` Sync string `json:"sync"` - Consumers []*consumerAssignment + Consumers []*writeableConsumerAssignment +} + +func (wsa *writeableStreamAssignment) MarshalJSON() ([]byte, error) { + if wsa.unsupportedJson != nil { + return wsa.unsupportedJson, nil + } + return json.Marshal(wsa.backingStreamAssignment) +} + +func (wsa *writeableStreamAssignment) UnmarshalJSON(data []byte) error { + var unsupported bool + var bsa backingStreamAssignment + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.DisallowUnknownFields() + if err := decoder.Decode(&bsa); err != nil { + unsupported = true + bsa = backingStreamAssignment{} + if err = json.Unmarshal(data, &bsa); err != nil { + return err + } + } + wsa.backingStreamAssignment = bsa + if unsupported || (wsa.Config != nil && !supportsRequiredApiLevel(wsa.Config.Metadata)) { + wsa.unsupportedJson = data + } + return nil } func (js *jetStream) clusterStreamConfig(accName, streamName string) (StreamConfig, bool) { @@ -1416,13 +1611,19 @@ func (js *jetStream) metaSnapshot() ([]byte, error) { streams := make([]writeableStreamAssignment, 0, nsa) for _, asa := range cc.streams { for _, sa := range asa { + if sa.unsupported != nil && sa.unsupported.json != nil { + streams = append(streams, writeableStreamAssignment{unsupportedJson: sa.unsupported.json}) + continue + } wsa := writeableStreamAssignment{ - Client: sa.Client.forAssignmentSnap(), - Created: sa.Created, - Config: sa.Config, - Group: sa.Group, - Sync: sa.Sync, - Consumers: make([]*consumerAssignment, 0, len(sa.consumers)), + backingStreamAssignment: backingStreamAssignment{ + Client: sa.Client.forAssignmentSnap(), + Created: sa.Created, + Config: sa.Config, + Group: sa.Group, + Sync: sa.Sync, + Consumers: make([]*writeableConsumerAssignment, 0, len(sa.consumers)), + }, } for _, ca := range sa.consumers { // Skip if the consumer is pending, we can't include it in our snapshot. @@ -1430,11 +1631,16 @@ func (js *jetStream) metaSnapshot() ([]byte, error) { if ca.pending { continue } + if ca.unsupported != nil && ca.unsupported.json != nil { + wsa.Consumers = append(wsa.Consumers, &writeableConsumerAssignment{unsupportedJson: ca.unsupported.json}) + nca++ + continue + } cca := *ca cca.Stream = wsa.Config.Name // Needed for safe roll-backs. cca.Client = cca.Client.forAssignmentSnap() cca.Subject, cca.Reply = _EMPTY_, _EMPTY_ - wsa.Consumers = append(wsa.Consumers, &cca) + wsa.Consumers = append(wsa.Consumers, &writeableConsumerAssignment{consumerAssignment: cca}) nca++ } streams = append(streams, wsa) @@ -1493,11 +1699,18 @@ func (js *jetStream) applyMetaSnapshot(buf []byte, ru *recoveryUpdates, isRecove streams[wsa.Client.serviceAccount()] = as } sa := &streamAssignment{Client: wsa.Client, Created: wsa.Created, Config: wsa.Config, Group: wsa.Group, Sync: wsa.Sync} + if wsa.unsupportedJson != nil { + sa.unsupported = newUnsupportedStreamAssignment(js.srv, sa, wsa.unsupportedJson) + } if len(wsa.Consumers) > 0 { sa.consumers = make(map[string]*consumerAssignment) - for _, ca := range wsa.Consumers { - if ca.Stream == _EMPTY_ { - ca.Stream = sa.Config.Name // Rehydrate from the stream name. + for _, wca := range wsa.Consumers { + if wca.Stream == _EMPTY_ { + wca.Stream = sa.Config.Name // Rehydrate from the stream name. 
+ } + ca := &consumerAssignment{Client: wca.Client, Created: wca.Created, Name: wca.Name, Stream: wca.Stream, Config: wca.Config, Group: wca.Group, Subject: wca.Subject, Reply: wca.Reply, State: wca.State} + if wca.unsupportedJson != nil { + ca.unsupported = newUnsupportedConsumerAssignment(ca, wca.unsupportedJson) } sa.consumers[ca.Name] = ca } @@ -1698,6 +1911,9 @@ func (js *jetStream) processAddPeer(peer string) { for _, asa := range cc.streams { for _, sa := range asa { + if sa.unsupported != nil { + continue + } if sa.missingPeers() { // Make sure the right cluster etc. if si.cluster != sa.Client.Cluster { @@ -1709,6 +1925,9 @@ func (js *jetStream) processAddPeer(peer string) { // Send our proposal for this csa. Also use same group definition for all the consumers as well. cc.meta.Propose(encodeAddStreamAssignment(csa)) for _, ca := range sa.consumers { + if ca.unsupported != nil { + continue + } // Ephemerals are R=1, so only auto-remap durables, or R>1. if ca.Config.Durable != _EMPTY_ || len(ca.Group.Peers) > 1 { cca := ca.copyGroup() @@ -1766,6 +1985,9 @@ func (js *jetStream) processRemovePeer(peer string) { for _, asa := range cc.streams { for _, sa := range asa { + if sa.unsupported != nil { + continue + } if rg := sa.Group; rg.isMember(peer) { js.removePeerFromStreamLocked(sa, peer) } @@ -1799,6 +2021,9 @@ func (js *jetStream) removePeerFromStreamLocked(sa *streamAssignment, peer strin cc.meta.Propose(encodeAddStreamAssignment(csa)) rg := csa.Group for _, ca := range sa.consumers { + if ca.unsupported != nil { + continue + } // Ephemerals are R=1, so only auto-remap durables, or R>1. if ca.Config.Durable != _EMPTY_ { cca := ca.copyGroup() @@ -1865,7 +2090,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo buf := e.Data switch entryOp(buf[0]) { case assignStreamOp: - sa, err := decodeStreamAssignment(buf[1:]) + sa, err := decodeStreamAssignment(js.srv, buf[1:]) if err != nil { js.srv.Errorf("JetStream cluster failed to decode stream assignment: %q", buf[1:]) return didSnap, didRemoveStream, didRemoveConsumer, err @@ -1875,11 +2100,11 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo key := sa.recoveryKey() ru.addStreams[key] = sa delete(ru.removeStreams, key) - } else if js.processStreamAssignment(sa) { - didRemoveStream = true + } else { + js.processStreamAssignment(sa) } case removeStreamOp: - sa, err := decodeStreamAssignment(buf[1:]) + sa, err := decodeStreamAssignment(js.srv, buf[1:]) if err != nil { js.srv.Errorf("JetStream cluster failed to decode stream assignment: %q", buf[1:]) return didSnap, didRemoveStream, didRemoveConsumer, err @@ -1958,7 +2183,7 @@ func (js *jetStream) applyMetaEntries(entries []*Entry, ru *recoveryUpdates) (bo didRemoveConsumer = true } case updateStreamOp: - sa, err := decodeStreamAssignment(buf[1:]) + sa, err := decodeStreamAssignment(js.srv, buf[1:]) if err != nil { js.srv.Errorf("JetStream cluster failed to decode stream assignment: %q", buf[1:]) return didSnap, didRemoveStream, didRemoveConsumer, err @@ -2640,6 +2865,9 @@ func (js *jetStream) monitorStream(mset *stream, sa *streamAssignment, sendSnaps js.mu.RLock() var needToWait bool for name, c := range sa.consumers { + if c.unsupported != nil { + continue + } for _, peer := range c.Group.Peers { // If we have peers still in the old set block. if oldPeerSet[peer] { @@ -2877,6 +3105,7 @@ func (mset *stream) resetClusteredState(err error) bool { // Need to do the rest in a separate Go routine. 
go func() { + mset.signalMonitorQuit() mset.monitorWg.Wait() mset.resetAndWaitOnConsumers() // Stop our stream. @@ -2899,6 +3128,9 @@ func (mset *stream) resetClusteredState(err error) bool { if cc := js.cluster; cc != nil && cc.meta != nil { ourID := cc.meta.ID() for _, ca := range sa.consumers { + if ca.unsupported != nil { + continue + } if rg := ca.Group; rg != nil && rg.isMember(ourID) { rg.node = nil // Erase group raft/node state. consumers = append(consumers, ca) @@ -3433,7 +3665,7 @@ func (js *jetStream) streamAssignment(account, stream string) (sa *streamAssignm } // processStreamAssignment is called when followers have replicated an assignment. -func (js *jetStream) processStreamAssignment(sa *streamAssignment) bool { +func (js *jetStream) processStreamAssignment(sa *streamAssignment) { js.mu.Lock() s, cc := js.srv, js.cluster accName, stream := sa.Client.serviceAccount(), sa.Config.Name @@ -3452,26 +3684,57 @@ func (js *jetStream) processStreamAssignment(sa *streamAssignment) bool { if s == nil || noMeta { js.mu.Unlock() - return false + return } accStreams := cc.streams[accName] if accStreams == nil { accStreams = make(map[string]*streamAssignment) - } else if osa := accStreams[stream]; osa != nil && osa != sa { - // Copy over private existing state from former SA. - if sa.Group != nil { - sa.Group.node = osa.Group.node + } else if osa := accStreams[stream]; osa != nil { + if osa != sa { + // Copy over private existing state from former SA. + if sa.Group != nil { + sa.Group.node = osa.Group.node + } + sa.consumers = osa.consumers + sa.responded = osa.responded + sa.err = osa.err + } + // Unsubscribe if it was previously unsupported. + if osa.unsupported != nil { + osa.unsupported.closeInfoSub(js.srv) + // If we've seen unsupported once, it remains for the lifetime of this server process. + if sa.unsupported == nil { + sa.unsupported = osa.unsupported + } } - sa.consumers = osa.consumers - sa.responded = osa.responded - sa.err = osa.err } // Update our state. accStreams[stream] = sa cc.streams[accName] = accStreams hasResponded := sa.responded + + // If unsupported, we can't register any further. + if sa.unsupported != nil { + sa.unsupported.setupInfoSub(s, sa) + apiLevel := getRequiredApiLevel(sa.Config.Metadata) + s.Warnf("Detected unsupported stream '%s > %s', delete the stream or upgrade the server to API level %s", accName, stream, apiLevel) + js.mu.Unlock() + + // Need to stop the stream, we can't keep running with an old config. + acc, err := s.LookupAccount(accName) + if err != nil { + return + } + mset, err := acc.lookupStream(stream) + if err != nil || mset.closed.Load() { + return + } + s.Warnf("Stopping unsupported stream '%s > %s'", accName, stream) + mset.stop(false, false) + return + } js.mu.Unlock() acc, err := s.LookupAccount(accName) @@ -3492,11 +3755,9 @@ func (js *jetStream) processStreamAssignment(sa *streamAssignment) bool { } else { s.Debugf(ll) } - return false + return } - var didRemove bool - // Check if this is for us.. if isMember { js.processClusterCreateStream(acc, sa) @@ -3510,10 +3771,7 @@ func (js *jetStream) processStreamAssignment(sa *streamAssignment) bool { js.mu.Lock() cc.streamsCheck = true js.mu.Unlock() - return false } - - return didRemove } // processUpdateStreamAssignment is called when followers have replicated an updated assignment. @@ -3577,6 +3835,36 @@ func (js *jetStream) processUpdateStreamAssignment(sa *streamAssignment) { sa.Group.node = nil } } + + // Unsubscribe if it was previously unsupported. 
+ if osa.unsupported != nil { + osa.unsupported.closeInfoSub(js.srv) + // If we've seen unsupported once, it remains for the lifetime of this server process. + if sa.unsupported == nil { + sa.unsupported = osa.unsupported + } + } + + // If unsupported, we can't register any further. + if sa.unsupported != nil { + sa.unsupported.setupInfoSub(s, sa) + apiLevel := getRequiredApiLevel(sa.Config.Metadata) + s.Warnf("Detected unsupported stream '%s > %s', delete the stream or upgrade the server to API level %s", accName, stream, apiLevel) + js.mu.Unlock() + + // Need to stop the stream, we can't keep running with an old config. + acc, err := s.LookupAccount(accName) + if err != nil { + return + } + mset, err := acc.lookupStream(stream) + if err != nil || mset.closed.Load() { + return + } + s.Warnf("Stopping unsupported stream '%s > %s'", accName, stream) + mset.stop(false, false) + return + } js.mu.Unlock() acc, err := s.LookupAccount(accName) @@ -3619,6 +3907,7 @@ func (s *Server) removeStream(mset *stream, nsa *streamAssignment) { if !isShuttingDown { // wait for monitor to be shutdown. + mset.signalMonitorQuit() mset.monitorWg.Wait() } mset.stop(true, false) @@ -3672,7 +3961,7 @@ func (js *jetStream) processClusterUpdateStream(acc *Account, osa, sa *streamAss } mset.monitorWg.Add(1) // Start monitoring.. - s.startGoRoutine( + started := s.startGoRoutine( func() { js.monitorStream(mset, sa, needsNode) }, pprofLabels{ "type": "stream", @@ -3680,6 +3969,9 @@ func (js *jetStream) processClusterUpdateStream(acc *Account, osa, sa *streamAss "stream": mset.name(), }, ) + if !started { + mset.monitorWg.Done() + } } else if numReplicas == 1 && alreadyRunning { // We downgraded to R1. Make sure we cleanup the raft node and the stream monitor. mset.removeNode() @@ -3930,7 +4222,7 @@ func (js *jetStream) processClusterCreateStream(acc *Account, sa *streamAssignme if mset != nil { mset.monitorWg.Add(1) } - s.startGoRoutine( + started := s.startGoRoutine( func() { js.monitorStream(mset, sa, false) }, pprofLabels{ "type": "stream", @@ -3938,6 +4230,9 @@ func (js *jetStream) processClusterCreateStream(acc *Account, sa *streamAssignme "stream": mset.name(), }, ) + if !started && mset != nil { + mset.monitorWg.Done() + } } } else { // Single replica stream, process manually here. @@ -4049,6 +4344,13 @@ func (js *jetStream) processStreamRemoval(sa *streamAssignment) { accStreams := cc.streams[sa.Client.serviceAccount()] needDelete := accStreams != nil && accStreams[stream] != nil if needDelete { + if osa := accStreams[stream]; osa != nil && osa.unsupported != nil { + osa.unsupported.closeInfoSub(js.srv) + // Remember we used to be unsupported, just so we can send a successful delete response. 
+ if sa.unsupported == nil { + sa.unsupported = osa.unsupported + } + } delete(accStreams, stream) if len(accStreams) == 0 { delete(cc.streams, sa.Client.serviceAccount()) @@ -4069,7 +4371,7 @@ func (js *jetStream) processClusterDeleteStream(sa *streamAssignment, isMember, s := js.srv node := sa.Group.node hadLeader := node == nil || !node.Leaderless() - offline := s.allPeersOffline(sa.Group) + offline := s.allPeersOffline(sa.Group) || sa.unsupported != nil var isMetaLeader bool if cc := js.cluster; cc != nil { isMetaLeader = cc.isLeader() @@ -4090,6 +4392,7 @@ func (js *jetStream) processClusterDeleteStream(sa *streamAssignment, isMember, n.Delete() } // wait for monitor to be shut down + mset.signalMonitorQuit() mset.monitorWg.Wait() err = mset.stop(true, wasLeader) stopped = true @@ -4107,7 +4410,7 @@ func (js *jetStream) processClusterDeleteStream(sa *streamAssignment, isMember, } // This is a stop gap cleanup in case - // 1) the account does not exist (and mset couldn't be stopped) and/or + // 1) the account or mset does not exist and/or // 2) node was nil (and couldn't be deleted) if !stopped || node == nil { if sacc := s.SystemAccount(); sacc != nil { @@ -4202,6 +4505,15 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) { } ca.responded = oca.responded ca.err = oca.err + + // Unsubscribe if it was previously unsupported. + if oca.unsupported != nil { + oca.unsupported.closeInfoSub(s) + // If we've seen unsupported once, it remains for the lifetime of this server process. + if ca.unsupported == nil { + ca.unsupported = oca.unsupported + } + } } // Capture the optional state. We will pass it along if we are a member to apply. @@ -4213,6 +4525,35 @@ func (js *jetStream) processConsumerAssignment(ca *consumerAssignment) { // Ok to replace an existing one, we check on process call below. sa.consumers[ca.Name] = ca ca.pending = false + + // If unsupported, we can't register any further. + if ca.unsupported != nil { + ca.unsupported.setupInfoSub(s, ca) + apiLevel := getRequiredApiLevel(ca.Config.Metadata) + s.Warnf("Detected unsupported consumer '%s > %s > %s', delete the consumer or upgrade the server to API level %s", accName, stream, ca.Name, apiLevel) + + // Mark stream as unsupported as well + if sa.unsupported == nil { + sa.unsupported = newUnsupportedStreamAssignment(s, sa, nil) + } + sa.unsupported.setupInfoSub(s, sa) + js.mu.Unlock() + + // Be conservative by protecting the whole stream, even if just one consumer is unsupported. + // This ensures it's safe, even with Interest-based retention where it would otherwise + // continue accepting but dropping messages. + acc, err := s.LookupAccount(accName) + if err != nil { + return + } + mset, err := acc.lookupStream(stream) + if err != nil || mset.closed.Load() { + return + } + s.Warnf("Stopping unsupported stream '%s > %s'", accName, stream) + mset.stop(false, false) + return + } js.mu.Unlock() acc, err := s.LookupAccount(accName) @@ -4308,6 +4649,7 @@ func (js *jetStream) processConsumerRemoval(ca *consumerAssignment) { js.mu.Unlock() return } + wasLeader := cc.isConsumerLeader(ca.Client.serviceAccount(), ca.Stream, ca.Name) // Delete from our state. @@ -4320,6 +4662,10 @@ func (js *jetStream) processConsumerRemoval(ca *consumerAssignment) { needDelete = true oca.deleted = true delete(sa.consumers, ca.Name) + // Remember we used to be unsupported, just so we can send a successful delete response. 
+ if ca.unsupported == nil { + ca.unsupported = oca.unsupported + } } } } @@ -4562,7 +4908,7 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state // Clustered consumer. // Start our monitoring routine if needed. if !alreadyRunning && o.shouldStartMonitor() { - s.startGoRoutine( + started := s.startGoRoutine( func() { js.monitorConsumer(o, ca) }, pprofLabels{ "type": "consumer", @@ -4571,6 +4917,9 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state "consumer": ca.Name, }, ) + if !started { + o.clearMonitorRunning() + } } // For existing consumer, only send response if not recovering. if wasExisting && !js.isMetaRecovering() { @@ -4597,7 +4946,7 @@ func (js *jetStream) processClusterDeleteConsumer(ca *consumerAssignment, wasLea js.mu.RLock() s := js.srv node := ca.Group.node - offline := s.allPeersOffline(ca.Group) + offline := s.allPeersOffline(ca.Group) || ca.unsupported != nil var isMetaLeader bool if cc := js.cluster; cc != nil { isMetaLeader = cc.isLeader() @@ -4605,6 +4954,7 @@ func (js *jetStream) processClusterDeleteConsumer(ca *consumerAssignment, wasLea recovering := ca.recovering js.mu.RUnlock() + stopped := false var resp = JSApiConsumerDeleteResponse{ApiResponse: ApiResponse{Type: JSApiConsumerDeleteResponseType}} var err error var acc *Account @@ -4614,13 +4964,9 @@ func (js *jetStream) processClusterDeleteConsumer(ca *consumerAssignment, wasLea if mset, _ := acc.lookupStream(ca.Stream); mset != nil { if o := mset.lookupConsumer(ca.Name); o != nil { err = o.stopWithFlags(true, false, true, wasLeader) + stopped = true } } - } else if ca.Group != nil { - // We have a missing account, see if we can cleanup. - if sacc := s.SystemAccount(); sacc != nil { - os.RemoveAll(filepath.Join(js.config.StoreDir, sacc.GetName(), defaultStoreDirName, ca.Group.Name)) - } } // Always delete the node if present. @@ -4628,6 +4974,19 @@ func (js *jetStream) processClusterDeleteConsumer(ca *consumerAssignment, wasLea node.Delete() } + // This is a stop gap cleanup in case + // 1) the account, mset, or consumer does not exist and/or + // 2) node was nil (and couldn't be deleted) + if !stopped || node == nil { + if sacc := s.SystemAccount(); sacc != nil { + os.RemoveAll(filepath.Join(js.config.StoreDir, sacc.GetName(), defaultStoreDirName, ca.Group.Name)) + } + } + + accDir := filepath.Join(js.config.StoreDir, ca.Client.serviceAccount()) + consumersDir := filepath.Join(accDir, streamsDir, ca.Stream, consumerDir) + os.RemoveAll(filepath.Join(consumersDir, ca.Name)) + if !wasLeader || ca.Reply == _EMPTY_ { if !(offline && isMetaLeader) { return @@ -4774,7 +5133,7 @@ func (js *jetStream) monitorConsumer(o *consumer, ca *consumerAssignment) { // from underneath the one that is running since it will be the same raft node. defer n.Stop() - qch, lch, aq, uch, ourPeerId := n.QuitC(), n.LeadChangeC(), n.ApplyQ(), o.updateC(), meta.ID() + qch, mqch, lch, aq, uch, ourPeerId := n.QuitC(), o.monitorQuitC(), n.LeadChangeC(), n.ApplyQ(), o.updateC(), meta.ID() s.Debugf("Starting consumer monitor for '%s > %s > %s' [%s]", o.acc.Name, ca.Stream, ca.Name, n.Group()) defer s.Debugf("Exiting consumer monitor for '%s > %s > %s' [%s]", o.acc.Name, ca.Stream, ca.Name, n.Group()) @@ -4863,6 +5222,10 @@ func (js *jetStream) monitorConsumer(o *consumer, ca *consumerAssignment) { // Server shutting down, but we might receive this before qch, so try to snapshot. 
doSnapshot(false) return + case <-mqch: + // Clean signal from shutdown routine so do best effort attempt to snapshot. + doSnapshot(false) + return case <-qch: // Clean signal from shutdown routine so do best effort attempt to snapshot. doSnapshot(false) @@ -5300,9 +5663,11 @@ func (js *jetStream) processConsumerLeaderChange(o *consumer, isLeader bool) err // Only send a pause advisory on consumer create if we're // actually paused. The timer would have been kicked by now // by the call to o.setLeader() above. + o.mu.RLock() if isLeader && o.cfg.PauseUntil != nil && !o.cfg.PauseUntil.IsZero() && time.Now().Before(*o.cfg.PauseUntil) { o.sendPauseAdvisoryLocked(&o.cfg) } + o.mu.RUnlock() return nil } @@ -5594,6 +5959,10 @@ func (s *Server) sendDomainLeaderElectAdvisory() { node := cc.meta js.mu.RUnlock() + if node == nil { + return + } + adv := &JSDomainLeaderElectedAdvisory{ TypedEvent: TypedEvent{ Type: JSDomainLeaderElectedAdvisoryType, @@ -5662,6 +6031,9 @@ func (js *jetStream) processLeaderChange(isLeader bool) { cc := js.cluster for acc, asa := range cc.streams { for _, sa := range asa { + if sa.unsupported != nil { + continue + } if sa.Sync == _EMPTY_ { s.Warnf("Stream assignment corrupt for stream '%s > %s'", acc, sa.Config.Name) nsa := &streamAssignment{Group: sa.Group, Config: sa.Config, Subject: sa.Subject, Reply: sa.Reply, Client: sa.Client} @@ -5877,6 +6249,9 @@ func (cc *jetStreamCluster) selectPeerGroup(r int, cluster string, cfg *StreamCo peerHA := make(map[string]int, len(peers)) for _, asa := range cc.streams { for _, sa := range asa { + if sa.unsupported != nil { + continue + } isHA := len(sa.Group.Peers) > 1 for _, peer := range sa.Group.Peers { peerStreams[peer]++ @@ -6228,6 +6603,11 @@ func (s *Server) jsClusteredStreamRequest(ci *ClientInfo, acc *Account, subject, if rg == nil { // Check inflight before proposing in case we have an existing inflight proposal. if existing, ok := streams[cfg.Name]; ok { + if !reflect.DeepEqual(existing.cfg, cfg) { + resp.Error = NewJSStreamNameExistError() + s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) + return + } // We have existing for same stream. Re-use same group and syncSubject. rg, syncSubject = existing.rg, existing.sync } @@ -6255,7 +6635,7 @@ func (s *Server) jsClusteredStreamRequest(ci *ClientInfo, acc *Account, subject, // on concurrent create requests while this stream assignment has // possibly not been processed yet. if streams, ok := cc.inflight[acc.Name]; ok && self == nil { - streams[cfg.Name] = &inflightInfo{rg, syncSubject} + streams[cfg.Name] = &inflightInfo{rg, syncSubject, cfg} } } } @@ -6461,7 +6841,7 @@ func (s *Server) jsClusteredStreamUpdateRequest(ci *ClientInfo, acc *Account, su // try to pick one. This could happen with older streams that were assigned by // previous servers. if rg.Cluster == _EMPTY_ { - // Prefer placement directrives if we have them. + // Prefer placement directives if we have them. if newCfg.Placement != nil && newCfg.Placement.Cluster != _EMPTY_ { rg.Cluster = newCfg.Placement.Cluster } else { @@ -6895,11 +7275,11 @@ func (s *Server) jsClusteredStreamListRequest(acc *Account, ci *ClientInfo, filt // Create an inbox for our responses and send out our requests. s.mu.Lock() inbox := s.newRespInbox() - rc := make(chan *StreamInfo, len(streams)) + rc := make(chan *streamInfoClusterResponse, len(streams)) // Store our handler. 
s.sys.replies[inbox] = func(sub *subscription, _ *client, _ *Account, subject, _ string, msg []byte) { - var si StreamInfo + var si streamInfoClusterResponse if err := json.Unmarshal(msg, &si); err != nil { s.Warnf("Error unmarshalling clustered stream info response:%v", err) return @@ -6967,10 +7347,14 @@ LOOP: si.State.Consumers = consCount } delete(sent, si.Config.Name) - resp.Streams = append(resp.Streams, si) - // Check to see if we are done. - if len(resp.Streams) == len(streams) { - break LOOP + if si.OfflineReason == _EMPTY_ { + resp.Streams = append(resp.Streams, &si.StreamInfo) + } else if _, ok := resp.Offline[si.Config.Name]; !ok { + if resp.Offline == nil { + resp.Offline = make(map[string]string, 1) + } + resp.Offline[si.Config.Name] = si.OfflineReason + missingNames = append(missingNames, si.Config.Name) } } } @@ -7042,11 +7426,11 @@ func (s *Server) jsClusteredConsumerListRequest(acc *Account, ci *ClientInfo, of // Create an inbox for our responses and send out requests. s.mu.Lock() inbox := s.newRespInbox() - rc := make(chan *ConsumerInfo, len(consumers)) + rc := make(chan *consumerInfoClusterResponse, len(consumers)) // Store our handler. s.sys.replies[inbox] = func(sub *subscription, _ *client, _ *Account, subject, _ string, msg []byte) { - var ci ConsumerInfo + var ci consumerInfoClusterResponse if err := json.Unmarshal(msg, &ci); err != nil { s.Warnf("Error unmarshaling clustered consumer info response:%v", err) return @@ -7110,10 +7494,14 @@ LOOP: break LOOP case ci := <-rc: delete(sent, ci.Name) - resp.Consumers = append(resp.Consumers, ci) - // Check to see if we are done. - if len(resp.Consumers) == len(consumers) { - break LOOP + if ci.OfflineReason == _EMPTY_ { + resp.Consumers = append(resp.Consumers, &ci.ConsumerInfo) + } else if _, ok := resp.Offline[ci.Name]; !ok { + if resp.Offline == nil { + resp.Offline = make(map[string]string, 1) + } + resp.Offline[ci.Name] = ci.OfflineReason + missingNames = append(missingNames, ci.Name) } } } @@ -7266,14 +7654,24 @@ func encodeDeleteStreamAssignment(sa *streamAssignment) []byte { return bb.Bytes() } -func decodeStreamAssignment(buf []byte) (*streamAssignment, error) { +func decodeStreamAssignment(s *Server, buf []byte) (*streamAssignment, error) { + var unsupported bool var sa streamAssignment - err := json.Unmarshal(buf, &sa) - if err != nil { - return nil, err + decoder := json.NewDecoder(bytes.NewReader(buf)) + decoder.DisallowUnknownFields() + if err := decoder.Decode(&sa); err != nil { + unsupported = true + sa = streamAssignment{} + if err = json.Unmarshal(buf, &sa); err != nil { + return nil, err + } } fixCfgMirrorWithDedupWindow(sa.Config) - return &sa, err + + if unsupported || (sa.Config != nil && !supportsRequiredApiLevel(sa.Config.Metadata)) { + sa.unsupported = newUnsupportedStreamAssignment(s, &sa, copyBytes(buf)) + } + return &sa, nil } func encodeDeleteRange(dr *DeleteRange) []byte { @@ -7405,6 +7803,9 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec // Don't count DIRECTS. total := 0 for cn, ca := range sa.consumers { + if ca.unsupported != nil { + continue + } // If the consumer name is specified and we think it already exists, then // we're likely updating an existing consumer, so don't count it. Otherwise // we will incorrectly return NewJSMaximumConsumersLimitError for an update. 
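The pattern behind the decode changes above is worth isolating: an assignment is first decoded strictly with DisallowUnknownFields, and a failure (or a "_nats.req.level" metadata entry above the running server's API level) marks the assignment as unsupported while still keeping its data via a lenient re-decode. The sketch below is illustrative only: the assignment type, decodeAssignment, and currentApiLevel are stand-ins, not the server's own decodeStreamAssignment, getRequiredApiLevel, supportsRequiredApiLevel, or JSApiLevel.

// Illustrative sketch; assumes a hypothetical assignment type and uses
// currentApiLevel as a stand-in for the server's JSApiLevel constant.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"strconv"
)

const currentApiLevel = 1

type assignment struct {
	Name     string            `json:"name"`
	Metadata map[string]string `json:"metadata,omitempty"`
}

// decodeAssignment tries a strict decode first. Unknown fields suggest the
// assignment was written by a newer server, so the data is kept (via a
// lenient re-decode) but flagged unsupported rather than dropped.
func decodeAssignment(buf []byte) (assignment, bool, error) {
	var a assignment
	unsupported := false
	dec := json.NewDecoder(bytes.NewReader(buf))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&a); err != nil {
		unsupported = true
		a = assignment{}
		if err = json.Unmarshal(buf, &a); err != nil {
			return a, true, err
		}
	}
	// A cleanly decoded assignment can still require a newer API level.
	if lvl, ok := a.Metadata["_nats.req.level"]; ok {
		if n, err := strconv.Atoi(lvl); err == nil && n > currentApiLevel {
			unsupported = true
		}
	}
	return a, unsupported, nil
}

func main() {
	a, unsupported, err := decodeAssignment([]byte(`{"name":"S","unknown_field":true}`))
	fmt.Println(a.Name, unsupported, err) // S true <nil>
}

Keeping the original JSON next to the unsupported flag, as the real code does with copyBytes(buf), is presumably what allows the raw bytes to round-trip through meta snapshots unchanged, which the strict-decoding test below verifies.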
@@ -7618,8 +8019,15 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec if rBefore < rAfter { newPeerSet := nca.Group.Peers - // scale up by adding new members from the stream peer set that are not yet in the consumer peer set + // Scale up by adding new members from the stream peer set that are not yet in the consumer peer set. streamPeerSet := copyStrings(sa.Group.Peers) + + // Respond with error when there is a config mismatch between the intended config and expected peer size. + if len(streamPeerSet) < rAfter { + resp.Error = NewJSConsumerReplicasExceedsStreamError() + s.sendAPIErrResponse(ci, acc, subject, reply, string(rmsg), s.jsonResponse(&resp)) + return + } rand.Shuffle(rAfter, func(i, j int) { streamPeerSet[i], streamPeerSet[j] = streamPeerSet[j], streamPeerSet[i] }) for _, p := range streamPeerSet { found := false @@ -7693,9 +8101,22 @@ func encodeDeleteConsumerAssignment(ca *consumerAssignment) []byte { } func decodeConsumerAssignment(buf []byte) (*consumerAssignment, error) { + var unsupported bool var ca consumerAssignment - err := json.Unmarshal(buf, &ca) - return &ca, err + decoder := json.NewDecoder(bytes.NewReader(buf)) + decoder.DisallowUnknownFields() + if err := decoder.Decode(&ca); err != nil { + unsupported = true + ca = consumerAssignment{} + if err = json.Unmarshal(buf, &ca); err != nil { + return nil, err + } + } + + if unsupported || (ca.Config != nil && !supportsRequiredApiLevel(ca.Config.Metadata)) { + ca.unsupported = newUnsupportedConsumerAssignment(&ca, copyBytes(buf)) + } + return &ca, nil } func encodeAddConsumerAssignmentCompressed(ca *consumerAssignment) []byte { @@ -7710,10 +8131,33 @@ func encodeAddConsumerAssignmentCompressed(ca *consumerAssignment) []byte { } func decodeConsumerAssignmentCompressed(buf []byte) (*consumerAssignment, error) { + var unsupported bool var ca consumerAssignment bb := bytes.NewBuffer(buf) s2d := s2.NewReader(bb) - return &ca, json.NewDecoder(s2d).Decode(&ca) + decoder := json.NewDecoder(s2d) + decoder.DisallowUnknownFields() + if err := decoder.Decode(&ca); err != nil { + unsupported = true + ca = consumerAssignment{} + bb = bytes.NewBuffer(buf) + s2d = s2.NewReader(bb) + if err = json.NewDecoder(s2d).Decode(&ca); err != nil { + return nil, err + } + } + + if unsupported || (ca.Config != nil && !supportsRequiredApiLevel(ca.Config.Metadata)) { + bb = bytes.NewBuffer(buf) + s2d = s2.NewReader(bb) + dec, err := io.ReadAll(s2d) + if err != nil { + return nil, err + } + ca.unsupported = newUnsupportedConsumerAssignment(&ca, copyBytes(dec)) + } + + return &ca, nil } var errBadStreamMsg = errors.New("jetstream cluster bad replicated stream msg") @@ -8948,6 +9392,9 @@ func (js *jetStream) streamAlternates(ci *ClientInfo, stream string) []StreamAlt var alts []StreamAlternate for _, sa := range cc.streams[acc.Name] { + if sa.unsupported != nil { + continue + } // Add in ourselves and any mirrors. 
if sa.Config.Name == stream || (sa.Config.Mirror != nil && sa.Config.Mirror.Name == stream) { alts = append(alts, StreamAlternate{Name: sa.Config.Name, Domain: domain, Cluster: sa.Group.Cluster}) diff --git a/server/jetstream_cluster_1_test.go b/server/jetstream_cluster_1_test.go index c08b11a06a5..799d59b4f6d 100644 --- a/server/jetstream_cluster_1_test.go +++ b/server/jetstream_cluster_1_test.go @@ -23,16 +23,19 @@ import ( "encoding/json" "errors" "fmt" + "math" "math/rand" "os" "path/filepath" "reflect" + "strconv" "strings" "sync" "sync/atomic" "testing" "time" + "github.com/klauspost/compress/s2" "github.com/nats-io/jwt/v2" "github.com/nats-io/nats.go" ) @@ -8974,6 +8977,694 @@ func TestJetStreamClusterCreateEphemeralConsumerWithOfflineNodes(t *testing.T) { } } +func TestJetStreamClusterOfflineStreamAndConsumerAfterAssetCreateOrUpdate(t *testing.T) { + clusterName := "R3S" + c := createJetStreamClusterExplicit(t, clusterName, 3) + defer c.shutdown() + + nc, js := jsClientConnect(t, c.randomServer()) + defer nc.Close() + + ml := c.leader() + require_NotNil(t, ml) + + sjs := ml.getJetStream() + require_NotNil(t, sjs) + sjs.mu.Lock() + cc := sjs.cluster + if cc == nil || cc.meta == nil { + sjs.mu.Unlock() + t.Fatalf("Expected cluster to be initialized") + } + + restart := func() { + t.Helper() + for _, s := range c.servers { + sjs = s.getJetStream() + snap, err := sjs.metaSnapshot() + require_NoError(t, err) + meta := sjs.getMetaGroup() + meta.InstallSnapshot(snap) + } + + c.stopAll() + c.restartAllSamePorts() + c.waitOnLeader() + ml = c.leader() + require_NotNil(t, ml) + require_NoError(t, nc.ForceReconnect()) + + sjs = ml.getJetStream() + require_NotNil(t, sjs) + sjs.mu.Lock() + cc = sjs.cluster + if cc == nil || cc.meta == nil { + sjs.mu.Unlock() + t.Fatalf("Expected cluster to be initialized") + } + sjs.mu.Unlock() + } + + getValidMetaSnapshot := func() (wsas []writeableStreamAssignment) { + t.Helper() + snap, err := sjs.metaSnapshot() + require_NoError(t, err) + require_True(t, len(snap) > 0) + dec, err := s2.Decode(nil, snap) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(dec, &wsas)) + return wsas + } + + // Create a stream that's unsupported. 
+ ci := &ClientInfo{ + Account: globalAccountName, + Cluster: clusterName, + } + scfg := &StreamConfig{ + Name: "DowngradeStreamTest", + Storage: FileStorage, + Replicas: 3, + Metadata: map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt - 1)}, + } + rg, perr := sjs.createGroupForStream(ci, scfg) + if perr != nil { + sjs.mu.Unlock() + require_NoError(t, perr) + } + sa := &streamAssignment{ + Config: scfg, + Group: rg, + Created: time.Now().UTC(), + Client: ci, + } + err := cc.meta.Propose(encodeAddStreamAssignment(sa)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnAllCurrent() + + unsupported := func(requiredApiLevel int) string { + return fmt.Sprintf("unsupported - required API level: %d, current API level: %d", requiredApiLevel, JSApiLevel) + } + expectStreamInfo := func(offlineReason, streamName string) { + var msg *nats.Msg + checkFor(t, 3*time.Second, 200*time.Millisecond, func() error { + msg, err = nc.Request(fmt.Sprintf(JSApiStreamInfoT, streamName), nil, time.Second) + return err + }) + var si JSApiStreamInfoResponse + require_NoError(t, json.Unmarshal(msg.Data, &si)) + require_NotNil(t, si.Error) + require_Error(t, si.Error, NewJSStreamOfflineReasonError(errors.New(offlineReason))) + + var sn JSApiStreamNamesResponse + msg, err = nc.Request(JSApiStreams, nil, time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &sn)) + require_Len(t, len(sn.Streams), 1) + require_Equal(t, sn.Streams[0], streamName) + + var sl JSApiStreamListResponse + msg, err = nc.Request(JSApiStreamList, nil, 2*time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &sl)) + require_Len(t, len(sl.Streams), 0) + require_Len(t, len(sl.Missing), 1) + require_Equal(t, sl.Missing[0], streamName) + require_Len(t, len(sl.Offline), 1) + require_Equal(t, sl.Offline[streamName], offlineReason) + } + + // Stream should be reported as offline, but healthz should report healthy to not block downgrades. + expectStreamInfo(unsupported(math.MaxInt-1), "DowngradeStreamTest") + health := ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectStreamInfo(unsupported(math.MaxInt-1), "DowngradeStreamTest") + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas := getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeStreamTest") + require_Equal(t, wsas[0].Config.Metadata["_nats.req.level"], strconv.Itoa(math.MaxInt-1)) + + // Update a stream that's unsupported. + sjs.mu.Lock() + scfg.Metadata = map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt)} + err = cc.meta.Propose(encodeUpdateStreamAssignment(sa)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnAllCurrent() + + // Stream should be reported as offline, but healthz should report healthy to not block downgrades. + expectStreamInfo(unsupported(math.MaxInt), "DowngradeStreamTest") + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectStreamInfo(unsupported(math.MaxInt), "DowngradeStreamTest") + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas = getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeStreamTest") + require_Equal(t, wsas[0].Config.Metadata["_nats.req.level"], strconv.Itoa(math.MaxInt)) + + // Deleting a stream should always work, even if it is unsupported. 
+ require_NoError(t, js.DeleteStream("DowngradeStreamTest")) + snap, err := sjs.metaSnapshot() + require_NoError(t, err) + require_True(t, snap == nil) + + // Create a supported stream and consumer. + _, err = js.AddStream(&nats.StreamConfig{Name: "DowngradeConsumerTest", Replicas: 3}) + require_NoError(t, err) + _, err = js.AddConsumer("DowngradeConsumerTest", &nats.ConsumerConfig{Name: "consumer"}) + require_NoError(t, err) + + // Create a consumer that's unsupported. + sjs.mu.Lock() + ccfg := &ConsumerConfig{ + Name: "DowngradeConsumerTest", + Replicas: 3, + Metadata: map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt - 1)}, + } + rg = cc.createGroupForConsumer(ccfg, sa) + ca := &consumerAssignment{ + Config: ccfg, + Group: rg, + Stream: "DowngradeConsumerTest", + Name: "DowngradeConsumerTest", + Created: time.Now().UTC(), + Client: ci, + } + err = cc.meta.Propose(encodeAddConsumerAssignment(ca)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnAllCurrent() + + expectConsumerInfo := func(offlineReason string) { + var msg *nats.Msg + checkFor(t, 3*time.Second, 200*time.Millisecond, func() error { + msg, err = nc.Request(fmt.Sprintf(JSApiConsumerInfoT, "DowngradeConsumerTest", "DowngradeConsumerTest"), nil, time.Second) + return err + }) + var ci JSApiConsumerInfoResponse + require_NoError(t, json.Unmarshal(msg.Data, &ci)) + require_NotNil(t, ci.Error) + require_Error(t, ci.Error, NewJSConsumerOfflineReasonError(errors.New(offlineReason))) + + var cn JSApiConsumerNamesResponse + msg, err = nc.Request(fmt.Sprintf(JSApiConsumersT, "DowngradeConsumerTest"), nil, time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &cn)) + require_Len(t, len(cn.Consumers), 2) + require_Equal(t, cn.Consumers[0], "DowngradeConsumerTest") + for _, name := range cn.Consumers { + require_True(t, name == "consumer" || name == "DowngradeConsumerTest") + } + + var cl JSApiConsumerListResponse + msg, err = nc.Request(fmt.Sprintf(JSApiConsumerListT, "DowngradeConsumerTest"), nil, 5*time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &cl)) + require_Len(t, len(cl.Consumers), 0) + require_Len(t, len(cl.Missing), 2) + for _, name := range cl.Missing { + require_True(t, name == "consumer" || name == "DowngradeConsumerTest") + } + require_Len(t, len(cl.Offline), 1) + require_Equal(t, cl.Offline["DowngradeConsumerTest"], offlineReason) + + // Stream should also be reported as offline. + // Specifically, as "stopped" because it's still supported, but can't run due to the unsupported consumer. + expectStreamInfo("stopped", "DowngradeConsumerTest") + } + + // Consumer should be reported as offline, but healthz should report healthy to not block downgrades. 
+ expectConsumerInfo(unsupported(math.MaxInt - 1)) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectConsumerInfo(unsupported(math.MaxInt - 1)) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas = getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeConsumerTest") + require_Equal(t, wsas[0].Config.Metadata["_nats.req.level"], "0") + require_Len(t, len(wsas[0].Consumers), 2) + for _, wca := range wsas[0].Consumers { + if wca.Config.Name == "DowngradeConsumerTest" { + require_Equal(t, wca.Config.Metadata["_nats.req.level"], strconv.Itoa(math.MaxInt-1)) + } else { + require_Equal(t, wca.Config.Name, "consumer") + } + } + + // Update a consumer (with compressed data) that's unsupported. + ccfg.Metadata = map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt)} + sjs.mu.Lock() + err = cc.meta.Propose(encodeAddConsumerAssignmentCompressed(ca)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnAllCurrent() + + // Consumer should be reported as offline, but healthz should report healthy to not block downgrades. + expectConsumerInfo(unsupported(math.MaxInt)) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectConsumerInfo(unsupported(math.MaxInt)) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas = getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeConsumerTest") + require_Equal(t, wsas[0].Config.Metadata["_nats.req.level"], "0") + require_Len(t, len(wsas[0].Consumers), 2) + for _, wca := range wsas[0].Consumers { + if wca.Config.Name == "DowngradeConsumerTest" { + require_Equal(t, wca.Config.Metadata["_nats.req.level"], strconv.Itoa(math.MaxInt)) + } else { + require_Equal(t, wca.Config.Name, "consumer") + } + } + + // Deleting a consumer should always work, even if it is unsupported. 
+ require_NoError(t, js.DeleteConsumer("DowngradeConsumerTest", "DowngradeConsumerTest")) + c.waitOnAllCurrent() + + wsas = getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeConsumerTest") + require_Equal(t, wsas[0].Config.Metadata["_nats.req.level"], "0") + require_Len(t, len(wsas[0].Consumers), 1) + require_Equal(t, wsas[0].Consumers[0].Config.Name, "consumer") +} + +func TestJetStreamClusterOfflineStreamAndConsumerAfterDowngrade(t *testing.T) { + clusterName := "R3S" + c := createJetStreamClusterExplicit(t, clusterName, 3) + defer c.shutdown() + + nc, js := jsClientConnect(t, c.randomServer()) + defer nc.Close() + + ml := c.leader() + require_NotNil(t, ml) + + sjs := ml.getJetStream() + require_NotNil(t, sjs) + sjs.mu.Lock() + cc := sjs.cluster + if cc == nil || cc.meta == nil { + sjs.mu.Unlock() + t.Fatalf("Expected cluster to be initialized") + } + + restart := func() { + t.Helper() + for _, s := range c.servers { + sjs = s.getJetStream() + snap, err := sjs.metaSnapshot() + require_NoError(t, err) + meta := sjs.getMetaGroup() + meta.InstallSnapshot(snap) + } + + c.stopAll() + c.restartAllSamePorts() + c.waitOnLeader() + ml = c.leader() + require_NotNil(t, ml) + require_NoError(t, nc.ForceReconnect()) + + sjs = ml.getJetStream() + require_NotNil(t, sjs) + sjs.mu.Lock() + cc = sjs.cluster + if cc == nil || cc.meta == nil { + sjs.mu.Unlock() + t.Fatalf("Expected cluster to be initialized") + } + sjs.mu.Unlock() + } + + getValidMetaSnapshot := func() (wsas []writeableStreamAssignment) { + t.Helper() + snap, err := sjs.metaSnapshot() + require_NoError(t, err) + require_True(t, len(snap) > 0) + dec, err := s2.Decode(nil, snap) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(dec, &wsas)) + return wsas + } + + // Create a stream that's unsupported. + ci := &ClientInfo{ + Account: globalAccountName, + Cluster: clusterName, + } + scfg := &StreamConfig{ + Name: "DowngradeStreamTest", + Storage: FileStorage, + Replicas: 3, + } + rg, perr := sjs.createGroupForStream(ci, scfg) + if perr != nil { + sjs.mu.Unlock() + require_NoError(t, perr) + } + sa := &streamAssignment{ + Config: scfg, + Group: rg, + Created: time.Now().UTC(), + Client: ci, + } + err := cc.meta.Propose(encodeAddStreamAssignment(sa)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnStreamLeader(globalAccountName, "DowngradeStreamTest") + + expectStreamInfo := func(offline bool) { + if !offline { + c.waitOnStreamLeader(globalAccountName, "DowngradeStreamTest") + } + var msg *nats.Msg + checkFor(t, 3*time.Second, 200*time.Millisecond, func() error { + msg, err = nc.Request(fmt.Sprintf(JSApiStreamInfoT, "DowngradeStreamTest"), nil, time.Second) + return err + }) + var si JSApiStreamInfoResponse + require_NoError(t, json.Unmarshal(msg.Data, &si)) + if !offline { + require_True(t, si.Error == nil) + } else { + require_NotNil(t, si.Error) + require_Contains(t, si.Error.Error(), "stream is offline", "unsupported", "required API level") + } + } + + // Stream is still supported, so it should be available and healthz should report healthy. + expectStreamInfo(false) + health := ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectStreamInfo(false) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas := getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeStreamTest") + + // Update a stream to be unsupported. 
+ sjs.mu.Lock() + scfg.Metadata = map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt)} + err = cc.meta.Propose(encodeUpdateStreamAssignment(sa)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnAllCurrent() + + // Stream should be reported as offline, but healthz should report healthy to not block downgrades. + expectStreamInfo(true) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectStreamInfo(true) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas = getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeStreamTest") + + // Deleting a stream should always work, even if it is unsupported. + require_NoError(t, js.DeleteStream("DowngradeStreamTest")) + snap, err := sjs.metaSnapshot() + require_NoError(t, err) + require_True(t, snap == nil) + + // Create a supported stream and consumer. + _, err = js.AddStream(&nats.StreamConfig{Name: "DowngradeConsumerTest", Replicas: 3}) + require_NoError(t, err) + + sjs.mu.Lock() + ccfg := &ConsumerConfig{ + Name: "DowngradeConsumerTest", + Replicas: 3, + } + rg = cc.createGroupForConsumer(ccfg, sa) + ca := &consumerAssignment{ + Config: ccfg, + Group: rg, + Stream: "DowngradeConsumerTest", + Name: "DowngradeConsumerTest", + Created: time.Now().UTC(), + Client: ci, + } + err = cc.meta.Propose(encodeAddConsumerAssignment(ca)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnConsumerLeader(globalAccountName, "DowngradeConsumerTest", "DowngradeConsumerTest") + + expectConsumerInfo := func(offline bool) { + if !offline { + c.waitOnConsumerLeader(globalAccountName, "DowngradeConsumerTest", "DowngradeConsumerTest") + } + var msg *nats.Msg + checkFor(t, 3*time.Second, 200*time.Millisecond, func() error { + msg, err = nc.Request(fmt.Sprintf(JSApiConsumerInfoT, "DowngradeConsumerTest", "DowngradeConsumerTest"), nil, 2*time.Second) + return err + }) + var ci JSApiConsumerInfoResponse + require_NoError(t, json.Unmarshal(msg.Data, &ci)) + if !offline { + require_True(t, ci.Error == nil) + } else { + require_NotNil(t, ci.Error) + require_Contains(t, ci.Error.Error(), "consumer is offline", "unsupported", "required API level") + } + } + + // Consumer is still supported, so it should be available and healthz should report healthy. + expectConsumerInfo(false) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectConsumerInfo(false) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas = getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeConsumerTest") + require_Len(t, len(wsas[0].Consumers), 1) + + // Update a consumer to be unsupported. + ccfg.Metadata = map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt)} + sjs.mu.Lock() + err = cc.meta.Propose(encodeAddConsumerAssignment(ca)) + sjs.mu.Unlock() + require_NoError(t, err) + c.waitOnAllCurrent() + + // Consumer should be reported as offline, but healthz should report healthy to not block downgrades. 
+ expectConsumerInfo(true) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + restart() + expectConsumerInfo(true) + health = ml.healthz(&HealthzOptions{}) + require_Equal(t, health.StatusCode, 200) + + wsas = getValidMetaSnapshot() + require_Len(t, len(wsas), 1) + require_Equal(t, wsas[0].Config.Name, "DowngradeConsumerTest") + require_Equal(t, wsas[0].Config.Metadata["_nats.req.level"], "0") + require_Len(t, len(wsas[0].Consumers), 1) + require_Equal(t, wsas[0].Consumers[0].Config.Metadata["_nats.req.level"], strconv.Itoa(math.MaxInt)) +} + +func TestJetStreamClusterOfflineStreamAndConsumerStrictDecoding(t *testing.T) { + s := RunBasicJetStreamServer(t) + defer s.Shutdown() + + unsupportedJson := []byte("{\"unknown\": true}") + + sa, err := decodeStreamAssignment(s, unsupportedJson) + require_NoError(t, err) + require_True(t, bytes.Equal(sa.unsupported.json, unsupportedJson)) + + ca, err := decodeConsumerAssignment(unsupportedJson) + require_NoError(t, err) + require_True(t, bytes.Equal(ca.unsupported.json, unsupportedJson)) + + var bb bytes.Buffer + s2e := s2.NewWriter(&bb) + _, err = s2e.Write(unsupportedJson) + require_NoError(t, err) + require_NoError(t, s2e.Close()) + ca, err = decodeConsumerAssignmentCompressed(bb.Bytes()) + require_NoError(t, err) + require_True(t, bytes.Equal(ca.unsupported.json, unsupportedJson)) + + var wsa writeableStreamAssignment + require_NoError(t, wsa.UnmarshalJSON(unsupportedJson)) + require_True(t, bytes.Equal(wsa.unsupportedJson, unsupportedJson)) + + var wca writeableConsumerAssignment + require_NoError(t, wca.UnmarshalJSON(unsupportedJson)) + require_True(t, bytes.Equal(wca.unsupportedJson, unsupportedJson)) +} + +func TestJetStreamClusterStreamMonitorShutdownWithoutRaftNode(t *testing.T) { + c := createJetStreamClusterExplicit(t, "R3S", 3) + defer c.shutdown() + + nc, js := jsClientConnect(t, c.randomServer()) + defer nc.Close() + + _, err := js.AddStream(&nats.StreamConfig{ + Name: "TEST", + Subjects: []string{"foo"}, + Replicas: 3, + }) + require_NoError(t, err) + checkFor(t, 2*time.Second, 200*time.Millisecond, func() error { + for _, s := range c.servers { + if !s.JetStreamIsStreamAssigned(globalAccountName, "TEST") { + return fmt.Errorf("stream not assigned on %s", s.Name()) + } + } + return nil + }) + + var nodes []RaftNode + for _, s := range c.servers { + mset, err := s.globalAccount().lookupStream("TEST") + require_NoError(t, err) + // Manually nil-out the node. This shouldn't happen normally, + // but tests we can shut down purely with the monitor goroutine quit channel. 
+ mset.mu.Lock() + n := mset.node + mset.node = nil + mset.mu.Unlock() + require_NotNil(t, n) + nodes = append(nodes, n) + } + for _, n := range nodes { + require_NotEqual(t, n.State(), Closed) + } + + require_NoError(t, js.DeleteStream("TEST")) + checkFor(t, 2*time.Second, 200*time.Millisecond, func() error { + for _, n := range nodes { + if state := n.State(); state != Closed { + return fmt.Errorf("node not closed on %s: %s", n.ID(), state.String()) + } + } + return nil + }) +} + +func TestJetStreamClusterConsumerMonitorShutdownWithoutRaftNode(t *testing.T) { + c := createJetStreamClusterExplicit(t, "R3S", 3) + defer c.shutdown() + + nc, js := jsClientConnect(t, c.randomServer()) + defer nc.Close() + + _, err := js.AddStream(&nats.StreamConfig{ + Name: "TEST", + Subjects: []string{"foo"}, + Replicas: 3, + }) + require_NoError(t, err) + _, err = js.AddConsumer("TEST", &nats.ConsumerConfig{ + Durable: "DURABLE", + Replicas: 3, + }) + require_NoError(t, err) + checkFor(t, 2*time.Second, 200*time.Millisecond, func() error { + for _, s := range c.servers { + _, cc := s.getJetStreamCluster() + if !cc.isConsumerAssigned(s.globalAccount(), "TEST", "DURABLE") { + return fmt.Errorf("consumer not assigned on %s", s.Name()) + } + } + return nil + }) + + var nodes []RaftNode + for _, s := range c.servers { + mset, err := s.globalAccount().lookupStream("TEST") + require_NoError(t, err) + o := mset.lookupConsumer("DURABLE") + require_NotNil(t, o) + // Manually nil-out the node. This shouldn't happen normally, + // but tests we can shut down purely with the monitor goroutine quit channel. + o.mu.Lock() + n := o.node + o.node = nil + o.mu.Unlock() + require_NotNil(t, n) + nodes = append(nodes, n) + } + for _, n := range nodes { + require_NotEqual(t, n.State(), Closed) + } + + require_NoError(t, js.DeleteStream("TEST")) + checkFor(t, 2*time.Second, 200*time.Millisecond, func() error { + for _, n := range nodes { + if state := n.State(); state != Closed { + return fmt.Errorf("node not closed on %s: %s", n.ID(), state.String()) + } + } + return nil + }) +} + +func TestJetStreamClusterUnsetEmptyPlacement(t *testing.T) { + c := createJetStreamClusterExplicit(t, "R3S", 3) + defer c.shutdown() + + nc, js := jsClientConnect(t, c.randomServer()) + defer nc.Close() + + cfg := &nats.StreamConfig{ + Name: "TEST", + Subjects: []string{"foo"}, + Placement: &nats.Placement{}, + } + si, err := js.AddStream(cfg) + require_NoError(t, err) + require_True(t, si.Config.Placement == nil) + + si, err = js.UpdateStream(cfg) + require_NoError(t, err) + require_True(t, si.Config.Placement == nil) + + // Set a placement level + cfg.Placement = &nats.Placement{Cluster: "R3S"} + si, err = js.UpdateStream(cfg) + require_NoError(t, err) + require_True(t, si.Config.Placement != nil) + require_Equal(t, si.Config.Placement.Cluster, "R3S") + + // And ensure it can be reset. + cfg.Placement = &nats.Placement{} + si, err = js.UpdateStream(cfg) + require_NoError(t, err) + require_True(t, si.Config.Placement == nil) +} + // // DO NOT ADD NEW TESTS IN THIS FILE (unless to balance test times) // Add at the end of jetstream_cluster__test.go, with being the highest value. 
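The two monitor-shutdown tests above exercise the quit-channel path added in this change: the stream and consumer monitors now select on their own quit channel (signalMonitorQuit / monitorQuitC) in addition to the raft node's, so they can be stopped even when the node has already been removed. Below is a minimal sketch of that idea under simplifying assumptions; the stream type, mqchDone flag, and ticker loop are stand-ins, and snapshotting and the real locking discipline are omitted.

// Minimal sketch of the monitor quit-channel pattern; names and details here
// are illustrative, not the server's actual implementation.
package main

import (
	"fmt"
	"sync"
	"time"
)

type stream struct {
	mu        sync.Mutex
	mqch      chan struct{} // monitor quit channel
	mqchDone  bool
	monitorWg sync.WaitGroup
}

// signalMonitorQuit closes the monitor quit channel exactly once.
func (s *stream) signalMonitorQuit() {
	s.mu.Lock()
	defer s.mu.Unlock()
	if !s.mqchDone {
		close(s.mqch)
		s.mqchDone = true
	}
}

// monitor exits on its own quit channel, so it can be shut down even when the
// raft node (and therefore the node's quit channel) is gone.
func (s *stream) monitor() {
	defer s.monitorWg.Done()
	ticker := time.NewTicker(50 * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-s.mqch:
			// A best-effort snapshot would happen here before returning.
			return
		case <-ticker.C:
			// Normal monitor work.
		}
	}
}

func main() {
	s := &stream{mqch: make(chan struct{})}
	s.monitorWg.Add(1)
	go s.monitor()
	s.signalMonitorQuit()
	s.monitorWg.Wait()
	fmt.Println("monitor stopped without a raft node")
}

The companion change on the startup side, checking the boolean returned by startGoRoutine and calling monitorWg.Done() or clearMonitorRunning() when the goroutine never started, closes the matching leak when the server is already shutting down.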
diff --git a/server/jetstream_cluster_4_test.go b/server/jetstream_cluster_4_test.go index d8ad36b6962..67be2dfc11b 100644 --- a/server/jetstream_cluster_4_test.go +++ b/server/jetstream_cluster_4_test.go @@ -34,6 +34,7 @@ import ( "testing" "time" + "github.com/nats-io/jwt/v2" "github.com/nats-io/nats.go" "github.com/nats-io/nuid" ) @@ -6458,3 +6459,367 @@ func TestJetStreamClusterSDMMaxAgeProposeExpiryShortRetry(t *testing.T) { }) } } + +func TestJetStreamClusterInvalidR1Config(t *testing.T) { + c := createJetStreamClusterExplicit(t, "R1TEST", 3) + defer c.shutdown() + + nc, js := jsClientConnect(t, c.servers[0]) + defer nc.Close() + + nc2, js2 := jsClientConnect(t, c.servers[2]) + defer nc2.Close() + + createStreams := func(t *testing.T, js nats.JetStreamContext, n, replicas int) { + for i := 0; i < n; i++ { + sname := fmt.Sprintf("S:%d", i) + js.AddStream(&nats.StreamConfig{ + Name: sname, + MaxMsgsPerSubject: 5, + Replicas: replicas, + Subjects: []string{fmt.Sprintf("A.%d.>", i)}, + }) + time.Sleep(10 * time.Millisecond) + js.AddConsumer(sname, &nats.ConsumerConfig{ + Name: sname, + Durable: sname, + FilterSubject: ">", + }) + js.Publish(fmt.Sprintf("A.%d.foo", i), []byte("one")) + } + } + + // Create 5 streams in parallel with different configs, then + // check whether one of them is in an undefined state. + totalStreams := 5 + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + createStreams(t, js, totalStreams, 1) + }() + + wg.Add(1) + go func() { + defer wg.Done() + createStreams(t, js2, totalStreams, 2) + }() + wg.Wait() + + for i := 0; i < totalStreams; i++ { + ci, err := js.StreamInfo(fmt.Sprintf("S:%d", i)) + require_NoError(t, err) + + // Make sure that consumer scale up when peers are missing responds with error. + if ci.Config.Replicas == 2 { + // Starting with a single replica should still be valid. + js.AddConsumer(ci.Config.Name, &nats.ConsumerConfig{ + Name: "test", + Replicas: 1, + }) + _, err = js.UpdateConsumer(ci.Config.Name, &nats.ConsumerConfig{ + Name: "test", + Replicas: 2, + }) + if err != nil { + require_Equal(t, err.Error(), "nats: consumer config replica count exceeds parent stream") + } + } + } +} + +func TestJetStreamClusterMultiLeaderR3Config(t *testing.T) { + conf := ` + listen: 127.0.0.1:-1 + server_name: %s + jetstream: { + store_dir: '%s', + } + cluster { + name: %s + listen: 127.0.0.1:%d + routes = [%s] + } + server_tags: ["test"] + system_account: sys + no_auth_user: js + accounts { + sys { users = [ { user: sys, pass: sys } ] } + js { + jetstream = enabled + users = [ { user: js, pass: js } ] + } + }` + c := createJetStreamClusterWithTemplate(t, conf, "R3TEST", 3) + defer c.shutdown() + + nc, js := jsClientConnect(t, c.servers[0]) + defer nc.Close() + + nc2, js2 := jsClientConnect(t, c.servers[2]) + defer nc2.Close() + + createStreams := func(t *testing.T, js nats.JetStreamContext, n, replicas int) { + for i := 0; i < n; i++ { + sname := fmt.Sprintf("S:%d", i) + js.AddStream(&nats.StreamConfig{ + Name: sname, + MaxMsgsPerSubject: 5, + Replicas: replicas, + Subjects: []string{fmt.Sprintf("A.%d.>", i)}, + }) + time.Sleep(10 * time.Millisecond) + js.AddConsumer(sname, &nats.ConsumerConfig{ + Name: sname, + Durable: sname, + FilterSubject: ">", + }) + js.Publish(fmt.Sprintf("A.%d.foo", i), []byte("one")) + } + } + + // Create streams in parallel with different configs, then + // check whether one of them is in an undefined state. 
+ totalStreams := 5 + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + createStreams(t, js, totalStreams, 3) + }() + + wg.Add(1) + go func() { + defer wg.Done() + createStreams(t, js2, totalStreams, 1) + }() + wg.Wait() + + checkMultiLeader := func(accountName, streamName string) error { + leaders := make(map[string]bool) + for _, srv := range c.servers { + jsz, err := srv.Jsz(&JSzOptions{Accounts: true, Streams: true, Consumer: true}) + if err != nil { + return err + } + for _, acc := range jsz.AccountDetails { + if acc.Name == accountName { + for _, stream := range acc.Streams { + if stream.Name == streamName { + leaders[stream.Cluster.Leader] = true + } + } + } + } + } + if len(leaders) > 1 { + return fmt.Errorf("There are multiple leaders on %s stream: %+v", streamName, leaders) + } + return nil + } + + var invalidStream string + for i := 0; i < totalStreams; i++ { + ci, err := js.StreamInfo(fmt.Sprintf("S:%d", i)) + require_NoError(t, err) + if ci.Config.Replicas == 1 { + if ci.Cluster != nil { + peers := ci.Cluster.Replicas + if len(peers) > 1 { + invalidStream = ci.Config.Name + t.Errorf("Unexpected stream config drift, 1 replica expected but found %v peers", len(peers)) + } + } + } + } + if len(invalidStream) > 0 { + _, err := js.StreamInfo(invalidStream) + require_NoError(t, err) + + // Restart server where first client is connected and almost all R1 replicas landed. + srv := c.servers[0] + srv.Shutdown() + srv.WaitForShutdown() + time.Sleep(2 * time.Second) + c.restartServer(srv) + c.waitOnClusterReady() + checkFor(t, 30*time.Second, 200*time.Millisecond, func() error { + return checkMultiLeader("js", invalidStream) + }) + } +} + +func TestJetStreamClusterAccountMaxConnectionsReconnect(t *testing.T) { + conf := ` + listen: 127.0.0.1:-1 + http: -1 + server_name: %s + jetstream: { + store_dir: '%s', + } + cluster { + name: %s + listen: 127.0.0.1:%d + routes = [%s] + } + server_tags: ["test"] + system_account: sys + no_auth_user: js + accounts { + sys { users = [ { user: sys, pass: sys } ] } + js { + jetstream = enabled + users = [ { user: js, pass: js } ] + limits { + max_connections: 5 + } + } + } + ` + c := createJetStreamClusterWithTemplate(t, conf, "R3CONNECT", 3) + defer c.shutdown() + var conns []*nats.Conn + + disconnects := make([]chan error, 0) + for i := 1; i <= 5; i++ { + disconnectCh := make(chan error) + c, _ := jsClientConnect(t, c.servers[0], nats.UserInfo("js", "js"), nats.DisconnectErrHandler(func(_ *nats.Conn, err error) { + disconnectCh <- err + })) + defer c.Close() + conns = append(conns, c) + disconnects = append(disconnects, disconnectCh) + // Small delay to ensure distinct start times. 
+ time.Sleep(10 * time.Millisecond) + } + s := c.servers[0] + acc, err := s.lookupAccount("js") + require_NoError(t, err) + + acc.mu.RLock() + clients := acc.getClientsLocked() + numConnections := acc.NumConnections() + jsClients := acc.sysclients + totalClients := len(clients) + acc.mu.RUnlock() + + require_Equal(t, numConnections, 5) + require_Equal(t, jsClients, 0) + require_Equal(t, totalClients, 5) + + nc := conns[0] + js, _ := nc.JetStream() + for i := 0; i < 10; i++ { + _, err := js.AddStream(&nats.StreamConfig{ + Name: fmt.Sprintf("foo:%d", i), + Subjects: []string{fmt.Sprintf("foo.%d", i)}, + }) + require_NoError(t, err) + + _, err = js.Publish(fmt.Sprintf("foo.%d", i), []byte("hello"), nats.AckWait(5*time.Second)) + require_NoError(t, err) + } + + acc.mu.RLock() + clients = acc.getClientsLocked() + numConnections = acc.NumConnections() + jsClients = acc.sysclients + totalClients = len(clients) + acc.mu.RUnlock() + + require_Equal(t, numConnections, 5) + require_Equal(t, jsClients, 20) + require_Equal(t, totalClients, 25) + + checkFor(t, 30*time.Second, 200*time.Millisecond, func() error { + for i := 0; i < 10; i++ { + _, err := js.Publish(fmt.Sprintf("foo.%d", i), []byte("hello"), nats.AckWait(5*time.Second)) + if err != nil { + return err + } + } + return nil + }) + + // Force account update to trigger connection limit enforcement. + accClaims := jwt.NewAccountClaims(acc.Name) + accClaims.Limits.Conn = 1 + accClaims.Limits.MemoryStorage = -1 + accClaims.Limits.DiskStorage = -1 + accClaims.Limits.Streams = -1 + accClaims.Limits.Consumer = -1 + + // Update server, before this would have disconnected JS internal clients with + // 'JETSTREAM - maximum account active connections exceeded'. + s.UpdateAccountClaims(acc, accClaims) + + // Allow some time for enforcement. + time.Sleep(100 * time.Millisecond) + + acc, err = s.lookupAccount("js") + require_NoError(t, err) + + acc.mu.RLock() + clients = acc.getClientsLocked() + numConnections = acc.NumConnections() + jsClients = acc.sysclients + totalClients = len(clients) + acc.mu.RUnlock() + + // JETSTREAM internal clients should still linger after reducing connections. + require_Equal(t, numConnections, 5) + require_Equal(t, jsClients, 20) + require_Equal(t, totalClients, 20) + + // Wait for disconnections from the most recent client. + disconnectCh := disconnects[2] + select { + case <-disconnectCh: + case <-time.After(2 * time.Second): + t.Fatal("Expected newest connection to disconnect!") + } + + checkFor(t, 30*time.Second, 200*time.Millisecond, func() error { + activeConnections := 0 + for _, conn := range conns { + if !conn.IsClosed() { + activeConnections++ + } + } + if activeConnections < 5 { + return fmt.Errorf("Unexpected number of connections: %d", activeConnections) + } + return nil + }) + + // Force account update to trigger connection limit enforcement. + accClaims = jwt.NewAccountClaims(acc.Name) + accClaims.Limits.Conn = 10 + accClaims.Limits.MemoryStorage = -1 + accClaims.Limits.DiskStorage = -1 + accClaims.Limits.Streams = -1 + accClaims.Limits.Consumer = -1 + + // Update all servers then confirm that internal JS clients should work + // and clients have reconnected. 
+ for _, s := range c.servers { + acc, err := s.lookupAccount("js") + require_NoError(t, err) + s.UpdateAccountClaims(acc, accClaims) + } + checkFor(t, 30*time.Second, 200*time.Millisecond, func() error { + for _, nc := range conns { + js, _ := nc.JetStream() + for i := 0; i < 10; i++ { + stream := fmt.Sprintf("foo.%d", i) + _, err := js.Publish(stream, []byte("hello"), nats.AckWait(5*time.Second)) + if err != nil { + return err + } + } + } + return nil + }) +} diff --git a/server/jetstream_consumer_test.go b/server/jetstream_consumer_test.go index 7ce6ff23288..e90f7664023 100644 --- a/server/jetstream_consumer_test.go +++ b/server/jetstream_consumer_test.go @@ -9867,3 +9867,47 @@ func TestJetStreamConsumerNotInactiveDuringAckWaitBackoff(t *testing.T) { t.Run("R1", func(t *testing.T) { test(t, 1) }) t.Run("R3", func(t *testing.T) { test(t, 3) }) } + +func TestJetStreamConsumerMaxDeliverUnderflow(t *testing.T) { + s := RunBasicJetStreamServer(t) + defer s.Shutdown() + + nc, js := jsClientConnect(t, s) + defer nc.Close() + + _, err := js.AddStream(&nats.StreamConfig{Name: "TEST", Subjects: []string{"foo"}}) + require_NoError(t, err) + + cfg := &nats.ConsumerConfig{Durable: "CONSUMER", MaxDeliver: -1} + _, err = js.AddConsumer("TEST", cfg) + require_NoError(t, err) + + mset, err := s.globalAccount().lookupStream("TEST") + require_NoError(t, err) + o := mset.lookupConsumer("CONSUMER") + require_NotNil(t, o) + + // Infinite MaxDeliver should be zero. + o.mu.RLock() + maxdc := o.maxdc + o.mu.RUnlock() + require_Equal(t, maxdc, 0) + + // Finite MaxDeliver should be reported the same. + cfg.MaxDeliver = 1 + _, err = js.UpdateConsumer("TEST", cfg) + require_NoError(t, err) + o.mu.RLock() + maxdc = o.maxdc + o.mu.RUnlock() + require_Equal(t, maxdc, 1) + + // Infinite MaxDeliver should be zero. 
+ cfg.MaxDeliver = -1 + _, err = js.UpdateConsumer("TEST", cfg) + require_NoError(t, err) + o.mu.RLock() + maxdc = o.maxdc + o.mu.RUnlock() + require_Equal(t, maxdc, 0) +} diff --git a/server/jetstream_errors_generated.go b/server/jetstream_errors_generated.go index 162ca4f0273..5cebda1f3ca 100644 --- a/server/jetstream_errors_generated.go +++ b/server/jetstream_errors_generated.go @@ -176,6 +176,9 @@ const ( // JSConsumerOfflineErr consumer is offline JSConsumerOfflineErr ErrorIdentifier = 10119 + // JSConsumerOfflineReasonErrF consumer is offline: {err} + JSConsumerOfflineReasonErrF ErrorIdentifier = 10195 + // JSConsumerOnMappedErr consumer direct on a mapped consumer JSConsumerOnMappedErr ErrorIdentifier = 10092 @@ -440,6 +443,9 @@ const ( // JSStreamOfflineErr stream is offline JSStreamOfflineErr ErrorIdentifier = 10118 + // JSStreamOfflineReasonErrF stream is offline: {err} + JSStreamOfflineReasonErrF ErrorIdentifier = 10194 + // JSStreamPurgeFailedF Generic stream purge failure error string ({err}) JSStreamPurgeFailedF ErrorIdentifier = 10110 @@ -566,6 +572,7 @@ var ( JSConsumerNameTooLongErrF: {Code: 400, ErrCode: 10102, Description: "consumer name is too long, maximum allowed is {max}"}, JSConsumerNotFoundErr: {Code: 404, ErrCode: 10014, Description: "consumer not found"}, JSConsumerOfflineErr: {Code: 500, ErrCode: 10119, Description: "consumer is offline"}, + JSConsumerOfflineReasonErrF: {Code: 500, ErrCode: 10195, Description: "consumer is offline: {err}"}, JSConsumerOnMappedErr: {Code: 400, ErrCode: 10092, Description: "consumer direct on a mapped consumer"}, JSConsumerOverlappingSubjectFilters: {Code: 400, ErrCode: 10138, Description: "consumer subject filters cannot overlap"}, JSConsumerPriorityPolicyWithoutGroup: {Code: 400, ErrCode: 10159, Description: "Setting PriorityPolicy requires at least one PriorityGroup to be set"}, @@ -654,6 +661,7 @@ var ( JSStreamNotFoundErr: {Code: 404, ErrCode: 10059, Description: "stream not found"}, JSStreamNotMatchErr: {Code: 400, ErrCode: 10060, Description: "expected stream does not match"}, JSStreamOfflineErr: {Code: 500, ErrCode: 10118, Description: "stream is offline"}, + JSStreamOfflineReasonErrF: {Code: 500, ErrCode: 10194, Description: "stream is offline: {err}"}, JSStreamPurgeFailedF: {Code: 500, ErrCode: 10110, Description: "{err}"}, JSStreamReplicasNotSupportedErr: {Code: 500, ErrCode: 10074, Description: "replicas > 1 not supported in non-clustered mode"}, JSStreamReplicasNotUpdatableErr: {Code: 400, ErrCode: 10061, Description: "Replicas configuration can not be updated"}, @@ -1331,6 +1339,22 @@ func NewJSConsumerOfflineError(opts ...ErrorOption) *ApiError { return ApiErrors[JSConsumerOfflineErr] } +// NewJSConsumerOfflineReasonError creates a new JSConsumerOfflineReasonErrF error: "consumer is offline: {err}" +func NewJSConsumerOfflineReasonError(err error, opts ...ErrorOption) *ApiError { + eopts := parseOpts(opts) + if ae, ok := eopts.err.(*ApiError); ok { + return ae + } + + e := ApiErrors[JSConsumerOfflineReasonErrF] + args := e.toReplacerArgs([]interface{}{"{err}", err}) + return &ApiError{ + Code: e.Code, + ErrCode: e.ErrCode, + Description: strings.NewReplacer(args...).Replace(e.Description), + } +} + // NewJSConsumerOnMappedError creates a new JSConsumerOnMappedErr error: "consumer direct on a mapped consumer" func NewJSConsumerOnMappedError(opts ...ErrorOption) *ApiError { eopts := parseOpts(opts) @@ -2349,6 +2373,22 @@ func NewJSStreamOfflineError(opts ...ErrorOption) *ApiError { return ApiErrors[JSStreamOfflineErr] } 
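The offline-reason error constructors that follow splice a reason into the generated description through a {err} placeholder and strings.NewReplacer. The sketch below shows how the resulting message reads; the template string matches the new error table entry and the reason wording mirrors what the downgrade tests expect, but the helper and constant names here are hypothetical.

// Small illustration of the {err} placeholder replacement; offlineReason and
// streamOfflineTemplate are stand-in names for this sketch only.
package main

import (
	"fmt"
	"strings"
)

const streamOfflineTemplate = "stream is offline: {err}"

// offlineReason builds the reason string in the shape the tests check for.
func offlineReason(requiredLevel, currentLevel int) string {
	return fmt.Sprintf("unsupported - required API level: %d, current API level: %d",
		requiredLevel, currentLevel)
}

func main() {
	reason := offlineReason(2, 1)
	msg := strings.NewReplacer("{err}", reason).Replace(streamOfflineTemplate)
	fmt.Println(msg) // stream is offline: unsupported - required API level: 2, current API level: 1
}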
+// NewJSStreamOfflineReasonError creates a new JSStreamOfflineReasonErrF error: "stream is offline: {err}" +func NewJSStreamOfflineReasonError(err error, opts ...ErrorOption) *ApiError { + eopts := parseOpts(opts) + if ae, ok := eopts.err.(*ApiError); ok { + return ae + } + + e := ApiErrors[JSStreamOfflineReasonErrF] + args := e.toReplacerArgs([]interface{}{"{err}", err}) + return &ApiError{ + Code: e.Code, + ErrCode: e.ErrCode, + Description: strings.NewReplacer(args...).Replace(e.Description), + } +} + // NewJSStreamPurgeFailedError creates a new JSStreamPurgeFailedF error: "{err}" func NewJSStreamPurgeFailedError(err error, opts ...ErrorOption) *ApiError { eopts := parseOpts(opts) diff --git a/server/jetstream_leafnode_test.go b/server/jetstream_leafnode_test.go index 0c796cbba63..a543820b5e5 100644 --- a/server/jetstream_leafnode_test.go +++ b/server/jetstream_leafnode_test.go @@ -16,6 +16,7 @@ package server import ( + "errors" "fmt" "os" "strings" @@ -1619,7 +1620,7 @@ func TestJetStreamLeafNodeAndMirrorResyncAfterConnectionDown(t *testing.T) { // Reset sourceInfo to have lots of failures and last attempt 2 minutes ago. // Lock should be held on parent stream. resetSourceInfo := func(si *sourceInfo) { - si.sip = false + // Do not reset sip here to make sure that the internal logic clears. si.fails = 100 si.lreq = time.Now().Add(-2 * time.Minute) } @@ -1668,7 +1669,212 @@ func TestJetStreamLeafNodeAndMirrorResyncAfterConnectionDown(t *testing.T) { err = checkStreamMsgs(jsB, "SRC-B", initMsgs*4, err) return err }) - if elapsed := time.Since(start); elapsed > 2*time.Second { - t.Fatalf("Expected to resync all streams <2s but got %v", elapsed) + if elapsed := time.Since(start); elapsed > 3*time.Second { + t.Fatalf("Expected to resync all streams <3s but got %v", elapsed) } } + +// This test will test a 3 node setup where we have a hub node, a gateway node, and a satellite node. +// This is specifically testing re-sync when there is not a direct Domain with JS match for the first +// hop connect LN that is signaling. +// +// HUB <---- GW(+JS/DOMAIN) -----> SAT1 +// ^ +// | +// +------- GW(-JS/NO DOMAIN) --> SAT2 +// +// The Gateway node will solicit the satellites but will act as a LN hub. +func TestJetStreamLeafNodeAndMirrorResyncAfterLeafEstablished(t *testing.T) { + accs := ` + accounts { + JS { users = [ { user: "u", pass: "p" } ]; jetstream: true } + $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } + } + ` + hubT := ` + listen: -1 + server_name: hub + jetstream { store_dir: '%s', domain: HUB } + %s + leaf { port: -1 } + ` + confA := createConfFile(t, []byte(fmt.Sprintf(hubT, t.TempDir(), accs))) + sHub, oHub := RunServerWithConfig(confA) + defer sHub.Shutdown() + + // We run the SAT node second to extract out info for solicitation from targeted GW. 
+ sat1T := ` + listen: -1 + server_name: sat1 + jetstream { store_dir: '%s', domain: SAT1 } + %s + leaf { port: -1 } + ` + confB := createConfFile(t, []byte(fmt.Sprintf(sat1T, t.TempDir(), accs))) + sSat1, oSat1 := RunServerWithConfig(confB) + defer sSat1.Shutdown() + + sat2T := ` + listen: -1 + server_name: sat2 + jetstream { store_dir: '%s', domain: SAT2 } + %s + leaf { port: -1 } + ` + confC := createConfFile(t, []byte(fmt.Sprintf(sat2T, t.TempDir(), accs))) + sSat2, oSat2 := RunServerWithConfig(confC) + defer sSat2.Shutdown() + + hubLeafPort := fmt.Sprintf("nats://u:p@127.0.0.1:%d", oHub.LeafNode.Port) + sat1LeafPort := fmt.Sprintf("nats://u:p@127.0.0.1:%d", oSat1.LeafNode.Port) + sat2LeafPort := fmt.Sprintf("nats://u:p@127.0.0.1:%d", oSat2.LeafNode.Port) + + gw1T := ` + listen: -1 + server_name: gw1 + jetstream { store_dir: '%s', domain: GW } + %s + leaf { remotes [ { url: %s, account: "JS" }, { url: %s, account: "JS", hub: true } ], reconnect: "0.25s" } + ` + confD := createConfFile(t, []byte(fmt.Sprintf(gw1T, t.TempDir(), accs, hubLeafPort, sat1LeafPort))) + sGW1, _ := RunServerWithConfig(confD) + defer sGW1.Shutdown() + + gw2T := ` + listen: -1 + server_name: gw2 + accounts { + JS { users = [ { user: "u", pass: "p" } ] } + $SYS { users = [ { user: "admin", pass: "s3cr3t!" } ] } + } + leaf { remotes [ { url: %s, account: "JS" }, { url: %s, account: "JS", hub: true } ], reconnect: "0.25s" } + ` + confE := createConfFile(t, []byte(fmt.Sprintf(gw2T, hubLeafPort, sat2LeafPort))) + sGW2, _ := RunServerWithConfig(confE) + defer sGW2.Shutdown() + + // Make sure we are connected ok. + checkLeafNodeConnectedCount(t, sHub, 2) + checkLeafNodeConnectedCount(t, sSat1, 1) + checkLeafNodeConnectedCount(t, sSat2, 1) + checkLeafNodeConnectedCount(t, sGW1, 2) + checkLeafNodeConnectedCount(t, sGW2, 2) + + // Let's place a muxed stream on the hub and have each satellite stream source from it. + // Connect to Hub. + ncHub, jsHub := jsClientConnect(t, sHub, nats.UserInfo("u", "p")) + defer ncHub.Close() + + _, err := jsHub.AddStream(&nats.StreamConfig{Name: "HUB", Subjects: []string{"H.>"}}) + require_NoError(t, err) + + // Connect to Sat1. + ncSat1, jsSat1 := jsClientConnect(t, sSat1, nats.UserInfo("u", "p")) + defer ncSat1.Close() + + _, err = jsSat1.AddStream(&nats.StreamConfig{ + Name: "SAT-1", + Subjects: []string{"S1.*"}, + Sources: []*nats.StreamSource{{ + Name: "HUB", + FilterSubject: "H.SAT-1.>", + External: &nats.ExternalStream{APIPrefix: "$JS.HUB.API"}, + }}, + }) + require_NoError(t, err) + + // Connect to Sat2. + ncSat2, jsSat2 := jsClientConnect(t, sSat2, nats.UserInfo("u", "p")) + defer ncSat2.Close() + + _, err = jsSat2.AddStream(&nats.StreamConfig{ + Name: "SAT-2", + Subjects: []string{"S2.*"}, + Sources: []*nats.StreamSource{{ + Name: "HUB", + FilterSubject: "H.SAT-2.>", + External: &nats.ExternalStream{APIPrefix: "$JS.HUB.API"}, + }}, + }) + require_NoError(t, err) + + // Put in 10 msgs for each satellite. + for i := 0; i < 10; i++ { + jsHub.Publish("H.SAT-1.foo", []byte("CMD")) + jsHub.Publish("H.SAT-2.foo", []byte("CMD")) + } + // Make sure both are sync'd. 
+ checkFor(t, time.Second, 100*time.Millisecond, func() error { + si, err := jsSat1.StreamInfo("SAT-1") + require_NoError(t, err) + if si.State.Msgs != 10 { + return errors.New("SAT-1 Not sync'd yet") + } + si, err = jsSat2.StreamInfo("SAT-2") + require_NoError(t, err) + if si.State.Msgs != 10 { + return errors.New("SAT-2 Not sync'd yet") + } + return nil + }) + + testReconnect := func(t *testing.T, delay time.Duration, expected uint64) { + // Now disconnect Sat1 and Sat2. In 2.12 we can do this with active: false, but since this will be + // pulled into 2.11.9 just shut down both gateways. + sGW1.Shutdown() + checkLeafNodeConnectedCount(t, sSat1, 0) + checkLeafNodeConnectedCount(t, sHub, 1) + + sGW2.Shutdown() + checkLeafNodeConnectedCount(t, sSat2, 0) + checkLeafNodeConnectedCount(t, sHub, 0) + + // Send 10 more messages for each while GW1 and GW2 are down. + for i := 0; i < 10; i++ { + jsHub.Publish("H.SAT-1.foo", []byte("CMD")) + jsHub.Publish("H.SAT-2.foo", []byte("CMD")) + } + + // Keep GWs down for delay. + time.Sleep(delay) + + sGW1, _ = RunServerWithConfig(confD) + // Make sure we are connected ok. + checkLeafNodeConnectedCount(t, sHub, 1) + checkLeafNodeConnectedCount(t, sSat1, 1) + checkLeafNodeConnectedCount(t, sGW1, 2) + + sGW2, _ = RunServerWithConfig(confE) + // Make sure we are connected ok. + checkLeafNodeConnectedCount(t, sHub, 2) + checkLeafNodeConnectedCount(t, sSat2, 1) + checkLeafNodeConnectedCount(t, sGW2, 2) + + // Make sure we are sync'd within a second or two. + checkFor(t, 2*time.Second, 100*time.Millisecond, func() error { + si, err := jsSat1.StreamInfo("SAT-1") + require_NoError(t, err) + if si.State.Msgs != expected { + return fmt.Errorf("SAT-1 not sync'd, expected %d got %d", expected, si.State.Msgs) + } + si, err = jsSat2.StreamInfo("SAT-2") + require_NoError(t, err) + if si.State.Msgs != expected { + return fmt.Errorf("SAT-2 not sync'd, expected %d got %d", expected, si.State.Msgs) + } + return nil + }) + } + + // We will test two scenarios for the amount of time the GW links are down: + // 1. Just a second; we will not have detected that the consumer is offline yet. + // 2. Just over sourceHealthCheckInterval, meaning we detect it is down and schedule for another try. 
+ t.Run(fmt.Sprintf("reconnect-%v", time.Second), func(t *testing.T) { + testReconnect(t, time.Second, 20) + }) + t.Run(fmt.Sprintf("reconnect-%v", sourceHealthCheckInterval+time.Second), func(t *testing.T) { + testReconnect(t, sourceHealthCheckInterval+time.Second, 30) + }) + defer sGW1.Shutdown() + defer sGW2.Shutdown() +} diff --git a/server/jetstream_test.go b/server/jetstream_test.go index 4b2f6076d71..940386efccc 100644 --- a/server/jetstream_test.go +++ b/server/jetstream_test.go @@ -18489,7 +18489,7 @@ func TestJetStreamDelayedAPIResponses(t *testing.T) { acc := s.GlobalAccount() - // Send B, A, D, C and exected to receive A, B, C, D + // Send B, A, D, C and expected to receive A, B, C, D s.sendDelayedAPIErrResponse(nil, acc, "B", _EMPTY_, "request2", "response2", nil, 500*time.Millisecond) time.Sleep(50 * time.Millisecond) s.sendDelayedAPIErrResponse(nil, acc, "A", _EMPTY_, "request1", "response1", nil, 200*time.Millisecond) @@ -20605,3 +20605,112 @@ func TestJetStreamKVNoSubjectDeleteMarkerOnPurgeMarker(t *testing.T) { }) } } + +func TestJetStreamOfflineStreamAndConsumerAfterDowngrade(t *testing.T) { + s := RunBasicJetStreamServer(t) + defer s.Shutdown() + port := s.getOpts().Port + sd := s.JetStreamConfig().StoreDir + + _, err := s.globalAccount().addStream(&StreamConfig{ + Name: "DowngradeStreamTest", + Storage: FileStorage, + Replicas: 1, + Metadata: map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt)}, + }) + require_NoError(t, err) + + s.Shutdown() + s = RunJetStreamServerOnPort(port, sd) + defer s.Shutdown() + + nc := clientConnectToServer(t, s) + defer nc.Close() + + offlineReason := fmt.Sprintf("unsupported - required API level: %d, current API level: %d", math.MaxInt, JSApiLevel) + msg, err := nc.Request(fmt.Sprintf(JSApiStreamInfoT, "DowngradeStreamTest"), nil, time.Second) + require_NoError(t, err) + var si JSApiStreamInfoResponse + require_NoError(t, json.Unmarshal(msg.Data, &si)) + require_NotNil(t, si.Error) + require_Error(t, si.Error, NewJSStreamOfflineReasonError(errors.New(offlineReason))) + + var sn JSApiStreamNamesResponse + msg, err = nc.Request(JSApiStreams, nil, time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &sn)) + require_Len(t, len(sn.Streams), 1) + require_Equal(t, sn.Streams[0], "DowngradeStreamTest") + + var sl JSApiStreamListResponse + msg, err = nc.Request(JSApiStreamList, nil, time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &sl)) + require_Len(t, len(sl.Streams), 0) + require_Len(t, len(sl.Missing), 1) + require_Equal(t, sl.Missing[0], "DowngradeStreamTest") + require_Len(t, len(sl.Offline), 1) + require_Equal(t, sl.Offline["DowngradeStreamTest"], offlineReason) + + mset, err := s.globalAccount().lookupStream("DowngradeStreamTest") + require_NoError(t, err) + require_True(t, mset.closed.Load()) + require_Equal(t, mset.offlineReason, offlineReason) + require_NoError(t, mset.delete()) + + s.Shutdown() + s = RunJetStreamServerOnPort(port, sd) + defer s.Shutdown() + + _, err = s.globalAccount().addStream(&StreamConfig{ + Name: "DowngradeConsumerTest", + Storage: FileStorage, + Replicas: 1, + }) + require_NoError(t, err) + mset, err = s.globalAccount().lookupStream("DowngradeConsumerTest") + require_NoError(t, err) + _, err = mset.addConsumer(&ConsumerConfig{ + Name: "DowngradeConsumerTest", + Metadata: map[string]string{"_nats.req.level": strconv.Itoa(math.MaxInt)}, + }) + require_NoError(t, err) + + s.Shutdown() + s = RunJetStreamServerOnPort(port, sd) + defer 
s.Shutdown() + + mset, err = s.globalAccount().lookupStream("DowngradeConsumerTest") + require_NoError(t, err) + require_True(t, mset.closed.Load()) + require_Equal(t, mset.offlineReason, "stopped") + + obs := mset.getPublicConsumers() + require_Len(t, len(obs), 1) + require_True(t, obs[0].isClosed()) + require_Equal(t, obs[0].offlineReason, offlineReason) + + msg, err = nc.Request(fmt.Sprintf(JSApiConsumerInfoT, "DowngradeConsumerTest", "DowngradeConsumerTest"), nil, time.Second) + require_NoError(t, err) + var ci JSApiConsumerInfoResponse + require_NoError(t, json.Unmarshal(msg.Data, &ci)) + require_NotNil(t, ci.Error) + require_Error(t, ci.Error, NewJSConsumerOfflineReasonError(errors.New(offlineReason))) + + var cn JSApiConsumerNamesResponse + msg, err = nc.Request(fmt.Sprintf(JSApiConsumersT, "DowngradeConsumerTest"), nil, time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &cn)) + require_Len(t, len(cn.Consumers), 1) + require_Equal(t, cn.Consumers[0], "DowngradeConsumerTest") + + var cl JSApiConsumerListResponse + msg, err = nc.Request(fmt.Sprintf(JSApiConsumerListT, "DowngradeConsumerTest"), nil, time.Second) + require_NoError(t, err) + require_NoError(t, json.Unmarshal(msg.Data, &cl)) + require_Len(t, len(cl.Consumers), 0) + require_Len(t, len(cl.Missing), 1) + require_Equal(t, cl.Missing[0], "DowngradeConsumerTest") + require_Len(t, len(cl.Offline), 1) + require_Equal(t, cl.Offline["DowngradeConsumerTest"], offlineReason) +} diff --git a/server/jetstream_versioning.go b/server/jetstream_versioning.go index 1192968b88f..41ecf04c7d4 100644 --- a/server/jetstream_versioning.go +++ b/server/jetstream_versioning.go @@ -24,6 +24,23 @@ const ( JSServerLevelMetadataKey = "_nats.level" ) +// getRequiredApiLevel returns the required API level for the JetStream asset. +func getRequiredApiLevel(metadata map[string]string) string { + if l, ok := metadata[JSRequiredLevelMetadataKey]; ok && l != _EMPTY_ { + return l + } + return _EMPTY_ +} + +// supportsRequiredApiLevel returns whether the required API level for the JetStream asset is supported. +func supportsRequiredApiLevel(metadata map[string]string) bool { + if l := getRequiredApiLevel(metadata); l != _EMPTY_ { + li, err := strconv.Atoi(l) + return err == nil && li <= JSApiLevel + } + return true +} + // setStaticStreamMetadata sets JetStream stream metadata, like the server version and API level. // Any dynamic metadata is removed, it must not be stored and only be added for responses. func setStaticStreamMetadata(cfg *StreamConfig) { @@ -50,10 +67,15 @@ func setStaticStreamMetadata(cfg *StreamConfig) { // setDynamicStreamMetadata adds dynamic fields into the (copied) metadata. func setDynamicStreamMetadata(cfg *StreamConfig) *StreamConfig { - newCfg := *cfg + var newCfg StreamConfig + if cfg != nil { + newCfg = *cfg + } newCfg.Metadata = make(map[string]string) - for key, value := range cfg.Metadata { - newCfg.Metadata[key] = value + if cfg != nil { + for key, value := range cfg.Metadata { + newCfg.Metadata[key] = value + } } newCfg.Metadata[JSServerVersionMetadataKey] = VERSION newCfg.Metadata[JSServerLevelMetadataKey] = strconv.Itoa(JSApiLevel) @@ -121,10 +143,15 @@ func setStaticConsumerMetadata(cfg *ConsumerConfig) { // setDynamicConsumerMetadata adds dynamic fields into the (copied) metadata. 
func setDynamicConsumerMetadata(cfg *ConsumerConfig) *ConsumerConfig { - newCfg := *cfg + var newCfg ConsumerConfig + if cfg != nil { + newCfg = *cfg + } newCfg.Metadata = make(map[string]string) - for key, value := range cfg.Metadata { - newCfg.Metadata[key] = value + if cfg != nil { + for key, value := range cfg.Metadata { + newCfg.Metadata[key] = value + } } newCfg.Metadata[JSServerVersionMetadataKey] = VERSION newCfg.Metadata[JSServerLevelMetadataKey] = strconv.Itoa(JSApiLevel) diff --git a/server/jetstream_versioning_test.go b/server/jetstream_versioning_test.go index 6dcd73b4fb5..996ede4a27c 100644 --- a/server/jetstream_versioning_test.go +++ b/server/jetstream_versioning_test.go @@ -29,6 +29,19 @@ import ( "github.com/nats-io/nats.go" ) +func TestGetAndSupportsRequiredApiLevel(t *testing.T) { + require_Equal(t, getRequiredApiLevel(nil), _EMPTY_) + require_Equal(t, getRequiredApiLevel(map[string]string{}), _EMPTY_) + require_Equal(t, getRequiredApiLevel(map[string]string{JSRequiredLevelMetadataKey: "1"}), "1") + require_Equal(t, getRequiredApiLevel(map[string]string{JSRequiredLevelMetadataKey: "text"}), "text") + + require_True(t, supportsRequiredApiLevel(nil)) + require_True(t, supportsRequiredApiLevel(map[string]string{})) + require_True(t, supportsRequiredApiLevel(map[string]string{JSRequiredLevelMetadataKey: "1"})) + require_True(t, supportsRequiredApiLevel(map[string]string{JSRequiredLevelMetadataKey: strconv.Itoa(JSApiLevel)})) + require_False(t, supportsRequiredApiLevel(map[string]string{JSRequiredLevelMetadataKey: "text"})) +} + func metadataAtLevel(featureLevel string) map[string]string { return map[string]string{ JSRequiredLevelMetadataKey: featureLevel, diff --git a/server/jwt.go b/server/jwt.go index 04d7dc60a3e..82d65d90d56 100644 --- a/server/jwt.go +++ b/server/jwt.go @@ -80,7 +80,11 @@ func validateTrustedOperators(o *Options) error { if err != nil { return fmt.Errorf("default sentinel JWT not valid") } - if !juc.BearerToken { + + if !juc.BearerToken && juc.IssuerAccount != "" && juc.HasEmptyPermissions() { + // we cannot resolve the account yet - but this looks like a scoped user + // it will be rejected at runtime if not valid + } else if !juc.BearerToken { return fmt.Errorf("default sentinel must be a bearer token") } } diff --git a/server/jwt_test.go b/server/jwt_test.go index 92e3450c33d..94d9dedbf6e 100644 --- a/server/jwt_test.go +++ b/server/jwt_test.go @@ -7127,6 +7127,24 @@ func TestDefaultSentinelUser(t *testing.T) { require_NoError(t, err) aKP, aPub, aAC := NewJwtAccountClaim("A") + aScopedKP, err := nkeys.CreateAccount() + require_NoError(t, err) + aScopedPK, err := aScopedKP.PublicKey() + require_NoError(t, err) + + sentinelScope := jwt.NewUserScope() + sentinelScope.Key = aScopedPK + sentinelScope.Role = "sentinel" + sentinelScope.Description = "Sentinel Role" + sentinelScope.Template = jwt.UserPermissionLimits{ + BearerToken: true, + Permissions: jwt.Permissions{ + Pub: jwt.Permission{Deny: []string{">"}}, + Sub: jwt.Permission{Deny: []string{">"}}, + }, + } + aAC.SigningKeys.AddScopedSigner(sentinelScope) + preload[aPub], err = aAC.Encode(oKp) require_NoError(t, err) @@ -7188,7 +7206,6 @@ func TestDefaultSentinelUser(t *testing.T) { `, ojwt, sysPub, preloadConfig, sentinelToken))) ns, _ = RunServerWithConfig(conf) - defer ns.Shutdown() nc, err := nats.Connect(ns.ClientURL()) require_NoError(t, err) defer nc.Close() @@ -7201,6 +7218,29 @@ func TestDefaultSentinelUser(t *testing.T) { var ui SR require_NoError(t, json.Unmarshal(r.Data, &ui)) 
require_Equal(t, ui.Data.UserID, uPub) + ns.Shutdown() + + // now lets make a sentinel that is a scoped user with bearer token + uc = jwt.NewUserClaims(uPub) + uc.IssuerAccount = aPub + uc.UserPermissionLimits = jwt.UserPermissionLimits{} + + sentinelToken, err = uc.Encode(aScopedKP) + require_NoError(t, err) + conf = createConfFile(t, []byte(fmt.Sprintf(` + listen: 127.0.0.1:4747 + operator: %s + system_account: %s + resolver: MEM + resolver_preload: %s + default_sentinel: %s +`, ojwt, sysPub, preloadConfig, sentinelToken))) + ns, _ = RunServerWithConfig(conf) + defer ns.Shutdown() + nc, err = nats.Connect(ns.ClientURL()) + require_NoError(t, err) + defer nc.Close() + } func TestJWTUpdateAccountClaimsStreamAndServiceImportDeadlock(t *testing.T) { diff --git a/server/leafnode.go b/server/leafnode.go index f49544812f3..8b64c080017 100644 --- a/server/leafnode.go +++ b/server/leafnode.go @@ -1418,7 +1418,7 @@ func (c *client) processLeafnodeInfo(info *Info) { c.setPermissions(perms) } - var resumeConnect, checkSyncConsumers bool + var resumeConnect bool // If this is a remote connection and this is the first INFO protocol, // then we need to finish the connect process by sending CONNECT, etc.. @@ -1428,7 +1428,6 @@ func (c *client) processLeafnodeInfo(info *Info) { resumeConnect = true } else if !firstINFO && didSolicit { c.leaf.remoteAccName = info.RemoteAccount - checkSyncConsumers = info.JetStream } // Check if we have the remote account information and if so make sure it's stored. @@ -1448,11 +1447,10 @@ func (c *client) processLeafnodeInfo(info *Info) { s.leafNodeFinishConnectProcess(c) } - // If we have JS enabled and so does the other side, we will - // check to see if we need to kick any internal source or mirror consumers. - if checkSyncConsumers { - s.checkInternalSyncConsumers(c.acc, info.Domain) - } + // Check to see if we need to kick any internal source or mirror consumers. + // This will be a no-op if JetStream not enabled for this server or if the bound account + // does not have jetstream. + s.checkInternalSyncConsumers(c.acc) } func (s *Server) negotiateLeafCompression(c *client, didSolicit bool, infoCompression string, co *CompressionOpts) (bool, error) { @@ -1984,16 +1982,16 @@ func (c *client) processLeafNodeConnect(s *Server, arg []byte, lang string) erro // This will be a no-op as needed. s.sendLeafNodeConnect(c.acc) - // If we have JS enabled and so does the other side, we will - // check to see if we need to kick any internal source or mirror consumers. - if proto.JetStream { - s.checkInternalSyncConsumers(acc, proto.Domain) - } + // Check to see if we need to kick any internal source or mirror consumers. + // This will be a no-op if JetStream not enabled for this server or if the bound account + // does not have jetstream. + s.checkInternalSyncConsumers(acc) + return nil } // checkInternalSyncConsumers -func (s *Server) checkInternalSyncConsumers(acc *Account, remoteDomain string) { +func (s *Server) checkInternalSyncConsumers(acc *Account) { // Grab our js js := s.getJetStream() @@ -2012,6 +2010,7 @@ func (s *Server) checkInternalSyncConsumers(acc *Account, remoteDomain string) { if jsa == nil { return } + var streams []*stream jsa.mu.RLock() for _, mset := range jsa.streams { @@ -2029,7 +2028,7 @@ func (s *Server) checkInternalSyncConsumers(acc *Account, remoteDomain string) { // Now loop through all candidates and check if we are the leader and have NOT // created the sync up consumer. 
for _, mset := range streams { - mset.retryDisconnectedSyncConsumers(remoteDomain) + mset.retryDisconnectedSyncConsumers() } } @@ -2228,9 +2227,11 @@ func (s *Server) updateInterestForAccountOnGateway(accName string, sub *subscrip acc.updateLeafNodes(sub, delta) } -// updateLeafNodes will make sure to update the account smap for the subscription. +// updateLeafNodesEx will make sure to update the account smap for the subscription. // Will also forward to all leaf nodes as needed. -func (acc *Account) updateLeafNodes(sub *subscription, delta int32) { +// If `hubOnly` is true, then will update only leaf nodes that connect to this server +// (that is, for which this server acts as a hub to them). +func (acc *Account) updateLeafNodesEx(sub *subscription, delta int32, hubOnly bool) { if acc == nil || sub == nil { return } @@ -2278,8 +2279,14 @@ func (acc *Account) updateLeafNodes(sub *subscription, delta int32) { if ln == sub.client { continue } - // Check to make sure this sub does not have an origin cluster that matches the leafnode. ln.mu.Lock() + // If `hubOnly` is true, it means that we want to update only leafnodes + // that connect to this server (so isHubLeafNode() would return `true`). + if hubOnly && !ln.isHubLeafNode() { + ln.mu.Unlock() + continue + } + // Check to make sure this sub does not have an origin cluster that matches the leafnode. // If skipped, make sure that we still let go the "$LDS." subscription that allows // the detection of loops as long as different cluster. clusterDifferent := cluster != ln.remoteCluster() @@ -2290,6 +2297,12 @@ func (acc *Account) updateLeafNodes(sub *subscription, delta int32) { } } +// updateLeafNodes will make sure to update the account smap for the subscription. +// Will also forward to all leaf nodes as needed. +func (acc *Account) updateLeafNodes(sub *subscription, delta int32) { + acc.updateLeafNodesEx(sub, delta, false) +} + // This will make an update to our internal smap and determine if we should send out // an interest update to the remote side. // Lock should be held. diff --git a/server/leafnode_test.go b/server/leafnode_test.go index d5ebc462081..a90ff3eaf56 100644 --- a/server/leafnode_test.go +++ b/server/leafnode_test.go @@ -9712,30 +9712,48 @@ func TestLeafNodeDupeDeliveryQueueSubAndPlainSub(t *testing.T) { // Create plain subscriber on server B attached to system-b account. ncB := natsConnect(t, srvB.ClientURL(), nats.UserInfo("sb", "sb")) defer ncB.Close() - sub, err := ncB.SubscribeSync("*.system-a.events.>") - require_NoError(t, err) - // Create a new sub that has a queue group as well. - subq, err := ncB.QueueSubscribeSync("*.system-a.events.objectnotfound", "SYSB") - require_NoError(t, err) - ncB.Flush() + sub := natsSubSync(t, ncB, "*.system-a.events.>") + subq := natsQueueSubSync(t, ncB, "*.system-a.events.objectnotfound", "SBQ") + natsFlush(t, ncB) + + // Create a subscription on SA1 (we will send from SA0). We want to make sure that + // when subscription on B is removed, this does not affect the subject interest + // in SA0 on behalf of SA1. + ncSAA1 := natsConnect(t, srvA1.ClientURL(), nats.UserInfo("sa", "sa")) + defer ncSAA1.Close() + sub2 := natsSubSync(t, ncSAA1, "*.system-a.events.>") + subq2 := natsQueueSubSync(t, ncSAA1, "*.system-a.events.objectnotfound", "SBQ") + natsFlush(t, ncSAA1) + time.Sleep(250 * time.Millisecond) - // Connect to cluster A + // Connect to cluster A on SA0. 
ncA := natsConnect(t, srvA0.ClientURL(), nats.UserInfo("t", "t")) defer ncA.Close() - err = ncA.Publish("system-a.events.objectnotfound", []byte("EventA")) - require_NoError(t, err) - ncA.Flush() - // Wait for them to be received. + natsPub(t, ncA, "system-a.events.objectnotfound", []byte("EventA")) + natsFlush(t, ncA) + + natsNexMsg(t, sub, time.Second) + natsNexMsg(t, sub2, time.Second) + if _, err := subq.NextMsg(250 * time.Millisecond); err != nil { + natsNexMsg(t, subq2, time.Second) + } + + // Unsubscribe the subscriptions from server B. + natsUnsub(t, sub) + natsUnsub(t, subq) + natsFlush(t, ncB) + + // Wait for subject propagation. time.Sleep(250 * time.Millisecond) - n, _, err := sub.Pending() - require_NoError(t, err) - require_Equal(t, n, 1) - n, _, err = subq.Pending() - require_NoError(t, err) - require_Equal(t, n, 1) + // Publish again, subscriptions on SA1 should receive it. + natsPub(t, ncA, "system-a.events.objectnotfound", []byte("EventA")) + natsFlush(t, ncA) + + natsNexMsg(t, sub2, time.Second) + natsNexMsg(t, subq2, time.Second) } func TestLeafNodeServerKickClient(t *testing.T) { @@ -10087,3 +10105,226 @@ func TestLeafNodePermissionWithGateways(t *testing.T) { t.Fatalf("Should not have received the reply, got %q", msg.Data) } } + +func TestLeafNodeDaisyChainWithAccountImportExport(t *testing.T) { + hubConf := createConfFile(t, []byte(` + server_name: hub + listen: "127.0.0.1:-1" + + leafnodes { + listen: "127.0.0.1:-1" + } + accounts { + SYS: { + users: [{ user: s, password: s}], + }, + ODC: { + jetstream: enabled + users: [ + { + user: u, password: u, + permissions: { + publish: {deny: ["local.>","hub2leaf.>"]} + subscribe: {deny: ["local.>","leaf2leaf.>"]} + } + } + ] + } + } + `)) + hub, ohub := RunServerWithConfig(hubConf) + defer hub.Shutdown() + + storeDir := t.TempDir() + leafJSConf := createConfFile(t, fmt.Appendf(nil, ` + server_name: leaf-js + listen: "127.0.0.1:-1" + + jetstream { + store_dir="%s/leaf-js" + domain=leaf-js + } + accounts { + ODC: { + jetstream: enabled + users: [{ user: u, password: u}] + }, + } + leafnodes { + remotes [ + { + urls: ["leaf://u:u@127.0.0.1:%d"] # connects to hub + account: ODC + } + ] + } + `, storeDir, ohub.LeafNode.Port)) + leafJS, _ := RunServerWithConfig(leafJSConf) + defer leafJS.Shutdown() + + checkLeafNodeConnected(t, hub) + checkLeafNodeConnected(t, leafJS) + + otherConf := createConfFile(t, []byte(` + server_name: other + listen: "127.0.0.1:-1" + leafnodes { + listen: "127.0.0.1:-1" + } + `)) + other, oother := RunServerWithConfig(otherConf) + defer other.Shutdown() + + tmpl := ` + server_name: %s + listen: "127.0.0.1:-1" + + leafnodes { + listen: "127.0.0.1:-1" + remotes [ + { + urls: ["leaf://u:u@127.0.0.1:%d"] + account: ODC_DEV + } + { + urls: ["leaf://127.0.0.1:%d"] + account: ODC_DEV + } + ] + } + cluster { + name: "hubsh" + listen: "127.0.0.1:-1" + %s + } + accounts: { + ODC_DEV: { + users: [ + {user: o, password: o} + ] + imports: [ + {service: {account: "SH1", subject: "$JS.leaf-sh.API.>"}} + {stream: {account: "SH1", subject: "sync.leaf-sh.jspush.>"}} + ] + exports: [ + {stream: ">"} + {service: ">", response_type: "Singleton"} + ] + } + SH1: { + users: [ + {user: s, password: s} + ] + exports: [ + {service: "$JS.leaf-sh.API.>", response_type: "Stream"} + {stream: "sync.leaf-sh.jspush.>"} + ] + } + } + ` + hubSh1Conf := createConfFile(t, fmt.Appendf(nil, tmpl, "hubsh1", ohub.LeafNode.Port, oother.LeafNode.Port, _EMPTY_)) + hubSh1, ohubSh1 := RunServerWithConfig(hubSh1Conf) + defer hubSh1.Shutdown() + + 
hubSh2Conf := createConfFile(t, fmt.Appendf(nil, tmpl, "hubsh2", ohub.LeafNode.Port, oother.LeafNode.Port, + fmt.Sprintf("routes: [\"nats://127.0.0.1:%d\"]", ohubSh1.Cluster.Port))) + hubSh2, ohubSh2 := RunServerWithConfig(hubSh2Conf) + defer hubSh2.Shutdown() + + checkClusterFormed(t, hubSh1, hubSh2) + + checkLeafNodeConnectedCount(t, hub, 3) + checkLeafNodeConnectedCount(t, hubSh1, 2) + checkLeafNodeConnectedCount(t, hubSh2, 2) + + leafShConf := createConfFile(t, fmt.Appendf(nil, ` + server_name: leafsh + listen: "127.0.0.1:-1" + + jetstream { + store_dir="%s/leafsh" + domain=leaf-sh + } + accounts { + SH: { + jetstream: enabled + users: [{user: u, password: u}] + } + } + leafnodes { + remotes [ + { + urls: ["leaf://s:s@127.0.0.1:%d"] + account: SH + } + ] + } + `, storeDir, ohubSh2.LeafNode.Port)) + leafSh, _ := RunServerWithConfig(leafShConf) + defer leafSh.Shutdown() + + checkLeafNodeConnectedCount(t, hubSh2, 3) + checkLeafNodeConnected(t, leafSh) + + ncLeafSh, jsLeafSh := jsClientConnect(t, leafSh, nats.UserInfo("u", "u")) + defer ncLeafSh.Close() + + sc := &nats.StreamConfig{ + Name: "leaf-sh", + Subjects: []string{"leaf2leaf.>"}, + Retention: nats.LimitsPolicy, + Storage: nats.FileStorage, + AllowRollup: true, + AllowDirect: true, + } + _, err := jsLeafSh.AddStream(sc) + require_NoError(t, err) + + ncLeafJS, jsLeafJS := jsClientConnect(t, leafJS, nats.UserInfo("u", "u")) + defer ncLeafJS.Close() + + sc = &nats.StreamConfig{ + Name: "leaf-js", + Retention: nats.LimitsPolicy, + Storage: nats.FileStorage, + AllowRollup: true, + AllowDirect: true, + Sources: []*nats.StreamSource{ + { + Name: "leaf-sh", + External: &nats.ExternalStream{ + APIPrefix: "$JS.leaf-sh.API", + DeliverPrefix: "sync.leaf-sh.jspush"}, + }, + }, + } + _, err = jsLeafJS.AddStream(sc) + require_NoError(t, err) + + for range 10 { + _, err = jsLeafSh.Publish("leaf2leaf.v1.test", []byte("hello")) + require_NoError(t, err) + } + + check := func(js nats.JetStreamContext, stream string) { + t.Helper() + checkFor(t, 2*time.Second, 50*time.Millisecond, func() error { + si, err := js.StreamInfo(stream) + if err != nil { + return err + } + if n := si.State.Msgs; n != 10 { + return fmt.Errorf("Expected 10 messages, got %v", n) + } + return nil + }) + } + check(jsLeafSh, "leaf-sh") + check(jsLeafJS, "leaf-js") + + acc := other.GlobalAccount() + acc.mu.RLock() + sr := acc.sl.ReverseMatch("sync.leaf-sh.jspush.>") + acc.mu.RUnlock() + require_Len(t, len(sr.psubs), 0) +} diff --git a/server/monitor.go b/server/monitor.go index 9ac83850e00..352d0b7cb96 100644 --- a/server/monitor.go +++ b/server/monitor.go @@ -3788,6 +3788,9 @@ func (s *Server) healthz(opts *HealthzOptions) *HealthStatus { } for stream, sa := range asa { + if sa != nil && sa.unsupported != nil { + continue + } // Make sure we can look up if err := js.isStreamHealthy(acc, sa); err != nil { if !details { diff --git a/server/raft.go b/server/raft.go index c188db878c8..902f12078df 100644 --- a/server/raft.go +++ b/server/raft.go @@ -31,6 +31,7 @@ import ( "sync/atomic" "time" + "github.com/antithesishq/antithesis-sdk-go/assert" "github.com/nats-io/nats-server/v2/internal/fastrand" "github.com/minio/highwayhash" @@ -3324,6 +3325,14 @@ func (n *raft) truncateWAL(term, index uint64) { n.debug("Clearing WAL state (no commits)") } } + if index < n.commit { + assert.Unreachable("WAL truncate lost commits", map[string]any{ + "term": term, + "index": index, + "commit": n.commit, + "applied": n.applied, + }) + } defer func() { // Check to see if we invalidated any snapshots that 
might have held state @@ -3412,7 +3421,6 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { // Are we receiving from another leader. if n.State() == Leader { - // If we are the same we should step down to break the tie. if lterm >= n.term { // If the append entry term is newer than the current term, erase our // vote. @@ -3420,6 +3428,16 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { n.term = lterm n.vote = noVote n.writeTermVote() + } else { + assert.Unreachable( + "Two leaders using the same term", + map[string]any{ + "Node id": n.id, + "Node term": n.term, + "AppendEntry id": ae.leader, + "AppendEntry term": ae.term, + "AppendEntry lterm": ae.lterm, + }) } n.debug("Received append entry from another leader, stepping down to %q", ae.leader) n.stepdownLocked(ae.leader) @@ -3468,22 +3486,50 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { } } - // If we are/were catching up ignore old catchup subs. - // This could happen when we stall or cancel a catchup. - if !isNew && sub != nil && (!catchingUp || sub != n.catchup.sub) { + // If we are/were catching up ignore old catchup subs, but only if catching up from an older server + // that doesn't send the leader term when catching up. We can reject old catchups from newer subs + // later, just by checking the append entry is on the correct term. + if !isNew && sub != nil && ae.lterm == 0 && (!catchingUp || sub != n.catchup.sub) { n.Unlock() n.debug("AppendEntry ignoring old entry from previous catchup") return } + // If this term is greater than ours. + if lterm > n.term { + n.term = lterm + n.vote = noVote + if isNew { + n.writeTermVote() + } + if n.State() != Follower { + n.debug("Term higher than ours and we are not a follower: %v, stepping down to %q", n.State(), ae.leader) + n.stepdownLocked(ae.leader) + } + } else if lterm < n.term && sub != nil && (isNew || ae.lterm != 0) { + // Anything that's below our expected highest term needs to be rejected. + // Unless we're replaying (sub=nil), in which case we'll always continue. + // For backward-compatibility we shouldn't reject if we're being caught up by an old server. + if !isNew { + n.debug("AppendEntry ignoring old entry from previous catchup") + n.Unlock() + return + } + n.debug("Rejected AppendEntry from a leader (%s) with term %d which is less than ours", ae.leader, lterm) + ar := newAppendEntryResponse(n.term, n.pindex, n.id, false) + n.Unlock() + n.sendRPC(ae.reply, _EMPTY_, ar.encode(arbuf)) + arPool.Put(ar) + return + } + // Check state if we are catching up. - var resetCatchingUp bool if catchingUp { if cs := n.catchup; cs != nil && n.pterm >= cs.cterm && n.pindex >= cs.cindex { // If we are here we are good, so if we have a catchup pending we can cancel. n.cancelCatchup() // Reset our notion of catching up. - resetCatchingUp = true + catchingUp = false } else if isNew { var ar *appendEntryResponse var inbox string @@ -3503,34 +3549,6 @@ func (n *raft) processAppendEntry(ae *appendEntry, sub *subscription) { } } - // If this term is greater than ours. - if lterm > n.term { - n.term = lterm - n.vote = noVote - if isNew { - n.writeTermVote() - } - if n.State() != Follower { - n.debug("Term higher than ours and we are not a follower: %v, stepping down to %q", n.State(), ae.leader) - n.stepdownLocked(ae.leader) - } - } else if lterm < n.term && sub != nil && !(catchingUp && ae.lterm == 0) { - // Anything that's below our expected highest term needs to be rejected. 
- // Unless we're replaying (sub=nil), in which case we'll always continue. - // For backward-compatibility we shouldn't reject if we're being caught up by an old server. - n.debug("Rejected AppendEntry from a leader (%s) with term %d which is less than ours", ae.leader, lterm) - ar := newAppendEntryResponse(n.term, n.pindex, n.id, false) - n.Unlock() - n.sendRPC(ae.reply, _EMPTY_, ar.encode(arbuf)) - arPool.Put(ar) - return - } - - // Reset after checking the term is correct, because we use catchingUp in a condition above. - if resetCatchingUp { - catchingUp = false - } - if isNew && n.leader != ae.leader && n.State() == Follower { n.debug("AppendEntry updating leader to %q", ae.leader) n.updateLeader(ae.leader) @@ -3671,21 +3689,7 @@ CONTINUE: n.Unlock() return } - // Save in memory for faster processing during applyCommit. - // Only save so many however to avoid memory bloat. - if l := len(n.pae); l <= paeDropThreshold { - n.pae[n.pindex], l = ae, l+1 - if l > paeWarnThreshold && l%paeWarnModulo == 0 { - n.warn("%d append entries pending", len(n.pae)) - } - } else { - // Invalidate cache entry at this index, we might have - // stored it previously with a different value. - delete(n.pae, n.pindex) - if l%paeWarnModulo == 0 { - n.debug("Not saving to append entries pending") - } - } + n.cachePendingEntry(ae) } else { // This is a replay on startup so just take the appendEntry version. n.pterm = ae.term @@ -3904,12 +3908,7 @@ func (n *raft) sendAppendEntry(entries []*Entry) { return } n.active = time.Now() - - // Save in memory for faster processing during applyCommit. - n.pae[n.pindex] = ae - if l := len(n.pae); l > paeWarnThreshold && l%paeWarnModulo == 0 { - n.warn("%d append entries pending", len(n.pae)) - } + n.cachePendingEntry(ae) } n.sendRPC(n.asubj, n.areply, ae.buf) if !shouldStore { @@ -3917,6 +3916,21 @@ func (n *raft) sendAppendEntry(entries []*Entry) { } } +// cachePendingEntry saves append entries in memory for faster processing during applyCommit. +// Only save so many however to avoid memory bloat. +func (n *raft) cachePendingEntry(ae *appendEntry) { + if l := len(n.pae); l < paeDropThreshold { + n.pae[n.pindex], l = ae, l+1 + if l >= paeWarnThreshold && l%paeWarnModulo == 0 { + n.warn("%d append entries pending", len(n.pae)) + } + } else { + // Invalidate cache entry at this index, we might have + // stored it previously with a different value. + delete(n.pae, n.pindex) + } +} + type extensionState uint16 const ( diff --git a/server/raft_test.go b/server/raft_test.go index 2958e0ed500..9199eb2a32c 100644 --- a/server/raft_test.go +++ b/server/raft_test.go @@ -413,6 +413,7 @@ func TestNRGSwitchStateClearsQueues(t *testing.T) { prop: newIPQueue[*proposedEntry](s, "prop"), resp: newIPQueue[*appendEntryResponse](s, "resp"), leadc: make(chan bool, 1), // for switchState + sd: t.TempDir(), } n.state.Store(int32(Leader)) require_Equal(t, n.prop.len(), 0) @@ -2569,7 +2570,6 @@ func TestNRGRejectAppendEntryDuringCatchupFromPreviousLeader(t *testing.T) { // Now send the second catchup entry. n.processAppendEntry(aeMsg2, nsub) - require_True(t, n.catchup == nil) require_Equal(t, n.pterm, 1) // Under the old behavior this entry is wrongly accepted. @@ -3204,6 +3204,131 @@ func TestNRGTruncateOnStartup(t *testing.T) { require_Equal(t, state.NumDeleted, 0) } +func TestNRGLeaderCatchupHandling(t *testing.T) { + n, cleanup := initSingleMemRaftNode(t) + defer cleanup() + + // Create a sample entry, the content doesn't matter, just that it's stored. 
+ esm := encodeStreamMsgAllowCompress("foo", "_INBOX.foo", nil, nil, 0, 0, true) + entries := []*Entry{newEntry(EntryNormal, esm)} + + nats0 := "S1Nunr6R" // "nats-0" + aeMsg1 := encode(t, &appendEntry{leader: nats0, term: 1, commit: 0, pterm: 0, pindex: 0, entries: entries}) + aeMsg2 := encode(t, &appendEntry{leader: nats0, term: 1, commit: 0, pterm: 1, pindex: 1, entries: entries}) + aeMsg3 := encode(t, &appendEntry{leader: nats0, term: 1, commit: 0, pterm: 1, pindex: 2, entries: entries}) + + n.processAppendEntry(aeMsg1, n.aesub) + n.processAppendEntry(aeMsg2, n.aesub) + n.processAppendEntry(aeMsg3, n.aesub) + require_Equal(t, n.pindex, 3) + + n.switchToLeader() + + catchupReply := "$TEST" + nc, err := nats.Connect(n.s.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) + require_NoError(t, err) + defer nc.Close() + + sub, err := nc.SubscribeSync(catchupReply) + require_NoError(t, err) + defer sub.Drain() + require_NoError(t, nc.Flush()) + + // Simulate a follower that's up-to-date with only the first message. + n.catchupFollower(&appendEntryResponse{success: false, term: 1, index: 1, reply: catchupReply}) + + // Should receive all messages the leader knows up to this point. + msg, err := sub.NextMsg(500 * time.Millisecond) + require_NoError(t, err) + ae, err := n.decodeAppendEntry(msg.Data, nil, _EMPTY_) + require_NoError(t, err) + require_Equal(t, ae.pterm, 1) + require_Equal(t, ae.pindex, 1) + + msg, err = sub.NextMsg(500 * time.Millisecond) + require_NoError(t, err) + ae, err = n.decodeAppendEntry(msg.Data, nil, _EMPTY_) + require_NoError(t, err) + require_Equal(t, ae.pterm, 1) + require_Equal(t, ae.pindex, 2) +} + +func TestNRGNewEntriesFromOldLeaderResetsWALDuringCatchup(t *testing.T) { + n, cleanup := initSingleMemRaftNode(t) + defer cleanup() + + // Create a sample entry, the content doesn't matter, just that it's stored. + esm := encodeStreamMsgAllowCompress("foo", "_INBOX.foo", nil, nil, 0, 0, true) + entries := []*Entry{newEntry(EntryNormal, esm)} + + nats0 := "S1Nunr6R" // "nats-0" + aeMsg1 := encode(t, &appendEntry{leader: nats0, lterm: 20, term: 20, commit: 0, pterm: 0, pindex: 0, entries: entries}) + aeMsg2 := encode(t, &appendEntry{leader: nats0, lterm: 20, term: 20, commit: 0, pterm: 20, pindex: 1, entries: entries}) + aeMsg3 := encode(t, &appendEntry{leader: nats0, lterm: 20, term: 20, commit: 0, pterm: 20, pindex: 2, entries: entries}) + + aeReply := "$TEST" + aeMsg1Fork := encode(t, &appendEntry{leader: nats0, term: 1, commit: 0, pterm: 0, pindex: 0, entries: entries, reply: aeReply}) + aeMsg2Fork := encode(t, &appendEntry{leader: nats0, term: 1, commit: 0, pterm: 1, pindex: 1, entries: entries}) + + // Trigger a catchup. + n.processAppendEntry(aeMsg2, n.aesub) + validateCatchup := func() { + t.Helper() + require_True(t, n.catchup != nil) + require_Equal(t, n.catchup.cterm, 20) + require_Equal(t, n.catchup.cindex, 1) + } + validateCatchup() + + // Catchup the first missed entry. + csub := n.catchup.sub + n.processAppendEntry(aeMsg1, csub) + require_Equal(t, n.pindex, 1) + require_Equal(t, n.pterm, 20) + + nc, err := nats.Connect(n.s.ClientURL(), nats.UserInfo("admin", "s3cr3t!")) + require_NoError(t, err) + defer nc.Close() + + sub, err := nc.SubscribeSync(aeReply) + require_NoError(t, err) + defer sub.Drain() + require_NoError(t, nc.Flush()) + + // Would previously stall the catchup and restart it with a previous leader. 
+ n.catchup.pindex = aeMsg1.pindex + 1 + n.catchup.active = time.Time{} + n.processAppendEntry(aeMsg1Fork, n.aesub) + require_Equal(t, n.pindex, 1) + require_Equal(t, n.pterm, 20) + validateCatchup() + + // Should reply we have a higher term, prompting the server to step down. + msg, err := sub.NextMsg(time.Second) + require_NoError(t, err) + ar := n.decodeAppendEntryResponse(msg.Data) + require_False(t, ar.success) + require_Equal(t, ar.index, 1) + require_Equal(t, ar.term, 20) + + // Would previously reset the WAL. + n.processAppendEntry(aeMsg2Fork, n.aesub) + require_Equal(t, n.pindex, 1) + require_Equal(t, n.pterm, 20) + validateCatchup() + + // Now the catchup should continue, undisturbed by an old leader sending append entries. + n.processAppendEntry(aeMsg2, csub) + require_Equal(t, n.pindex, 2) + require_Equal(t, n.pterm, 20) + require_True(t, n.catchup == nil) + + // A remaining catchup entry can still be ingested, even if the catchup state itself is gone. + n.processAppendEntry(aeMsg3, csub) + require_Equal(t, n.pindex, 3) + require_Equal(t, n.pterm, 20) +} + // This is a RaftChainOfBlocks test where a block is proposed and then we wait for all replicas to apply it before // proposing the next one. // The test may fail if: diff --git a/server/route.go b/server/route.go index 2d72e6fe58d..ac4d9c66b8a 100644 --- a/server/route.go +++ b/server/route.go @@ -87,6 +87,17 @@ type route struct { // Transient value used to set the Info.GossipMode when initiating // an implicit route and sending to the remote. gossipMode byte + // This will be set in case of pooling so that a route can trigger + // the creation of the next after receiving the first PONG, ensuring + // that authentication did not fail. + startNewRoute *routeInfo +} + +// This contains the information required to create a new route. +type routeInfo struct { + url *url.URL + rtype RouteType + gossipMode byte } // Do not change the values/order since they are exchanged between servers. @@ -2379,20 +2390,18 @@ func (s *Server) addRoute(c *client, didSolicit, sendDelayedInfo bool, gossipMod // Send the subscriptions interest. s.sendSubsToRoute(c, idx, _EMPTY_) - // In pool mode, if we did not yet reach the cap, try to connect a new connection + // In pool mode, if we did not yet reach the cap, try to connect a new connection, + // but do so only after receiving the first PONG to our PING, which will ensure + // that we have proper authentication. if pool && didSolicit && sz != effectivePoolSize { - s.startGoRoutine(func() { - select { - case <-time.After(time.Duration(rand.Intn(100)) * time.Millisecond): - case <-s.quitCh: - // Doing this here and not as a defer because connectToRoute is also - // calling s.grWG.Done() on exit, so we do this only if we don't - // invoke connectToRoute(). 
- s.grWG.Done() - return - } - s.connectToRoute(url, rtype, true, gossipMode, _EMPTY_) - }) + c.mu.Lock() + c.route.startNewRoute = &routeInfo{ + url: url, + rtype: rtype, + gossipMode: gossipMode, + } + c.sendPing() + c.mu.Unlock() } } s.mu.Unlock() diff --git a/server/routes_test.go b/server/routes_test.go index 941312a85ea..27eb9683ac9 100644 --- a/server/routes_test.go +++ b/server/routes_test.go @@ -326,10 +326,12 @@ func checkClusterFormed(t testing.TB, servers ...*Server) { if a == b { continue } - if b.getOpts().Cluster.PoolSize < 0 { + bo := b.getOpts() + if ps := bo.Cluster.PoolSize; ps < 0 { total++ } else { - total += nr + bps := ps + len(bo.Cluster.PinnedAccounts) + total += max(nr, bps) } } enr = append(enr, total) @@ -3680,6 +3682,62 @@ func TestRoutePoolWithOlderServerConnectAndReconnect(t *testing.T) { checkRepeatConnect() } +func TestRoutePoolBadAuthNoRunawayCreateRoute(t *testing.T) { + conf1 := createConfFile(t, []byte(` + server_name: "S1" + listen: "127.0.0.1:-1" + cluster { + name: "local" + listen: "127.0.0.1:-1" + pool_size: 4 + authorization { + user: "correct" + password: "correct" + timeout: 5 + } + } + `)) + s1, o1 := RunServerWithConfig(conf1) + defer s1.Shutdown() + + l := &captureErrorLogger{errCh: make(chan string, 100)} + s1.SetLogger(l, false, false) + + tmpl := ` + server_name: "S2" + listen: "127.0.0.1:-1" + cluster { + name: "local" + listen: "127.0.0.1:-1" + pool_size: 5 + routes: ["nats://%s@127.0.0.1:%d"] + } + ` + conf2 := createConfFile(t, fmt.Appendf(nil, tmpl, "incorrect:incorrect", o1.Cluster.Port)) + s2, _ := RunServerWithConfig(conf2) + defer s2.Shutdown() + + deadline := time.Now().Add(2 * time.Second) + var errors int + for time.Now().Before(deadline) { + select { + case <-l.errCh: + errors++ + default: + } + } + // We should not get that many errors now. In the past, we would get more + // than 200 for the 2 sec wait. + if errors > 10 { + t.Fatalf("Unexpected number of errors: %v", errors) + } + + // Reload with proper credentials. + reloadUpdateConfig(t, s2, conf2, fmt.Sprintf(tmpl, "correct:correct", o1.Cluster.Port)) + // Ensure we can connect. + checkClusterFormed(t, s1, s2) +} + func TestRouteCompressionOptions(t *testing.T) { org := testDefaultClusterCompression testDefaultClusterCompression = _EMPTY_ diff --git a/server/stream.go b/server/stream.go index 7dd36888208..03330205a2e 100644 --- a/server/stream.go +++ b/server/stream.go @@ -205,6 +205,13 @@ type StreamInfo struct { TimeStamp time.Time `json:"ts"` } +// streamInfoClusterResponse is a response used in a cluster to communicate the stream info +// back to the meta leader as part of a stream list request. +type streamInfoClusterResponse struct { + StreamInfo + OfflineReason string `json:"offline_reason,omitempty"` // Reporting when a stream is offline. +} + type StreamAlternate struct { Name string `json:"name"` Domain string `json:"domain,omitempty"` @@ -378,6 +385,10 @@ type stream struct { lastBySub *subscription monitorWg sync.WaitGroup // Wait group for the monitor routine. + + // If standalone/single-server, the offline reason needs to be stored directly in the stream. + // Otherwise, if clustered it will be part of the stream assignment. + offlineReason string } type sourceInfo struct { @@ -940,6 +951,17 @@ func (mset *stream) monitorQuitC() <-chan struct{} { return mset.mqch } +// signalMonitorQuit signals to exit the monitor loop. If there's no Raft node, +// this will be the only way to stop the monitor goroutine. 
+func (mset *stream) signalMonitorQuit() { + mset.mu.Lock() + defer mset.mu.Unlock() + if mset.mqch != nil { + close(mset.mqch) + mset.mqch = nil + } +} + func (mset *stream) updateC() <-chan struct{} { if mset == nil { return nil @@ -1781,6 +1803,10 @@ func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account, pedantic boo } } + // Remove placement if it's an empty object. + if cfg.Placement != nil && reflect.DeepEqual(cfg.Placement, &Placement{}) { + cfg.Placement = nil + } // For now don't allow preferred server in placement. if cfg.Placement != nil && cfg.Placement.Preferred != _EMPTY_ { return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("preferred server not permitted in placement")) @@ -2423,7 +2449,7 @@ func (mset *stream) mirrorInfo() *StreamSourceInfo { // retryDisconnectedSyncConsumers() will check if we have any disconnected // sync consumers for either mirror or a source and will reset and retry to connect. -func (mset *stream) retryDisconnectedSyncConsumers(remoteDomain string) { +func (mset *stream) retryDisconnectedSyncConsumers() { mset.mu.Lock() defer mset.mu.Unlock() @@ -2432,23 +2458,24 @@ func (mset *stream) retryDisconnectedSyncConsumers(remoteDomain string) { return } + shouldRetry := func(si *sourceInfo) bool { + if si != nil && (si.sip || si.sub == nil || (si.sub.client != nil && si.sub.client.isClosed())) { + // Need to reset + si.fails, si.sip = 0, false + mset.cancelSourceInfo(si) + return true + } + return false + } + // Check mirrors first. if si := mset.mirror; si != nil { - if si.sub == nil && !si.sip { - if remoteDomain == _EMPTY_ || (mset.cfg.Mirror != nil && mset.cfg.Mirror.External.Domain() == remoteDomain) { - // Need to reset - si.fails = 0 - mset.cancelSourceInfo(si) - mset.scheduleSetupMirrorConsumerRetry() - } + if shouldRetry(si) { + mset.scheduleSetupMirrorConsumerRetry() } } else { for _, si := range mset.sources { - ss := mset.streamSource(si.iname) - if remoteDomain == _EMPTY_ || (ss != nil && ss.External.Domain() == remoteDomain) { - // Need to reset - si.fails = 0 - mset.cancelSourceInfo(si) + if shouldRetry(si) { mset.setupSourceConsumer(si.iname, si.sseq+1, time.Time{}) } } @@ -2973,7 +3000,8 @@ func (mset *stream) setupMirrorConsumer() error { if mset.mirror != nil { mset.mirror.sip = false // If we need to retry, schedule now - if retry { + // If sub is not nil means we re-established somewhere else so do not re-attempt here. + if retry && mset.mirror.sub == nil { mset.mirror.fails++ // Cancel here since we can not do anything with this consumer at this point. mset.cancelSourceInfo(mset.mirror) @@ -3334,7 +3362,8 @@ func (mset *stream) trySetupSourceConsumer(iname string, seq uint64, startTime t if si := mset.sources[iname]; si != nil { si.sip = false // If we need to retry, schedule now - if retry { + // If sub is not nil means we re-established somewhere else so do not re-attempt here. + if retry && si.sub == nil { si.fails++ // Cancel here since we can not do anything with this consumer at this point. mset.cancelSourceInfo(si) @@ -5745,6 +5774,7 @@ func (mset *stream) resetAndWaitOnConsumers() { node.Stop() } if o.isMonitorRunning() { + o.signalMonitorQuit() o.monitorWg.Wait() } } @@ -5761,7 +5791,7 @@ func (mset *stream) delete() error { // Internal function to stop or delete the stream. 
func (mset *stream) stop(deleteFlag, advisory bool) error { mset.mu.RLock() - js, jsa, name := mset.js, mset.jsa, mset.cfg.Name + js, jsa, name, offlineReason := mset.js, mset.jsa, mset.cfg.Name, mset.offlineReason mset.mu.RUnlock() if jsa == nil { @@ -5770,7 +5800,10 @@ func (mset *stream) stop(deleteFlag, advisory bool) error { // Remove from our account map first. jsa.mu.Lock() - delete(jsa.streams, name) + // Preserve in the account if it's marked offline, to have it remain queryable. + if deleteFlag || offlineReason == _EMPTY_ { + delete(jsa.streams, name) + } accName := jsa.account.Name jsa.mu.Unlock() @@ -5803,9 +5836,12 @@ func (mset *stream) stop(deleteFlag, advisory bool) error { for _, o := range mset.consumers { obs = append(obs, o) } - mset.clsMu.Lock() - mset.consumers, mset.cList, mset.csl = nil, nil, nil - mset.clsMu.Unlock() + // Preserve the consumers if it's marked offline, to have them remain queryable. + if deleteFlag || offlineReason == _EMPTY_ { + mset.clsMu.Lock() + mset.consumers, mset.cList, mset.csl = nil, nil, nil + mset.clsMu.Unlock() + } // Check if we are a mirror. if mset.mirror != nil && mset.mirror.sub != nil { @@ -5829,6 +5865,7 @@ func (mset *stream) stop(deleteFlag, advisory bool) error { // but should we log? o.stopWithFlags(deleteFlag, deleteFlag, false, advisory) if !isShuttingDown { + o.signalMonitorQuit() o.monitorWg.Wait() } } @@ -5906,14 +5943,17 @@ func (mset *stream) stop(deleteFlag, advisory bool) error { } if deleteFlag { + // cleanup directories after the stream + accDir := filepath.Join(js.config.StoreDir, accName) if store != nil { // Ignore errors. store.Delete() + } else { + streamDir := filepath.Join(accDir, streamsDir) + os.RemoveAll(filepath.Join(streamDir, name)) } // Release any resources. js.releaseStreamResources(&mset.cfg) - // cleanup directories after the stream - accDir := filepath.Join(js.config.StoreDir, accName) // Do cleanup in separate go routine similar to how fs will use purge here.. go func() { // no op if not empty