Skip to content

Commit

Permalink
remove addresses from node class hash
Browse files Browse the repository at this point in the history
When a node is fingerprinted, we calculate a "computed class" from a hash over a
subset of its fields and attributes. In the scheduler, when a given node fails
feasibility checking (before fit checking) we know that no other node of that
same class will be feasible, and we add the hash to a map so we can reject them
early. This hash cannot include any values that are unique to a given node,
otherwise no other node will have the same hash and we'll never save ourselves
the work of feasibility checking those nodes.

In #4390 we introduce the `nomad.advertise.address` attribute and in #19969 we
introduced `consul.dns.addr` attribute. Both of these are unique per node and
break the hash.

Additionally, we were not correctly filtering attributes out when checking if a
node escaped the class by not filtering for attributes that start with
`unique.`. The test for this introduced in #708 had an inverted assertion, which
allowed this to pass unnoticed since the early days of Nomad.

Ref: #708
Ref: #4390
Ref: #19969
  • Loading branch information
tgross committed Jan 24, 2025
1 parent ef32825 commit 7c69a85
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 33 deletions.
7 changes: 7 additions & 0 deletions .changelog/24942.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
```release-note:bug
scheduler: Fixed a bug where node class hashes included unique attributes, making scheduling more costly
```

```release-note:breaking-change
node: The node attribute `consul.addr.dns` has been changed to `unique.consul.addr.dns`. The node attribute `nomad.advertise.address` has been changed to `unique.advertise.address`.
```
4 changes: 2 additions & 2 deletions client/allocrunner/networking_cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,10 @@ func (c *cniNetworkConfigurator) setupTransparentProxyArgs(alloc *structs.Alloca
func (c *cniNetworkConfigurator) dnsFromAttrs(cluster string) (string, int) {
var dnsAddrAttr, dnsPortAttr string
if cluster == structs.ConsulDefaultCluster || cluster == "" {
dnsAddrAttr = "consul.dns.addr"
dnsAddrAttr = "unique.consul.dns.addr"
dnsPortAttr = "consul.dns.port"
} else {
dnsAddrAttr = "consul." + cluster + ".dns.addr"
dnsAddrAttr = "unique.consul." + cluster + ".dns.addr"
dnsPortAttr = "consul." + cluster + ".dns.port"
}

Expand Down
50 changes: 25 additions & 25 deletions client/fingerprint/consul.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,34 +156,34 @@ func (cfs *consulFingerprintState) initialize(cfg *config.ConsulConfig, logger h

if cfg.Name == structs.ConsulDefaultCluster {
cfs.extractors = map[string]consulExtractor{
"consul.server": cfs.server,
"consul.version": cfs.version,
"consul.sku": cfs.sku,
"consul.revision": cfs.revision,
"unique.consul.name": cfs.name, // note: won't have this for non-default clusters
"consul.datacenter": cfs.dc,
"consul.segment": cfs.segment,
"consul.connect": cfs.connect,
"consul.grpc": cfs.grpc(consulConfig.Scheme, logger),
"consul.ft.namespaces": cfs.namespaces,
"consul.partition": cfs.partition,
"consul.dns.port": cfs.dnsPort,
"consul.dns.addr": cfs.dnsAddr(logger),
"consul.server": cfs.server,
"consul.version": cfs.version,
"consul.sku": cfs.sku,
"consul.revision": cfs.revision,
"unique.consul.name": cfs.name, // note: won't have this for non-default clusters
"consul.datacenter": cfs.dc,
"consul.segment": cfs.segment,
"consul.connect": cfs.connect,
"consul.grpc": cfs.grpc(consulConfig.Scheme, logger),
"consul.ft.namespaces": cfs.namespaces,
"consul.partition": cfs.partition,
"consul.dns.port": cfs.dnsPort,
"unique.consul.dns.addr": cfs.dnsAddr(logger),
}
} else {
cfs.extractors = map[string]consulExtractor{
fmt.Sprintf("consul.%s.server", cfg.Name): cfs.server,
fmt.Sprintf("consul.%s.version", cfg.Name): cfs.version,
fmt.Sprintf("consul.%s.sku", cfg.Name): cfs.sku,
fmt.Sprintf("consul.%s.revision", cfg.Name): cfs.revision,
fmt.Sprintf("consul.%s.datacenter", cfg.Name): cfs.dc,
fmt.Sprintf("consul.%s.segment", cfg.Name): cfs.segment,
fmt.Sprintf("consul.%s.connect", cfg.Name): cfs.connect,
fmt.Sprintf("consul.%s.grpc", cfg.Name): cfs.grpc(consulConfig.Scheme, logger),
fmt.Sprintf("consul.%s.ft.namespaces", cfg.Name): cfs.namespaces,
fmt.Sprintf("consul.%s.partition", cfg.Name): cfs.partition,
fmt.Sprintf("consul.%s.dns.port", cfg.Name): cfs.dnsPort,
fmt.Sprintf("consul.%s.dns.addr", cfg.Name): cfs.dnsAddr(logger),
fmt.Sprintf("consul.%s.server", cfg.Name): cfs.server,
fmt.Sprintf("consul.%s.version", cfg.Name): cfs.version,
fmt.Sprintf("consul.%s.sku", cfg.Name): cfs.sku,
fmt.Sprintf("consul.%s.revision", cfg.Name): cfs.revision,
fmt.Sprintf("consul.%s.datacenter", cfg.Name): cfs.dc,
fmt.Sprintf("consul.%s.segment", cfg.Name): cfs.segment,
fmt.Sprintf("consul.%s.connect", cfg.Name): cfs.connect,
fmt.Sprintf("consul.%s.grpc", cfg.Name): cfs.grpc(consulConfig.Scheme, logger),
fmt.Sprintf("consul.%s.ft.namespaces", cfg.Name): cfs.namespaces,
fmt.Sprintf("consul.%s.partition", cfg.Name): cfs.partition,
fmt.Sprintf("consul.%s.dns.port", cfg.Name): cfs.dnsPort,
fmt.Sprintf("unique.consul.%s.dns.addr", cfg.Name): cfs.dnsAddr(logger),
}
}

Expand Down
2 changes: 1 addition & 1 deletion client/fingerprint/nomad.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func NewNomadFingerprint(logger log.Logger) Fingerprint {
}

func (f *NomadFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error {
resp.AddAttribute("nomad.advertise.address", req.Node.HTTPAddr)
resp.AddAttribute("unique.advertise.address", req.Node.HTTPAddr)
resp.AddAttribute("nomad.version", req.Config.Version.VersionNumber())
resp.AddAttribute("nomad.revision", req.Config.Version.Revision)
resp.AddAttribute("nomad.service_discovery", strconv.FormatBool(req.Config.NomadServiceDiscovery))
Expand Down
2 changes: 2 additions & 0 deletions nomad/structs/node_class.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ func EscapedConstraints(constraints []*Constraint) []*Constraint {
// computed node class optimization.
func constraintTargetEscapes(target string) bool {
switch {
case strings.HasPrefix(target, "${unique."):
return true
case strings.HasPrefix(target, "${node.unique."):
return true
case strings.HasPrefix(target, "${attr.unique."):
Expand Down
8 changes: 3 additions & 5 deletions nomad/structs/node_class_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package structs

import (
"reflect"
"testing"

"github.com/hashicorp/nomad/ci"
Expand Down Expand Up @@ -276,8 +275,7 @@ func TestNode_EscapedConstraints(t *testing.T) {
Operand: "!=",
}
constraints := []*Constraint{ne1, ne2, ne3, e1, e2, e3}
expected := []*Constraint{ne1, ne2, ne3}
if act := EscapedConstraints(constraints); reflect.DeepEqual(act, expected) {
t.Fatalf("EscapedConstraints(%v) returned %v; want %v", constraints, act, expected)
}
expected := []*Constraint{e1, e2, e3}
must.Eq(t, expected, EscapedConstraints(constraints),
must.Sprintf("expected unique fields to escape constraints"))
}

0 comments on commit 7c69a85

Please sign in to comment.