Skip to content

Commit

Permalink
remove addresses from node class hash
Browse files Browse the repository at this point in the history
When a node is fingerprinted, we calculate a "computed class" from a hash over a
subset of its fields and attributes. In the scheduler, when a given node fails
feasibility checking (before fit checking) we know that no other node of that
same class will be feasible, and we add the hash to a map so we can reject them
early. This hash cannot include any values that are unique to a given node,
otherwise no other node will have the same hash and we'll never save ourselves
the work of feasibility checking those nodes.

In #4390 we introduce the `nomad.advertise.address` attribute and in #19969 we
introduced `consul.dns.addr` attribute. Both of these are unique per node and
break the hash.

Additionally, we were not correctly filtering attributes out when checking if a
node escaped the class by not filtering for attributes that start with
`unique.`. The test for this introduced in #708 had an inverted assertion, which
allowed this to pass unnoticed since the early days of Nomad.

Ref: #708
Ref: #4390
Ref: #19969
  • Loading branch information
tgross committed Feb 3, 2025
1 parent 7929939 commit 05635ce
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 70 deletions.
7 changes: 7 additions & 0 deletions .changelog/24942.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
```release-note:bug
scheduler: Fixed a bug where node class hashes included unique attributes, making scheduling more costly
```

```release-note:breaking-change
node: The node attribute `consul.addr.dns` has been changed to `unique.consul.addr.dns`. The node attribute `nomad.advertise.address` has been changed to `unique.advertise.address`.
```
4 changes: 2 additions & 2 deletions client/allocrunner/networking_cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,10 @@ func (c *cniNetworkConfigurator) setupTransparentProxyArgs(alloc *structs.Alloca
func (c *cniNetworkConfigurator) dnsFromAttrs(cluster string) (string, int) {
var dnsAddrAttr, dnsPortAttr string
if cluster == structs.ConsulDefaultCluster || cluster == "" {
dnsAddrAttr = "consul.dns.addr"
dnsAddrAttr = "unique.consul.dns.addr"
dnsPortAttr = "consul.dns.port"
} else {
dnsAddrAttr = "consul." + cluster + ".dns.addr"
dnsAddrAttr = "unique.consul." + cluster + ".dns.addr"
dnsPortAttr = "consul." + cluster + ".dns.port"
}

Expand Down
20 changes: 10 additions & 10 deletions client/allocrunner/networking_cni_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ func TestSetup(t *testing.T) {
}

nodeAddrs := map[string]string{
"consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
"unique.consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
}
nodeMeta := map[string]string{
"connect.transparent_proxy.default_outbound_port": "15001",
Expand Down Expand Up @@ -554,8 +554,8 @@ func TestCNI_setupTproxyArgs(t *testing.T) {
}

nodeAttrs := map[string]string{
"consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
"unique.consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
}

alloc := mock.ConnectAlloc()
Expand Down Expand Up @@ -716,8 +716,8 @@ func TestCNI_setupTproxyArgs(t *testing.T) {
{
name: "tproxy with consul dns disabled",
nodeAttrs: map[string]string{
"consul.dns.port": "-1",
"consul.dns.addr": "192.168.1.117",
"consul.dns.port": "-1",
"unique.consul.dns.addr": "192.168.1.117",
},
tproxySpec: &structs.ConsulTransparentProxy{},
expectIPConfig: &iptables.Config{
Expand All @@ -732,10 +732,10 @@ func TestCNI_setupTproxyArgs(t *testing.T) {
name: "tproxy for other cluster with default consul dns disabled",
cluster: "infra",
nodeAttrs: map[string]string{
"consul.dns.port": "-1",
"consul.dns.addr": "192.168.1.110",
"consul.infra.dns.port": "8600",
"consul.infra.dns.addr": "192.168.1.117",
"consul.dns.port": "-1",
"unique.consul.dns.addr": "192.168.1.110",
"consul.infra.dns.port": "8600",
"unique.consul.infra.dns.addr": "192.168.1.117",
},
tproxySpec: &structs.ConsulTransparentProxy{},
expectIPConfig: &iptables.Config{
Expand Down
50 changes: 25 additions & 25 deletions client/fingerprint/consul.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,34 +131,34 @@ func (cfs *consulState) initialize(cfg *config.ConsulConfig, logger hclog.Logger

if cfg.Name == structs.ConsulDefaultCluster {
cfs.readers = map[string]valueReader{
"consul.server": cfs.server,
"consul.version": cfs.version,
"consul.sku": cfs.sku,
"consul.revision": cfs.revision,
"unique.consul.name": cfs.name, // note: won't have this for non-default clusters
"consul.datacenter": cfs.dc,
"consul.segment": cfs.segment,
"consul.connect": cfs.connect,
"consul.grpc": cfs.grpc(consulConfig.Scheme, logger),
"consul.ft.namespaces": cfs.namespaces,
"consul.partition": cfs.partition,
"consul.dns.port": cfs.dnsPort,
"consul.dns.addr": cfs.dnsAddr(logger),
"consul.server": cfs.server,
"consul.version": cfs.version,
"consul.sku": cfs.sku,
"consul.revision": cfs.revision,
"unique.consul.name": cfs.name, // note: won't have this for non-default clusters
"consul.datacenter": cfs.dc,
"consul.segment": cfs.segment,
"consul.connect": cfs.connect,
"consul.grpc": cfs.grpc(consulConfig.Scheme, logger),
"consul.ft.namespaces": cfs.namespaces,
"consul.partition": cfs.partition,
"consul.dns.port": cfs.dnsPort,
"unique.consul.dns.addr": cfs.dnsAddr(logger),
}
} else {
cfs.readers = map[string]valueReader{
fmt.Sprintf("consul.%s.server", cfg.Name): cfs.server,
fmt.Sprintf("consul.%s.version", cfg.Name): cfs.version,
fmt.Sprintf("consul.%s.sku", cfg.Name): cfs.sku,
fmt.Sprintf("consul.%s.revision", cfg.Name): cfs.revision,
fmt.Sprintf("consul.%s.datacenter", cfg.Name): cfs.dc,
fmt.Sprintf("consul.%s.segment", cfg.Name): cfs.segment,
fmt.Sprintf("consul.%s.connect", cfg.Name): cfs.connect,
fmt.Sprintf("consul.%s.grpc", cfg.Name): cfs.grpc(consulConfig.Scheme, logger),
fmt.Sprintf("consul.%s.ft.namespaces", cfg.Name): cfs.namespaces,
fmt.Sprintf("consul.%s.partition", cfg.Name): cfs.partition,
fmt.Sprintf("consul.%s.dns.port", cfg.Name): cfs.dnsPort,
fmt.Sprintf("consul.%s.dns.addr", cfg.Name): cfs.dnsAddr(logger),
fmt.Sprintf("consul.%s.server", cfg.Name): cfs.server,
fmt.Sprintf("consul.%s.version", cfg.Name): cfs.version,
fmt.Sprintf("consul.%s.sku", cfg.Name): cfs.sku,
fmt.Sprintf("consul.%s.revision", cfg.Name): cfs.revision,
fmt.Sprintf("consul.%s.datacenter", cfg.Name): cfs.dc,
fmt.Sprintf("consul.%s.segment", cfg.Name): cfs.segment,
fmt.Sprintf("consul.%s.connect", cfg.Name): cfs.connect,
fmt.Sprintf("consul.%s.grpc", cfg.Name): cfs.grpc(consulConfig.Scheme, logger),
fmt.Sprintf("consul.%s.ft.namespaces", cfg.Name): cfs.namespaces,
fmt.Sprintf("consul.%s.partition", cfg.Name): cfs.partition,
fmt.Sprintf("consul.%s.dns.port", cfg.Name): cfs.dnsPort,
fmt.Sprintf("unique.consul.%s.dns.addr", cfg.Name): cfs.dnsAddr(logger),
}
}

Expand Down
52 changes: 26 additions & 26 deletions client/fingerprint/consul_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,19 +688,19 @@ func TestConsulFingerprint_Fingerprint_ent(t *testing.T) {
err := cf.Fingerprint(&FingerprintRequest{Config: cfg, Node: node}, &resp)
must.NoError(t, err)
must.Eq(t, map[string]string{
"consul.datacenter": "dc1",
"consul.revision": "22ce6c6ad",
"consul.segment": "seg1",
"consul.server": "true",
"consul.sku": "ent",
"consul.version": "1.9.5+ent",
"consul.ft.namespaces": "true",
"consul.connect": "true",
"consul.grpc": "8502",
"consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
"consul.partition": "default",
"unique.consul.name": "HAL9000",
"consul.datacenter": "dc1",
"consul.revision": "22ce6c6ad",
"consul.segment": "seg1",
"consul.server": "true",
"consul.sku": "ent",
"consul.version": "1.9.5+ent",
"consul.ft.namespaces": "true",
"consul.connect": "true",
"consul.grpc": "8502",
"unique.consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
"consul.partition": "default",
"unique.consul.name": "HAL9000",
}, resp.Attributes)
must.True(t, resp.Detected)

Expand Down Expand Up @@ -736,19 +736,19 @@ func TestConsulFingerprint_Fingerprint_ent(t *testing.T) {
err3 := cf.Fingerprint(&FingerprintRequest{Config: cfg, Node: node}, &resp3)
must.NoError(t, err3)
must.Eq(t, map[string]string{
"consul.datacenter": "dc1",
"consul.revision": "22ce6c6ad",
"consul.segment": "seg1",
"consul.server": "true",
"consul.sku": "ent",
"consul.version": "1.9.5+ent",
"consul.ft.namespaces": "true",
"consul.connect": "true",
"consul.grpc": "8502",
"consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
"consul.partition": "default",
"unique.consul.name": "HAL9000",
"consul.datacenter": "dc1",
"consul.revision": "22ce6c6ad",
"consul.segment": "seg1",
"consul.server": "true",
"consul.sku": "ent",
"consul.version": "1.9.5+ent",
"consul.ft.namespaces": "true",
"consul.connect": "true",
"consul.grpc": "8502",
"unique.consul.dns.addr": "192.168.1.117",
"consul.dns.port": "8600",
"consul.partition": "default",
"unique.consul.name": "HAL9000",
}, resp3.Attributes)

// consul now available again
Expand Down
2 changes: 1 addition & 1 deletion client/fingerprint/nomad.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func NewNomadFingerprint(logger log.Logger) Fingerprint {
}

func (f *NomadFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error {
resp.AddAttribute("nomad.advertise.address", req.Node.HTTPAddr)
resp.AddAttribute("unique.advertise.address", req.Node.HTTPAddr)
resp.AddAttribute("nomad.version", req.Config.Version.VersionNumber())
resp.AddAttribute("nomad.revision", req.Config.Version.Revision)
resp.AddAttribute("nomad.service_discovery", strconv.FormatBool(req.Config.NomadServiceDiscovery))
Expand Down
2 changes: 1 addition & 1 deletion client/fingerprint/nomad_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestNomadFingerprint(t *testing.T) {
t.Fatalf("incorrect revision")
}

if response.Attributes["nomad.advertise.address"] != h {
if response.Attributes["unique.advertise.address"] != h {
t.Fatalf("incorrect advertise address")
}

Expand Down
2 changes: 2 additions & 0 deletions nomad/structs/node_class.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ func EscapedConstraints(constraints []*Constraint) []*Constraint {
// computed node class optimization.
func constraintTargetEscapes(target string) bool {
switch {
case strings.HasPrefix(target, "${unique."):
return true
case strings.HasPrefix(target, "${node.unique."):
return true
case strings.HasPrefix(target, "${attr.unique."):
Expand Down
8 changes: 3 additions & 5 deletions nomad/structs/node_class_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package structs

import (
"reflect"
"testing"

"github.com/hashicorp/nomad/ci"
Expand Down Expand Up @@ -276,8 +275,7 @@ func TestNode_EscapedConstraints(t *testing.T) {
Operand: "!=",
}
constraints := []*Constraint{ne1, ne2, ne3, e1, e2, e3}
expected := []*Constraint{ne1, ne2, ne3}
if act := EscapedConstraints(constraints); reflect.DeepEqual(act, expected) {
t.Fatalf("EscapedConstraints(%v) returned %v; want %v", constraints, act, expected)
}
expected := []*Constraint{e1, e2, e3}
must.Eq(t, expected, EscapedConstraints(constraints),
must.Sprintf("expected unique fields to escape constraints"))
}

0 comments on commit 05635ce

Please sign in to comment.