Skip to content

Commit 4d39305

Browse files
freddygv authored and hashicorp-ci committed
Add DC and NS support for Envoy metrics (#9207)
This PR updates the tags that we generate for Envoy stats. Several of these come with breaking changes, since we can't keep two stats prefixes for a filter.
1 parent e421da3 commit 4d39305

File tree

220 files changed

+2792
-914
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

220 files changed

+2792
-914
lines changed

Diff for: .changelog/9207.txt

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
```release-note:breaking-change
2+
connect: Update Envoy metrics names and labels for proxy listeners so that attributes like datacenter and namespace can be extracted.
3+
```

Diff for: agent/xds/listeners.go

+52 -33
Original file line number | Diff line number | Diff line change
@@ -319,7 +319,7 @@ func (s *Server) makeIngressGatewayListeners(address string, cfgSnap *proxycfg.C
319319
filterName: listenerKey.RouteName(),
320320
routeName: listenerKey.RouteName(),
321321
cluster: "",
322-
statPrefix: "ingress_upstream_",
322+
statPrefix: "ingress_upstream.",
323323
routePath: "",
324324
ingress: false,
325325
httpAuthzFilter: nil,
@@ -771,7 +771,7 @@ func (s *Server) makeTerminatingGatewayListener(
771771

772772
// This fallback catch-all filter ensures a listener will be present for health checks to pass
773773
// Envoy will reset these connections since known endpoints are caught by filter chain matches above
774-
tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway_")
774+
tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway.")
775775
if err != nil {
776776
return nil, err
777777
}
@@ -821,7 +821,7 @@ func (s *Server) makeFilterChainTerminatingGateway(
821821
// Lastly we setup the actual proxying component. For L4 this is a straight
822822
// tcp proxy. For L7 this is a very hands-off HTTP proxy just to inject an
823823
// HTTP filter to do intention checks here instead.
824-
statPrefix := fmt.Sprintf("terminating_gateway_%s_%s_", service.NamespaceOrDefault(), service.Name)
824+
statPrefix := fmt.Sprintf("terminating_gateway.%s.%s.", service.NamespaceOrDefault(), service.Name)
825825
opts := listenerFilterOpts{
826826
protocol: protocol,
827827
filterName: listener,
@@ -868,7 +868,7 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p
868868

869869
// The cluster name here doesn't matter as the sni_cluster
870870
// filter will fill it in for us.
871-
tcpProxy, err := makeTCPProxyFilter(name, "", "mesh_gateway_local_")
871+
tcpProxy, err := makeTCPProxyFilter(name, "", "mesh_gateway_local.")
872872
if err != nil {
873873
return nil, err
874874
}
@@ -891,8 +891,8 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p
891891
continue // skip local
892892
}
893893
clusterName := connect.DatacenterSNI(dc, cfgSnap.Roots.TrustDomain)
894-
filterName := fmt.Sprintf("%s_%s", name, dc)
895-
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote_")
894+
filterName := fmt.Sprintf("%s.%s", name, dc)
895+
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote.")
896896
if err != nil {
897897
return nil, err
898898
}
@@ -913,8 +913,8 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p
913913
continue // skip local
914914
}
915915
clusterName := cfgSnap.ServerSNIFn(dc, "")
916-
filterName := fmt.Sprintf("%s_%s", name, dc)
917-
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote_")
916+
filterName := fmt.Sprintf("%s.%s", name, dc)
917+
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote.")
918918
if err != nil {
919919
return nil, err
920920
}
@@ -933,8 +933,8 @@ func (s *Server) makeMeshGatewayListener(name, addr string, port int, cfgSnap *p
933933
for _, srv := range cfgSnap.MeshGateway.ConsulServers {
934934
clusterName := cfgSnap.ServerSNIFn(cfgSnap.Datacenter, srv.Node.Node)
935935

936-
filterName := fmt.Sprintf("%s_%s", name, cfgSnap.Datacenter)
937-
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_local_server_")
936+
filterName := fmt.Sprintf("%s.%s", name, cfgSnap.Datacenter)
937+
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_local_server.")
938938
if err != nil {
939939
return nil, err
940940
}
@@ -976,38 +976,61 @@ func (s *Server) makeUpstreamListenerForDiscoveryChain(
976976
}
977977

978978
useRDS := true
979-
clusterName := ""
979+
var (
980+
clusterName string
981+
destination, datacenter, namespace string
982+
)
980983
if chain == nil || chain.IsDefault() {
984+
useRDS = false
985+
981986
dc := u.Datacenter
982987
if dc == "" {
983988
dc = cfgSnap.Datacenter
984989
}
985-
sni := connect.UpstreamSNI(u, "", dc, cfgSnap.Roots.TrustDomain)
990+
destination, datacenter, namespace = u.DestinationName, dc, u.DestinationNamespace
986991

987-
useRDS = false
992+
sni := connect.UpstreamSNI(u, "", dc, cfgSnap.Roots.TrustDomain)
988993
clusterName = CustomizeClusterName(sni, chain)
989994

990-
} else if cfg.Protocol == "tcp" {
991-
startNode := chain.Nodes[chain.StartNode]
992-
if startNode == nil {
993-
return nil, fmt.Errorf("missing first node in compiled discovery chain for: %s", chain.ServiceName)
994-
} else if startNode.Type != structs.DiscoveryGraphNodeTypeResolver {
995-
return nil, fmt.Errorf("unexpected first node in discovery chain using protocol=%q: %s", cfg.Protocol, startNode.Type)
995+
} else {
996+
destination, datacenter, namespace = chain.ServiceName, chain.Datacenter, chain.Namespace
997+
998+
if cfg.Protocol == "tcp" {
999+
useRDS = false
1000+
1001+
startNode := chain.Nodes[chain.StartNode]
1002+
if startNode == nil {
1003+
return nil, fmt.Errorf("missing first node in compiled discovery chain for: %s", chain.ServiceName)
1004+
}
1005+
if startNode.Type != structs.DiscoveryGraphNodeTypeResolver {
1006+
return nil, fmt.Errorf("unexpected first node in discovery chain using protocol=%q: %s", cfg.Protocol, startNode.Type)
1007+
}
1008+
targetID := startNode.Resolver.Target
1009+
target := chain.Targets[targetID]
1010+
1011+
clusterName = CustomizeClusterName(target.Name, chain)
9961012
}
997-
targetID := startNode.Resolver.Target
998-
target := chain.Targets[targetID]
1013+
}
9991014

1000-
useRDS = false
1001-
clusterName = CustomizeClusterName(target.Name, chain)
1015+
// Default the namespace to match how SNIs are generated
1016+
if namespace == "" {
1017+
namespace = structs.IntentionDefaultNamespace
1018+
}
1019+
filterName := fmt.Sprintf("%s.%s.%s", destination, namespace, datacenter)
1020+
1021+
if u.DestinationType == structs.UpstreamDestTypePreparedQuery {
1022+
// Avoid encoding dc and namespace for prepared queries.
1023+
// Those are defined in the query itself and are not available here.
1024+
filterName = upstreamID
10021025
}
10031026

10041027
opts := listenerFilterOpts{
10051028
useRDS: useRDS,
10061029
protocol: cfg.Protocol,
1007-
filterName: upstreamID,
1030+
filterName: filterName,
10081031
routeName: upstreamID,
10091032
cluster: clusterName,
1010-
statPrefix: "upstream_",
1033+
statPrefix: "upstream.",
10111034
routePath: "",
10121035
ingress: false,
10131036
httpAuthzFilter: nil,
@@ -1120,31 +1143,27 @@ func makeSNIClusterFilter() (*envoylistener.Filter, error) {
11201143

11211144
func makeTCPProxyFilter(filterName, cluster, statPrefix string) (*envoylistener.Filter, error) {
11221145
cfg := &envoytcp.TcpProxy{
1123-
StatPrefix: makeStatPrefix("tcp", statPrefix, filterName),
1146+
StatPrefix: makeStatPrefix(statPrefix, filterName),
11241147
ClusterSpecifier: &envoytcp.TcpProxy_Cluster{Cluster: cluster},
11251148
}
11261149
return makeFilter("envoy.tcp_proxy", cfg, false)
11271150
}
11281151

1129-
func makeStatPrefix(protocol, prefix, filterName string) string {
1152+
func makeStatPrefix(prefix, filterName string) string {
11301153
// Replace colons here because Envoy does that in the metrics for the actual
11311154
// clusters but doesn't in the stat prefix here while dashboards assume they
11321155
// will match.
1133-
return fmt.Sprintf("%s%s_%s", prefix, strings.Replace(filterName, ":", "_", -1), protocol)
1156+
return fmt.Sprintf("%s%s", prefix, strings.Replace(filterName, ":", "_", -1))
11341157
}
11351158

11361159
func makeHTTPFilter(opts listenerFilterOpts) (*envoylistener.Filter, error) {
11371160
op := envoyhttp.HttpConnectionManager_Tracing_INGRESS
11381161
if !opts.ingress {
11391162
op = envoyhttp.HttpConnectionManager_Tracing_EGRESS
11401163
}
1141-
proto := "http"
1142-
if opts.protocol == "grpc" {
1143-
proto = opts.protocol
1144-
}
11451164

11461165
cfg := &envoyhttp.HttpConnectionManager{
1147-
StatPrefix: makeStatPrefix(proto, opts.statPrefix, opts.filterName),
1166+
StatPrefix: makeStatPrefix(opts.statPrefix, opts.filterName),
11481167
CodecType: envoyhttp.HttpConnectionManager_AUTO,
11491168
HttpFilters: []*envoyhttp.HttpFilter{
11501169
{

Diff for: agent/xds/listeners_test.go

+3 -3
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,7 @@ func expectListenerJSONResources(t *testing.T, snap *proxycfg.ConfigSnapshot) ma
577577
"name": "envoy.tcp_proxy",
578578
"config": {
579579
"cluster": "local_app",
580-
"stat_prefix": "public_listener_tcp"
580+
"stat_prefix": "public_listener"
581581
}
582582
}
583583
]
@@ -600,7 +600,7 @@ func expectListenerJSONResources(t *testing.T, snap *proxycfg.ConfigSnapshot) ma
600600
"name": "envoy.tcp_proxy",
601601
"config": {
602602
"cluster": "db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
603-
"stat_prefix": "upstream_db_tcp"
603+
"stat_prefix": "upstream.db.default.dc1"
604604
}
605605
}
606606
]
@@ -623,7 +623,7 @@ func expectListenerJSONResources(t *testing.T, snap *proxycfg.ConfigSnapshot) ma
623623
"name": "envoy.tcp_proxy",
624624
"config": {
625625
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
626-
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
626+
"stat_prefix": "upstream.prepared_query_geo-cache"
627627
}
628628
}
629629
]

Diff for: agent/xds/testdata/listeners/connect-proxy-with-chain-and-overrides.envoy-1-13-x.golden

+3 -3
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
},
3636
"route_config_name": "db"
3737
},
38-
"stat_prefix": "upstream_db_grpc",
38+
"stat_prefix": "upstream.db.default.dc1",
3939
"tracing": {
4040
"operation_name": "EGRESS",
4141
"random_sampling": {
@@ -63,7 +63,7 @@
6363
"name": "envoy.tcp_proxy",
6464
"config": {
6565
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
66-
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
66+
"stat_prefix": "upstream.prepared_query_geo-cache"
6767
}
6868
}
6969
]
@@ -117,7 +117,7 @@
117117
"name": "envoy.tcp_proxy",
118118
"config": {
119119
"cluster": "local_app",
120-
"stat_prefix": "public_listener_tcp"
120+
"stat_prefix": "public_listener"
121121
}
122122
}
123123
]

Diff for: agent/xds/testdata/listeners/connect-proxy-with-chain-and-overrides.envoy-1-14-x.golden

+3 -3
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
},
3636
"route_config_name": "db"
3737
},
38-
"stat_prefix": "upstream_db_grpc",
38+
"stat_prefix": "upstream.db.default.dc1",
3939
"tracing": {
4040
"operation_name": "EGRESS",
4141
"random_sampling": {
@@ -63,7 +63,7 @@
6363
"name": "envoy.tcp_proxy",
6464
"config": {
6565
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
66-
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
66+
"stat_prefix": "upstream.prepared_query_geo-cache"
6767
}
6868
}
6969
]
@@ -117,7 +117,7 @@
117117
"name": "envoy.tcp_proxy",
118118
"config": {
119119
"cluster": "local_app",
120-
"stat_prefix": "public_listener_tcp"
120+
"stat_prefix": "public_listener"
121121
}
122122
}
123123
]

Diff for: agent/xds/testdata/listeners/connect-proxy-with-chain-and-overrides.envoy-1-15-x.golden

+3 -3
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
},
3636
"route_config_name": "db"
3737
},
38-
"stat_prefix": "upstream_db_grpc",
38+
"stat_prefix": "upstream.db.default.dc1",
3939
"tracing": {
4040
"operation_name": "EGRESS",
4141
"random_sampling": {
@@ -63,7 +63,7 @@
6363
"name": "envoy.tcp_proxy",
6464
"config": {
6565
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
66-
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
66+
"stat_prefix": "upstream.prepared_query_geo-cache"
6767
}
6868
}
6969
]
@@ -117,7 +117,7 @@
117117
"name": "envoy.tcp_proxy",
118118
"config": {
119119
"cluster": "local_app",
120-
"stat_prefix": "public_listener_tcp"
120+
"stat_prefix": "public_listener"
121121
}
122122
}
123123
]

Diff for: agent/xds/testdata/listeners/connect-proxy-with-chain-and-overrides.envoy-1-16-x.golden

+3 -3
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
},
3636
"route_config_name": "db"
3737
},
38-
"stat_prefix": "upstream_db_grpc",
38+
"stat_prefix": "upstream.db.default.dc1",
3939
"tracing": {
4040
"operation_name": "EGRESS",
4141
"random_sampling": {
@@ -63,7 +63,7 @@
6363
"name": "envoy.tcp_proxy",
6464
"config": {
6565
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
66-
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
66+
"stat_prefix": "upstream.prepared_query_geo-cache"
6767
}
6868
}
6969
]
@@ -117,7 +117,7 @@
117117
"name": "envoy.tcp_proxy",
118118
"config": {
119119
"cluster": "local_app",
120-
"stat_prefix": "public_listener_tcp"
120+
"stat_prefix": "public_listener"
121121
}
122122
}
123123
]

Diff for: agent/xds/testdata/listeners/connect-proxy-with-chain-external-sni.envoy-1-13-x.golden

+3 -3
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
"name": "envoy.tcp_proxy",
1818
"config": {
1919
"cluster": "db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
20-
"stat_prefix": "upstream_db_tcp"
20+
"stat_prefix": "upstream.db.default.dc1"
2121
}
2222
}
2323
]
@@ -40,7 +40,7 @@
4040
"name": "envoy.tcp_proxy",
4141
"config": {
4242
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
43-
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
43+
"stat_prefix": "upstream.prepared_query_geo-cache"
4444
}
4545
}
4646
]
@@ -94,7 +94,7 @@
9494
"name": "envoy.tcp_proxy",
9595
"config": {
9696
"cluster": "local_app",
97-
"stat_prefix": "public_listener_tcp"
97+
"stat_prefix": "public_listener"
9898
}
9999
}
100100
]

Diff for: agent/xds/testdata/listeners/connect-proxy-with-chain-external-sni.envoy-1-14-x.golden

+3 -3
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
"name": "envoy.tcp_proxy",
1818
"config": {
1919
"cluster": "db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul",
20-
"stat_prefix": "upstream_db_tcp"
20+
"stat_prefix": "upstream.db.default.dc1"
2121
}
2222
}
2323
]
@@ -40,7 +40,7 @@
4040
"name": "envoy.tcp_proxy",
4141
"config": {
4242
"cluster": "geo-cache.default.dc1.query.11111111-2222-3333-4444-555555555555.consul",
43-
"stat_prefix": "upstream_prepared_query_geo-cache_tcp"
43+
"stat_prefix": "upstream.prepared_query_geo-cache"
4444
}
4545
}
4646
]
@@ -94,7 +94,7 @@
9494
"name": "envoy.tcp_proxy",
9595
"config": {
9696
"cluster": "local_app",
97-
"stat_prefix": "public_listener_tcp"
97+
"stat_prefix": "public_listener"
9898
}
9999
}
100100
]

0 commit comments

Comments
 (0)