Skip to content

Commit d096adf

Browse files
committed
Allow selective querying for prometheus plugin
1 parent 8f207e3 commit d096adf

File tree

7 files changed

+591
-79
lines changed

7 files changed

+591
-79
lines changed

deps/rabbitmq_prometheus/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,29 @@ To go back to aggregated metrics on-the-fly, run the following command:
8585
rabbitmqctl eval 'application:set_env(rabbitmq_prometheus, return_per_object_metrics, false).'
8686
```
8787

88+
## Reducing number of metrics
89+
90+
As mentioned in the previous section, returning a lot of metrics is a computationally intensive process.
91+
92+
Default endpoints `/metrics` and `/metrics/per-object` expose every possible metric, including low-level stats like Erlang VM stats and higher-level things like queue stats.
93+
94+
Using aggregation is one way to reduce the number of metrics and somewhat reduce CPU usage.
95+
96+
It's also possible to completely disable some groups of higher-level metrics that provide an excessive level of detail, or are of no interest at all in given circumstances. For queue metrics it's also possible to filter on a per-vhost basis - that can be useful if there is a way to identify less interesting queues (like one-off transient queues for RPC) and exclude them from the output.
97+
98+
Those customizations can be applied to default endpoints using a configuration file. The following config snippet will make default endpoints expose just enough information to get the number of messages and the number of consumers for each queue, but only in the default vhost `/`:
99+
100+
```ini
101+
prometheus.core_metrics.default_families.1 = queue_coarse_metrics
102+
prometheus.core_metrics.default_families.2 = queue_consumer_count
103+
prometheus.core_metrics.default_vhosts.1 = /
104+
```
105+
106+
Even when the number of metrics for the default endpoints is reduced, it's possible to get any combination of those configurable metrics via a separate endpoint, where HTTP `GET` parameters determine what exactly should be returned. E.g. scraping `/metrics/core?vhost=vhost-1&vhost=vhost-2&family=queue_coarse_metrics&family=queue_consumer_count` will only return requested metrics (and not, for example, low-level Erlang VM metrics). It supports the following parameters:
107+
108+
* Zero or more `family` - if given, only these metric families will be returned. The full list is documented in [metrics](metrics.md), and it's the same names that are being used in the config file.
109+
* Zero or more `vhost` - if given, queue related metrics (`queue_coarse_metrics`, `queue_consumer_count` and `queue_metrics`) will be returned only for the given vhost(s).
110+
* Optional `per-object` - if it's `1` or `true`, per-object values without aggregation will be returned.
88111

89112
## Contributing
90113

deps/rabbitmq_prometheus/priv/schema/rabbitmq_prometheus.schema

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
{mapping, "prometheus.return_per_object_metrics", "rabbitmq_prometheus.return_per_object_metrics",
1616
[{datatype, {enum, [true, false]}}]}.
1717

18+
%% families by default
19+
%% vhosts by default
20+
1821
%% Endpoint path
1922
{mapping, "prometheus.path", "rabbitmq_prometheus.path",
2023
[{datatype, string}]}.
@@ -99,19 +102,19 @@
99102
[{datatype, atom}]}.
100103

101104
{translation, "rabbitmq_prometheus.ssl_config.versions",
102-
fun(Conf) ->
105+
fun(Conf) ->
103106
Settings = cuttlefish_variable:filter_by_prefix("prometheus.ssl.versions", Conf),
104107
[V || {_, V} <- Settings]
105-
end}.
108+
end}.
106109

107110
{mapping, "prometheus.ssl.ciphers.$cipher", "rabbitmq_prometheus.ssl_config.ciphers",
108111
[{datatype, string}]}.
109112

110113
{translation, "rabbitmq_prometheus.ssl_config.ciphers",
111-
fun(Conf) ->
114+
fun(Conf) ->
112115
Settings = cuttlefish_variable:filter_by_prefix("prometheus.ssl.ciphers", Conf),
113116
lists:reverse([V || {_, V} <- Settings])
114-
end}.
117+
end}.
115118

116119
{mapping, "prometheus.ssl.compress", "rabbitmq_prometheus.ssl_config.cowboy_opts.compress",
117120
[{datatype, {enum, [true, false]}}]}.
@@ -125,3 +128,43 @@ end}.
125128
[{datatype, integer}, {validators, ["non_negative_integer"]}]}.
126129
{mapping, "prometheus.ssl.max_keepalive", "rabbitmq_prometheus.ssl_config.cowboy_opts.max_keepalive",
127130
[{datatype, integer}, {validators, ["non_negative_integer"]}]}.
131+
132+
{mapping, "prometheus.core_metrics.default_families.$num", "rabbitmq_prometheus.core_metrics_default_families",
133+
[{datatype, {enum, [connection_churn_metrics, node_coarse_metrics, node_persister_metrics, ra_metrics,
134+
queue_coarse_metrics, queue_consumer_count, queue_metrics, channel_metrics,
135+
channel_exchange_metrics, channel_process_metrics, channel_queue_metrics,
136+
connection_coarse_metrics, connection_metrics, channel_queue_exchange_metrics,
137+
auth_attempt_metrics, auth_attempt_detailed_metrics]}}]}.
138+
139+
%% Collects every `prometheus.core_metrics.default_families.$num` setting into
%% a list of metric family names, ordered by the integer value of the `$num`
%% key segment. A `$num` segment that is not an integer is rejected through
%% cuttlefish:invalid/1.
{translation, "rabbitmq_prometheus.core_metrics_default_families",
fun(Conf) ->
    Settings = cuttlefish_variable:filter_by_prefix("prometheus.core_metrics.default_families", Conf),
    FamiliesWithPosition =
        [ {try
               list_to_integer(Num)
           catch
               %% Only a malformed position is expected here; report the
               %% offending key segment rather than an 'EXIT' tuple.
               error:badarg ->
                   cuttlefish:invalid(iolist_to_binary(io_lib:format("Metrics family position should be an integer: ~p", [Num])))
           end,
           FamilyName}
          || {["prometheus", "core_metrics", "default_families", Num], FamilyName} <- Settings],
    %% Sort on the numeric position, then drop it.
    [Family || {_, Family} <- lists:keysort(1, FamiliesWithPosition)]
end}.
153+
154+
{mapping, "prometheus.core_metrics.default_vhosts.$num", "rabbitmq_prometheus.core_metrics_default_vhosts",
155+
[{datatype, string}]}.
156+
157+
%% Collects every `prometheus.core_metrics.default_vhosts.$num` setting into
%% a list of vhost names (as binaries), ordered by the integer value of the
%% `$num` key segment. A `$num` segment that is not an integer is rejected
%% through cuttlefish:invalid/1.
{translation, "rabbitmq_prometheus.core_metrics_default_vhosts",
fun(Conf) ->
    Settings = cuttlefish_variable:filter_by_prefix("prometheus.core_metrics.default_vhosts", Conf),
    VHostsWithPosition =
        [ {try
               list_to_integer(Num)
           catch
               %% Only a malformed position is expected here; report the
               %% offending key segment rather than an 'EXIT' tuple.
               error:badarg ->
                   cuttlefish:invalid(iolist_to_binary(io_lib:format("Prometheus exposed vhost position should be an integer: ~p", [Num])))
           end,
           list_to_binary(VHost)}
          || {["prometheus", "core_metrics", "default_vhosts", Num], VHost} <- Settings],
    %% Sort on the numeric position, then drop it.
    [Name || {_, Name} <- lists:keysort(1, VHostsWithPosition)]
end}.

0 commit comments

Comments
 (0)