
Commit

Merge pull request #860 from 3scale/metrics-policy
Metrics policy
davidor authored Aug 30, 2018
2 parents 8813f78 + 77cf346 commit c27e7f5
Showing 12 changed files with 313 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Liquid Context Debugging policy. It's a policy meant only for debugging purposes; it returns the context available when evaluating Liquid [PR #849](https://github.com/3scale/apicast/pull/849)
- Logging policy. It allows enabling/disabling access logs per service [PR #856](https://github.com/3scale/apicast/pull/856), [THREESCALE-1148](https://issues.jboss.org/browse/THREESCALE-1148)
- Support JWK through OIDC Discovery [PR #850](https://github.com/3scale/apicast/pull/850)
- Initial Prometheus metrics policy (backend responses and nginx metrics) [PR #860](https://github.com/3scale/apicast/pull/860), [THREESCALE-1230](https://issues.jboss.org/browse/THREESCALE-1230)

### Changed

1 change: 1 addition & 0 deletions gateway/config/production.lua
@@ -4,4 +4,5 @@ return {
configuration_loader = 'boot',
configuration_cache = os.getenv('APICAST_CONFIGURATION_CACHE') or 5*60,
timer_resolution = '100ms',
port = { metrics = 9421 },
}
1 change: 1 addition & 0 deletions gateway/config/staging.lua
@@ -3,4 +3,5 @@ return {
lua_code_cache = 'on',
configuration_loader = 'lazy',
configuration_cache = os.getenv('APICAST_CONFIGURATION_CACHE'),
port = { metrics = 9421 }, -- see https://github.com/prometheus/prometheus/wiki/Default-port-allocations
}
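Both environments now expose the metrics endpoint on port 9421 (a port taken from the Prometheus default-port-allocations list linked above). As a hedged sketch with a hypothetical file name, a custom environment file can set the same key, just as the test fixture later in this diff does:

-- custom.lua (hypothetical environment file)
return {
  port = { metrics = 9100 }, -- serve /metrics on a different port
}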
3 changes: 3 additions & 0 deletions gateway/http.d/lua_capture_error_log.conf
@@ -0,0 +1,3 @@
# To be able to use the ngx.errlog methods that we call from the Metrics policy
# Ref: https://github.com/openresty/lua-nginx-module#lua_capture_error_log
lua_capture_error_log 4k;
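For reference, a minimal sketch of the ngx.errlog calls this capture buffer enables (the same calls the nginx_metrics policy further down makes; the warn level here is only an illustrative choice):

local errlog = require('ngx.errlog')

-- Capture only entries at 'warn' severity or more severe (illustrative choice).
local ok, err = errlog.set_filter_level(ngx.WARN)
if not ok then ngx.log(ngx.ERR, 'failed to set the errlog filter level: ', err) end

-- Drain up to 100 captured entries; the result is a flat array of
-- (level, timestamp, message) triples, which is why the policy walks it in steps of 3.
local logs = errlog.get_logs(100)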
9 changes: 9 additions & 0 deletions gateway/src/apicast/backend_client.lua
@@ -20,6 +20,7 @@ local http_ng = require('resty.http_ng')
local user_agent = require('apicast.user_agent')
local resty_url = require('resty.url')
local resty_env = require('resty.env')
local threescale_backend_status_counters = require('apicast.metrics.3scale_backend_status')

local http_proxy = require('resty.http.proxy')
local http_ng_ngx = require('resty.http_ng.backend.ngx')
@@ -97,6 +98,10 @@ function _M:new(service, http_client)
}, mt)
end

local function inc_backend_status_metric(status)
threescale_backend_status_counters.inc(status)
end

local function build_args(args)
local query = {}

@@ -133,6 +138,8 @@ local function call_backend_transaction(self, path, options, ...)

ngx.log(ngx.INFO, 'backend client uri: ', url, ' ok: ', res.ok, ' status: ', res.status, ' body: ', res.body, ' error: ', res.error)

inc_backend_status_metric(res.status)

return res
end

@@ -229,6 +236,8 @@ function _M:report(reports_batch)
local report_body = format_transactions(reports_batch)
local res = http_client.post(report_uri, report_body)

inc_backend_status_metric(res.status)

return res
end

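With these two hooks, every call that goes through call_backend_transaction (authorize/authrep) and every batched report posted by _M:report bumps the shared backend_response counter, labelled by status class. For example, after two authorized calls and one denied one, a scrape of /metrics would be expected to contain lines like the following (hypothetical values, same format as TEST 2 at the end of this diff):

backend_response{status="2xx"} 2
backend_response{status="4xx"} 1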
27 changes: 27 additions & 0 deletions gateway/src/apicast/metrics/3scale_backend_status.lua
@@ -0,0 +1,27 @@
local prometheus = require('apicast.prometheus')
local metrics_updater = require('apicast.metrics.updater')

local format = string.format

local _M = {}

local backend_response_metric = prometheus(
'counter',
'backend_response',
"Response status codes from 3scale's backend",
{ 'status' }
)

local function label_for_status(status)
if not status or status == 0 then
return 'invalid_status'
else
return format("%dxx", status/100)
end
end

function _M.inc(status)
metrics_updater.inc(backend_response_metric, label_for_status(status))
end

return _M
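A minimal usage sketch of this module (purely illustrative; the real callers are the two hooks in backend_client.lua above), showing how label_for_status buckets raw status codes into classes:

local backend_status = require('apicast.metrics.3scale_backend_status')

backend_status.inc(200) -- counted as backend_response{status="2xx"}
backend_status.inc(403) -- counted as backend_response{status="4xx"}
backend_status.inc(503) -- counted as backend_response{status="5xx"}
backend_status.inc(nil) -- no status (for example, a connection error): counted as "invalid_status"
backend_status.inc(0)   -- a status of 0 is also counted as "invalid_status"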
20 changes: 20 additions & 0 deletions gateway/src/apicast/metrics/updater.lua
@@ -0,0 +1,20 @@
local tonumber = tonumber

local _M = {}

local function metric_op(op, metric, value, label)
local metric_labels = {}
if not metric then return end
metric_labels[1] = label
metric[op](metric, tonumber(value) or 0, metric_labels)
end

function _M.set(metric, value, label)
return metric_op('set', metric, value, label)
end

function _M.inc(metric, label)
return metric_op('inc', metric, 1, label)
end

return _M
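A short sketch of how this helper is meant to be used (the metrics here are hypothetical; the nil guard above makes both calls no-ops when the metric could not be created):

local prometheus = require('apicast.prometheus')
local metrics_updater = require('apicast.metrics.updater')

-- Hypothetical metrics, declared the same way as the real ones in this commit.
local events = prometheus('counter', 'example_events', 'Example event counter', { 'type' })
local queue_size = prometheus('gauge', 'example_queue_size', 'Example queue size', { 'queue' })

metrics_updater.inc(events, 'flush')           -- example_events{type="flush"} += 1
metrics_updater.set(queue_size, 42, 'reports') -- example_queue_size{queue="reports"} = 42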
1 change: 1 addition & 0 deletions gateway/src/apicast/policy/nginx_metrics/init.lua
@@ -0,0 +1 @@
return require('nginx_metrics')
113 changes: 113 additions & 0 deletions gateway/src/apicast/policy/nginx_metrics/nginx_metrics.lua
@@ -0,0 +1,113 @@
local _M = require('apicast.policy').new('Metrics')

local resty_env = require('resty.env')
local errlog = require('ngx.errlog')
local prometheus = require('apicast.prometheus')
local metrics_updater = require('apicast.metrics.updater')
local tonumber = tonumber
local select = select
local find = string.find
local pairs = pairs

local new = _M.new

local log_levels_list = {
'emerg',
'alert',
'crit',
'error',
'warn',
'notice',
'info',
'debug',
}

local log_level_env = 'NGINX_METRICS_LOG_LEVEL'
local max_logs_env = 'NGINX_METRICS_MAX_LOGS'

local log_level_default = 'error'
local max_logs_default = 100

local function find_i(t, value)
for i=1, #t do
if t[i] == value then return i end
end
end

local empty = {}

local function get_logs(max)
return errlog.get_logs(max) or empty
end

local function filter_level()
local level = resty_env.value(log_level_env) or log_level_default

local level_index = find_i(log_levels_list, level)

if not level_index then
ngx.log(ngx.WARN, _M._NAME, ': invalid level: ', level, ' using error instead')
level_index = find_i(log_levels_list, 'error')
end

return level_index
end

function _M.new(configuration)
local m = new()

local config = configuration or empty

-- how many logs to take in one iteration
m.max_logs = tonumber(config.max_logs) or
resty_env.value(max_logs_env) or
max_logs_default

return m
end

local logs_metric = prometheus('counter', 'nginx_error_log', "Items in nginx error log", {'level'})
local http_connections_metric = prometheus('gauge', 'nginx_http_connections', 'Number of HTTP connections', {'state'})
local shdict_capacity_metric = prometheus('gauge', 'openresty_shdict_capacity', 'OpenResty shared dictionary capacity', {'dict'})
local shdict_free_space_metric = prometheus('gauge', 'openresty_shdict_free_space', 'OpenResty shared dictionary free space', {'dict'})

function _M.init()
errlog.set_filter_level(filter_level())

get_logs(100) -- to throw them away after setting the filter level (and get rid of debug ones)

for name,dict in pairs(ngx.shared) do
metrics_updater.set(shdict_capacity_metric, dict:capacity(), name)
end
end

function _M:metrics()
local logs = get_logs(self.max_logs)

for i = 1, #logs, 3 do
metrics_updater.inc(logs_metric, log_levels_list[logs[i]] or 'unknown')
end

local response = ngx.location.capture("/nginx_status")

if response.status == 200 then
local accepted, handled, total = select(3, find(response.body, [[accepts handled requests%s+(%d+) (%d+) (%d+)]]))
local var = ngx.var

metrics_updater.set(http_connections_metric, var.connections_reading, 'reading')
metrics_updater.set(http_connections_metric, var.connections_waiting, 'waiting')
metrics_updater.set(http_connections_metric, var.connections_writing, 'writing')
metrics_updater.set(http_connections_metric, var.connections_active, 'active')
metrics_updater.set(http_connections_metric, accepted, 'accepted')
metrics_updater.set(http_connections_metric, handled, 'handled')
metrics_updater.set(http_connections_metric, total, 'total')
else
prometheus:log_error('Could not get status from nginx')
end

for name,dict in pairs(ngx.shared) do
metrics_updater.set(shdict_free_space_metric, dict:free_space(), name)
end
end

return _M
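The policy has two knobs, both visible in the code above: NGINX_METRICS_LOG_LEVEL (default 'error') sets the least severe error-log level that is still captured and counted in nginx_error_log, and NGINX_METRICS_MAX_LOGS (or the max_logs field of the policy configuration, default 100) caps how many captured entries are drained on each /metrics request. A hypothetical direct instantiation, just to illustrate the configuration shape (in practice the policy chain builds it):

local NginxMetrics = require('apicast.policy.nginx_metrics')

-- Drain at most 50 captured error-log entries per /metrics request.
local policy = NginxMetrics.new({ max_logs = 50 })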
3 changes: 2 additions & 1 deletion gateway/src/apicast/policy_chain.lua
@@ -60,7 +60,8 @@ end
local DEFAULT_POLICIES = {
'apicast.policy.load_configuration',
'apicast.policy.find_service',
'apicast.policy.local_chain'
'apicast.policy.local_chain',
'apicast.policy.nginx_metrics'
}

--- Return new policy chain with default policies.
14 changes: 14 additions & 0 deletions t/fixtures/configs/without_nginx_metrics.lua
@@ -0,0 +1,14 @@
local PolicyChain = require('apicast.policy_chain')

local policies = {
'apicast.policy.load_configuration',
'apicast.policy.find_service',
'apicast.policy.local_chain'
}

local policy_chain = PolicyChain.build(policies)

return {
policy_chain = policy_chain,
port = { metrics = 9421 },
}
121 changes: 121 additions & 0 deletions t/prometheus-metrics.t
@@ -0,0 +1,121 @@
use lib 't';
use Test::APIcast::Blackbox 'no_plan';

# The output varies between requests, so run only once
repeat_each(1);

run_tests();

__DATA__
=== TEST 1: metrics endpoint works
--- configuration
{
}
--- request
GET /metrics
--- more_headers
Host: metrics
--- response_body
# HELP nginx_http_connections Number of HTTP connections
# TYPE nginx_http_connections gauge
nginx_http_connections{state="accepted"} 1
nginx_http_connections{state="active"} 1
nginx_http_connections{state="handled"} 1
nginx_http_connections{state="reading"} 0
nginx_http_connections{state="total"} 1
nginx_http_connections{state="waiting"} 0
nginx_http_connections{state="writing"} 1
# HELP nginx_metric_errors_total Number of nginx-lua-prometheus errors
# TYPE nginx_metric_errors_total counter
nginx_metric_errors_total 0
# HELP openresty_shdict_capacity OpenResty shared dictionary capacity
# TYPE openresty_shdict_capacity gauge
openresty_shdict_capacity{dict="api_keys"} 10485760
openresty_shdict_capacity{dict="batched_reports"} 1048576
openresty_shdict_capacity{dict="batched_reports_locks"} 1048576
openresty_shdict_capacity{dict="cached_auths"} 1048576
openresty_shdict_capacity{dict="configuration"} 10485760
openresty_shdict_capacity{dict="init"} 16384
openresty_shdict_capacity{dict="limiter"} 1048576
openresty_shdict_capacity{dict="locks"} 1048576
openresty_shdict_capacity{dict="prometheus_metrics"} 16777216
# HELP openresty_shdict_free_space OpenResty shared dictionary free space
# TYPE openresty_shdict_free_space gauge
openresty_shdict_free_space{dict="api_keys"} 10412032
openresty_shdict_free_space{dict="batched_reports"} 1032192
openresty_shdict_free_space{dict="batched_reports_locks"} 1032192
openresty_shdict_free_space{dict="cached_auths"} 1032192
openresty_shdict_free_space{dict="configuration"} 10412032
openresty_shdict_free_space{dict="init"} 4096
openresty_shdict_free_space{dict="limiter"} 1032192
openresty_shdict_free_space{dict="locks"} 1032192
openresty_shdict_free_space{dict="prometheus_metrics"} 16662528
--- error_code: 200
--- no_error_log
[error]
=== TEST 2: metrics endpoint shows backend responses when the APIcast policy is in the chain
We do a couple of authorized requests to backend (2xx) and a couple of
unauthorized ones (4xx) and check that those metrics are shown correctly when
calling the prometheus metrics endpoint.
To simplify the output of the metrics endpoint, we use an environment config
that does not include the nginx metrics (tested in the previous test).
--- environment_file: t/fixtures/configs/without_nginx_metrics.lua
--- configuration
{
"services": [
{
"id": 42,
"backend_version": 1,
"backend_authentication_type": "service_token",
"backend_authentication_value": "token-value",
"proxy": {
"api_backend": "http://test:$TEST_NGINX_SERVER_PORT/",
"proxy_rules": [
{ "pattern": "/", "http_method": "GET", "metric_system_name": "hits", "delta": 1 }
],
"policy_chain": [
{ "name": "apicast.policy.apicast" }
]
}
}
]
}
--- upstream
location / {
content_by_lua_block {
ngx.say('yay, api backend');
}
}
--- backend
location /transactions/authrep.xml {
content_by_lua_block {
-- Check only the user key and assume the rest of params are OK
if ngx.req.get_uri_args(0)['user_key'] == 'invalid' then
ngx.exit(403)
else
ngx.exit(200)
end
}
}
--- request eval
["GET /?user_key=valid", "GET /?user_key=valid", "GET /?user_key=invalid", "GET /?user_key=invalid", "GET /metrics"]
--- more_headers eval
["", "", "", "", "Host: metrics"]
--- error_code eval
[ 200, 200, 403, 403, 200 ]
--- response_body eval
[ "yay, api backend\x{0a}", "yay, api backend\x{0a}", "Authentication failed", "Authentication failed",
<<'METRICS_OUTPUT'
# HELP backend_response Response status codes from 3scale's backend
# TYPE backend_response counter
backend_response{status="2xx"} 2
backend_response{status="4xx"} 2
# HELP nginx_metric_errors_total Number of nginx-lua-prometheus errors
# TYPE nginx_metric_errors_total counter
nginx_metric_errors_total 0
METRICS_OUTPUT
]
--- no_error_log
[error]
