diff --git a/README.md b/README.md index 92d0db2..e37e094 100644 --- a/README.md +++ b/README.md @@ -86,9 +86,10 @@ Example: ### Layers -We can differentiate the processing of a request in KrakenD in 3 main stages: +We can differentiate the processing of a request in KrakenD in 3 main stages (each one +including or wrapping the inner stage): -- `router`: the part that comes before the `Lura`'s framework starts working with +- `global`: the part that comes before the `Lura`'s framework starts working with the request. In the case of [KrakenD CE](https://github.com/krakend/krakend-ce), this stage is implemented usin [gin](https://github.com/gin-gonic/gin) @@ -104,7 +105,7 @@ We can differentiate the processing of a request in KrakenD in 3 main stages: For each of those layers it can be selected the deatail of metrics and traces that we want to report. -#### router +#### global At the router level we have 3 main options: @@ -114,7 +115,7 @@ At the router level we have 3 main options: traces (so spans from a previous layer are linked to the KrakenD trace). ```json -"router": { +"global": { "disable_metrics": false, "disable_traces": false, "disable_propagation": false @@ -123,12 +124,12 @@ At the router level we have 3 main options: ##### Metrics -- `router-response-latency`: histogram of the time it takes to produce the response. +- `http.server.duration`: histogram of the time it takes to produce the response. Attributes: - `http.response.status_code`: status code of the produced response - `url.path`: the matched endpoint path - `krakend.stage`: always with value `router` -- `router-response-size`: histogram of the size of the body produced for the response. +- `http.server.response.size`: histogram of the size of the body produced for the response. 
Attributes: - `http.response.status_code`: status code of the produced response - `url.path`: the matched endpoint path @@ -151,12 +152,11 @@ At the pipe level we only have 2 options: ##### Metrics -- `stage-duration`: histogram of the time it takes to produce the response. +- `krakend.pipe.duration`: histogram of the time it takes to produce the response. Attributes: - `url.path`: the matched endpoint path that **krakend is serving** (is different than in `backend`, krakend stage, when this property is the path for the backend we are targetting). - - `krakend.stage`: always with value `pipe` ##### Traces @@ -222,13 +222,11 @@ For both, the `metrics` and `traces` part, we can select the same options: ##### Metrics -- `stage-duration`: histogram of the time it takes to produce the response. Controlled +- `krakend.backend.duration`: histogram of the time it takes to produce the response. Controlled by the `disable_stage` flag (if set to `true` this metric will not appear). Attributes: - `url.path`: the matched endpoint path that **krakend is serving** (is different than in `backend`, krakend stage, when this property is the path - for the backend we are targetting). - - `krakend.stage`: always with value `backend` - `krakend.endpoint`: this attribute is set to the krakend exposed endpoint that is the "parent" of the backend request. - `server.address`: the target host (in case more than one are provided, those @@ -245,17 +243,18 @@ The following metrics are enabled if `round_trip` is set to true, and share the metric, or if we should change that attribute there). - `krakend.stage`: always with value `backend-request` +- `http.client.duration`: histogram with the time taken since starting a request, until + having the first byte of the body ready to read. -- `requests-started-count`: number of requests started. -- `requests-failed-count`: number of requests failed. -- `requests-canceled-count`: number of canceled request. 
-- `requests-timedout-count`: number of timed out requests. -- `requests-content-length`: counter wit the sum of `Content-Length` header for the +- `http.client.request.started.count`: number of requests started. +- `http.client.request.failed.count`: number of requests failed. +- `http.client.request.canceled.count`: number of canceled requests. +- `http.client.request.timedout.count`: number of timed out requests. +- `http.client.request.content-length`: counter with the sum of `Content-Length` header for the sent payload for the request. -- `response-latency`: histogram with the time taken until receiving the first byte of the response -- `response-content-length`: histogram with the size of response bodies as reported in the - `Content-Lenght` header. +- `http.client.response.content-length`: histogram with the size of response bodies as reported in the + `Content-Length` header. ###### Read Payload metrics @@ -372,7 +371,7 @@ Putting it all together, here we have an example of a configuration: } ], "layers": { - "router": { + "global": { "disable_metrics": false, "disable_traces": false, "disable_propagation": false diff --git a/example/Makefile b/example/Makefile index 39cc04a..c3e9382 100644 --- a/example/Makefile +++ b/example/Makefile @@ -10,6 +10,12 @@ image: conf: cat ./docker_compose/conf.local/prometheus/prometheus.env_tmpl.yml | envsubst > ./docker_compose/conf.local/prometheus/prometheus.yml +docker_up: + cd ./docker_compose && docker-compose -p krakend_otel_local -f compose.local.yaml up -d + +docker_down: + cd ./docker_compose && docker-compose -p krakend_otel_local -f compose.local.yaml down --remove-orphans -v + run: srv cd ./docker_compose && docker-compose -p krakend_otel_local -f compose.local.yaml up -d ./srv -c ./docker_compose/conf.local/krakend_front/configuration.json > log_frontend.log & diff --git a/example/client/k6client/client.js b/example/client/k6client/client.js new file mode 100644 index 0000000..deee2f4 --- /dev/null +++ 
b/example/client/k6client/client.js @@ -0,0 +1,24 @@ +import http from 'k6/http'; +import { sleep } from 'k6'; + +export const options = { + // A number specifying the number of VUs to run concurrently. + vus: 10, + // A string specifying the total duration of the test run. + duration: '15m', +}; + +// The function that defines VU logic. +// +// See https://grafana.com/docs/k6/latest/examples/get-started-with-k6/ to learn more +// about authoring k6 scripts. +// +export default function() { + let address = 'http://192.168.1.12:54444'; + http.get(address + '/fake/fsf'); + http.get(address + '/combination/1'); + http.get(address + '/direct/slow'); + http.get(address + '/direct/delayed'); + http.get(address + '/direct/drop'); + sleep(1); +} diff --git a/example/client/make_requests.sh b/example/client/make_requests.sh new file mode 100644 index 0000000..76db7d3 --- /dev/null +++ b/example/client/make_requests.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# to generate a new base script: +# +# docker run --rm -i -v ${PWD}:/app \ +# --user $(id -u):$(id -g) \ +# -w /app grafana/k6 \ +# new client.js + +docker run --rm -i -v ${PWD}:/app \ + --user $(id -u):$(id -g) \ + -w /app grafana/k6 \ + run k6client/client.js + +## for i in {1..1} +## # for i in {1..1000} +## do +## # curl localhost:54444/fake/fsf +## # curl localhost:54444/combination/2 +## curl localhost:54444/combination/1 +## # curl localhost:54444/direct/slow +## sleep 0.1 +## # curl localhost:54444/direct/delayed +## # curl localhost:54444/direct/drop +## done +## +## # curl localhost:44444/fake/fsf | jq +## # echo -e "\n" +## # curl localhost:44444/combination/23 | jq diff --git a/example/docker_compose/compose.local.yaml b/example/docker_compose/compose.local.yaml index b11e466..d81943e 100644 --- a/example/docker_compose/compose.local.yaml +++ b/example/docker_compose/compose.local.yaml @@ -8,7 +8,7 @@ services: healthcheck: test: [CMD, wget, --spicer, -S, http://localhost:3100/ready] ports: - - "3100:3100" + - 
"53100:3100" tempo: image: grafana/tempo:latest domainname: tempo @@ -17,24 +17,24 @@ services: - "./conf.local/tempo/tempo.yaml:/etc/tempo.yaml" - "vol_local_tempo:/tmp/tempo" ports: - - "14268:14268" # jaeger ingest - - "3200:3200" # tempo - - "9095:9095" # tempo grpc - - "54317:4317" # otlp grpc - - "54318:4318" # otlp http - - "9411:9411" # zipkin + - "53200:3200" # tempo + - "59095:9095" # tempo grpc + - "53317:4317" # otlp grpc + - "53318:4318" # otlp http +# - "14268:14268" # jaeger ingest +# - "9411:9411" # zipkin prometheus: image: prom/prometheus:latest domainname: prometheus ports: - - "59090:9090" + - "59590:9090" volumes: - "./conf.local/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml" grafana: image: grafana/grafana:latest domainname: grafana ports: - - "3000:3000" + - "53000:3000" environment: GF_SECURITY_ADMIN_USER: krakend GF_SECURITY_ADMIN_PASSWORD: krakend @@ -46,15 +46,19 @@ services: - loki - tempo jaeger: - image: jaegertracing/all-in-one:1.52 + image: jaegertracing/all-in-one:1.54 domainname: jaeger environment: COLLECTOR_ZIPKIN_HOST_PORT: ":9411" ports: - - "5778:5778" # serve configs - - "16686:16686" # serve frontend UI + - "55778:5778" # serve configs + - "56686:16686" # serve frontend UI - "55317:4317" # otlp grpc: we remap this to be able to run other envs - "55318:4318" # otlp http: we reamp this to be able to run other envs + deploy: + resources: + limits: + memory: 1024M fakeapi: image: dhontecillas/reqstatsrv:latest domainname: fakeapi diff --git a/example/docker_compose/conf.local/fakeapi/config/example.dockerized.json b/example/docker_compose/conf.local/fakeapi/config/example.dockerized.json index 37920bd..54d9773 100644 --- a/example/docker_compose/conf.local/fakeapi/config/example.dockerized.json +++ b/example/docker_compose/conf.local/fakeapi/config/example.dockerized.json @@ -70,12 +70,12 @@ "name": "delayer", "config": { "delay_millis_distribution": [ - {"key": 0, "val": 0.05}, - {"key": 10, "val": 0.2}, + {"key": 0, 
"val": 0.0}, + {"key": 10, "val": 0.0}, {"key": 50, "val": 0.0}, - {"key": 100, "val": 0.5}, - {"key": 200, "val": 0.0}, - {"key": 700, "val": 0.25} + {"key": 100, "val": 0.25}, + {"key": 200, "val": 0.25}, + {"key": 700, "val": 0.5} ], "seed": 1 } diff --git a/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.external.json b/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.external.json index a303271..ea140a9 100644 --- a/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.external.json +++ b/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.external.json @@ -90,7 +90,7 @@ }, "id": 8, "panels": [], - "title": "Row title", + "title": "Overview", "type": "row" }, { @@ -98,7 +98,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Router Requests Per Minute", + "description": "Requests Per Minute", "fieldConfig": { "defaults": { "color": { @@ -153,7 +153,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "sum(rate(router_response_latency_count{instance=\"$instance\"}[$__rate_interval]) * 60)", + "expr": "sum(rate(http_server_duration_count{instance=\"$instance\"}[$__rate_interval]) * 60)", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -163,7 +163,7 @@ "useBackend": false } ], - "title": "Router Requests", + "title": "Requests Per Minute", "type": "stat" }, { @@ -171,6 +171,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Percentile 99 of HTTP server duration.", "fieldConfig": { "defaults": { "color": { @@ -225,7 +226,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "avg(histogram_quantile(0.99, sum by(le) (rate(router_response_latency_bucket{instance=\"$instance\"}[$__rate_interval]))))", + "expr": "avg(histogram_quantile(0.99, sum by(le) (rate(http_server_duration_bucket{instance=\"$instance\"}[$__rate_interval]))))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -235,7 
+236,7 @@ "useBackend": false } ], - "title": "Router Latency p99", + "title": "Duration p99", "transparent": true, "type": "stat" }, @@ -296,8 +297,8 @@ "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(stage_duration_bucket{krakend_stage=\"pipe\"}[$__rate_interval])))", + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(krakend_pipe_duration_bucket[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -367,8 +368,8 @@ "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(stage_duration_bucket{krakend_stage=\"backend\"}[$__rate_interval])))", + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(krakend_backend_duration_bucket[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -391,7 +392,7 @@ }, "id": 4, "panels": [], - "title": "Router", + "title": "Global", "type": "row" }, { @@ -455,7 +456,7 @@ }, "gridPos": { "h": 8, - "w": 12, + "w": 6, "x": 0, "y": 7 }, @@ -463,8 +464,8 @@ "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom", + "displayMode": "table", + "placement": "right", "showLegend": true }, "tooltip": { @@ -479,8 +480,8 @@ "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, url_path) (rate(router_response_latency_bucket{instance=\"$instance\"}[$__rate_interval])))", + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, url_path) (rate(http_server_duration_bucket{instance=\"$instance\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -490,10 +491,27 @@ "useBackend": false } ], - "title": "95p Router Endpoint Latencies", + "title": "95p Endpoint Latencies", "type": 
"timeseries" }, { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 3, + "panels": [], + "title": "Pipeline", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { "color": { @@ -549,17 +567,17 @@ "overrides": [] }, "gridPos": { - "h": 8, + "h": 11, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 16 }, - "id": 13, + "id": 14, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom", + "displayMode": "table", + "placement": "right", "showLegend": true }, "tooltip": { @@ -567,7 +585,25 @@ "sort": "none" } }, - "title": "Panel Title", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, url_path) (rate(krakend_pipe_duration_bucket{instance=\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "95p pipe duration", "type": "timeseries" }, { @@ -576,25 +612,111 @@ "h": 1, "w": 24, "x": 0, - "y": 15 + "y": 27 }, - "id": 3, + "id": 2, "panels": [], - "title": "Pipeline", + "title": "Backends", "type": "row" }, { - "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "gridPos": { - "h": 1, - "w": 24, + "h": 12, + "w": 12, "x": 0, - "y": 16 + "y": 28 }, - "id": 2, - "panels": [], - "title": "Backends", - "type": "row" + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, url_path) (rate(krakend_backend_duration_bucket{instance=\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "95p backend duration", + "type": "timeseries" }, { "datasource": { @@ -656,17 +778,17 @@ "overrides": [] }, "gridPos": { - "h": 15, - "w": 24, - "x": 0, - "y": 17 + "h": 12, + "w": 12, + "x": 12, + "y": 28 }, "id": 1, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom", + "displayMode": "table", + "placement": "right", "showLegend": true }, "tooltip": { @@ -681,18 +803,208 @@ "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, url_path, server_address, otel_scope_name) (rate(response_latency_bucket{instance=\"$instance\"}[$__rate_interval])))", + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, url_path, server_address) (rate(http_client_duration_bucket{instance=\"$instance\"}[$__rate_interval])))", 
"fullMetaSearch": false, "includeNullMetadata": false, "instant": true, - "legendFormat": "__auto", + "legendFormat": "{{server_address}}{{url_path}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "95p backends latency", + "title": "95p requests duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": " sum by (http_request_method_original, krakend_endpoint, server_address, url_path) (rate(http_client_requests_started_count_total[1m]))", + "instant": false, + "legendFormat": "{{http_request_method_original}} {{server_address}}{{url_path}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests Started", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": " sum by (http_request_method_original, krakend_endpoint, server_address, url_path) (rate(http_client_requests_started_count_total[1m]))", + "instant": false, + "legendFormat": "{{http_request_method_original}} {{server_address}}{{url_path}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests Started", "type": "timeseries" }, { @@ -701,7 +1013,7 @@ "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 52 }, "id": 9, "panels": [], @@ -743,7 +1055,7 @@ "h": 19, "w": 24, "x": 0, - "y": 33 + "y": 53 }, "id": 10, "options": { @@ -825,13 +1137,13 @@ ] }, "time": { - "from": "now-5m", + "from": "now-3h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "KrakenD 
OTEL", "uid": "d7e631a4-afbc-4267-91e8-b9492bcd801b", - "version": 9, + "version": 3, "weekStart": "" } \ No newline at end of file diff --git a/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.json b/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.json index d43fd44..5fd3fb6 100644 --- a/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.json +++ b/example/docker_compose/conf.local/grafana/KrakenD_OTEL_Dashboard.json @@ -19,7 +19,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 3, + "id": 1, "links": [], "liveNow": false, "panels": [ @@ -96,7 +96,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "sum(rate(router_response_latency_count{instance=\"$instance\"}[$__rate_interval]) * 60)", + "expr": "sum(rate(global_response_latency_count{instance=\"$instance\"}[$__rate_interval]) * 60)", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -168,7 +168,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "avg(histogram_quantile(0.99, sum by(le) (rate(router_response_latency_bucket{instance=\"$instance\"}[$__rate_interval]))))", + "expr": "avg(histogram_quantile(0.99, sum by(le) (rate(global_response_latency_bucket{instance=\"$instance\"}[$__rate_interval]))))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -422,8 +422,8 @@ "uid": "b9564da7-2fba-447a-934e-e0217745974e" }, "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, url_path) (rate(router_response_latency_bucket{instance=\"$instance\"}[$__rate_interval])))", + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, url_path) (rate(global_response_latency_bucket{instance=\"$instance\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -436,83 +436,6 @@ "title": "95p Router Endpoint Latencies", "type": "timeseries" }, - { - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 7 - }, - "id": 13, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "title": "Panel Title", - "type": "timeseries" - }, { "collapsed": false, "gridPos": { @@ -654,7 +577,7 @@ { "datasource": { "type": "tempo", - "uid": "e7eea071-7df9-44ff-8a6a-155248d05f42" + "uid": "af13a7d6-b296-4f6e-978a-942e087b2c9e" }, "fieldConfig": { "defaults": { @@ -706,7 +629,7 @@ { "datasource": { "type": "tempo", - "uid": "e7eea071-7df9-44ff-8a6a-155248d05f42" + "uid": "af13a7d6-b296-4f6e-978a-942e087b2c9e" }, "filters": [ { @@ -779,6 +702,6 @@ "timezone": "", "title": "KrakenD OTEL", "uid": "d7e631a4-afbc-4267-91e8-b9492bcd801b", - "version": 9, + "version": 2, "weekStart": "" } \ No newline at end of file diff --git a/example/docker_compose/conf.local/grafana/get_data_sources.sh b/example/docker_compose/conf.local/grafana/get_data_sources.sh index 29887ca..3fd1ce4 100644 --- a/example/docker_compose/conf.local/grafana/get_data_sources.sh +++ 
b/example/docker_compose/conf.local/grafana/get_data_sources.sh @@ -1,2 +1,2 @@ #!/bin/bash -curl http://krakend:krakend@localhost:3000/api/datasources +curl http://krakend:krakend@localhost:53000/api/datasources diff --git a/example/docker_compose/conf.local/grafana/set_dashboards.sh b/example/docker_compose/conf.local/grafana/set_dashboards.sh index f244401..84b22f0 100644 --- a/example/docker_compose/conf.local/grafana/set_dashboards.sh +++ b/example/docker_compose/conf.local/grafana/set_dashboards.sh @@ -1,11 +1,11 @@ #!/bin/bash -echo '{ "dashboard": ' > tmp.json -cat KrakenD_OTEL_Dashboard.json >> tmp.json -echo '}' >> tmp.json +# echo '{ "dashboard": ' > tmp.json +# cat KrakenD_OTEL_Dashboard.json >> tmp.json +# echo '}' >> tmp.json curl -X POST --insecure --header "Content-Type: application/json" \ - http://krakend:krakend@localhost:3000/api/dashboards/db \ + http://krakend:krakend@localhost:53000/api/dashboards/db \ -d @tmp.json # rm tmp.json diff --git a/example/docker_compose/conf.local/grafana/set_data_sources.sh b/example/docker_compose/conf.local/grafana/set_data_sources.sh index fb4062c..f93b61f 100644 --- a/example/docker_compose/conf.local/grafana/set_data_sources.sh +++ b/example/docker_compose/conf.local/grafana/set_data_sources.sh @@ -1,13 +1,13 @@ #!/bin/bash curl -X POST --header "Content-Type: application/json" \ - http://krakend:krakend@localhost:3000/api/datasources \ + http://krakend:krakend@localhost:53000/api/datasources \ -d @prometheus_datasource.json curl -X POST --header "Content-Type: application/json" \ - http://krakend:krakend@localhost:3000/api/datasources \ + http://krakend:krakend@localhost:53000/api/datasources \ -d @tempo_datasource.json curl -X POST --header "Content-Type: application/json" \ - http://krakend:krakend@localhost:3000/api/datasources \ + http://krakend:krakend@localhost:53000/api/datasources \ -d @loki_datasource.json diff --git a/example/docker_compose/conf.local/grafana/tmp.json 
b/example/docker_compose/conf.local/grafana/tmp.json index 92da143..7b105a9 100644 --- a/example/docker_compose/conf.local/grafana/tmp.json +++ b/example/docker_compose/conf.local/grafana/tmp.json @@ -1,36 +1,5 @@ { "dashboard": { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": {}, - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "10.2.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], "annotations": { "list": [ { @@ -51,14 +20,530 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, + "id": 3, "links": [], "liveNow": false, "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 8, + "panels": [], + "title": "Row title", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "description": "Router Requests Per Minute", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + 
"disableTextWrap": false, + "editorMode": "code", + "expr": "sum(rate(router_response_latency_count{instance=\"$instance\"}[$__rate_interval]) * 60)", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Router Requests", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 7, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg(histogram_quantile(0.99, sum by(le) (rate(router_response_latency_bucket{instance=\"$instance\"}[$__rate_interval]))))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Router Latency p99", + "transparent": true, + "type": "stat" + }, { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + 
"value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.99, sum by(le) (rate(stage_duration_bucket{krakend_stage=\"pipe\"}[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Pipe Stage Duration 99p", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.99, sum by(le) (rate(stage_duration_bucket{krakend_stage=\"backend\"}[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + 
"instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Backend Stage Duration 99p", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 4, + "panels": [], + "title": "Router", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, url_path) (rate(router_response_latency_bucket{instance=\"$instance\"}[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", 
+ "useBackend": false + } + ], + "title": "95p Router Endpoint Latencies", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "title": "Panel Title", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 3, + "panels": [], + "title": "Pipeline", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 2, + "panels": [], + "title": "Backends", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "b9564da7-2fba-447a-934e-e0217745974e" }, "fieldConfig": { "defaults": { @@ -118,7 +603,7 @@ "h": 15, "w": 24, "x": 0, - "y": 0 + "y": 17 }, "id": 1, "options": { @@ -137,7 +622,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "b9564da7-2fba-447a-934e-e0217745974e" }, "disableTextWrap": false, "editorMode": 
"builder", @@ -153,18 +638,118 @@ ], "title": "95p backends latency", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 9, + "panels": [], + "title": "Tracing", + "type": "row" + }, + { + "datasource": { + "type": "tempo", + "uid": "e7eea071-7df9-44ff-8a6a-155248d05f42" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 19, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 10, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "af13a7d6-b296-4f6e-978a-942e087b2c9e" + }, + "filters": [ + { + "id": "2adf4ac6", + "operator": "=", + "scope": "span" + }, + { + "id": "service-name", + "operator": "=", + "scope": "resource", + "tag": "service.name", + "value": [], + "valueType": "string" + } + ], + "key": "Q-7e494413-0c5c-41b3-bf83-a0c8dc0b4205-0", + "limit": 10, + "queryType": "traceqlSearch", + "refId": "A", + "spss": 100, + "tableType": "traces" + } + ], + "title": "New Panel", + "type": "table" } ], - "refresh": "30s", + "refresh": "5s", "schemaVersion": 38, "tags": [], "templating": { "list": [ { - "current": {}, + "current": { + "selected": true, + "text": "192.168.1.12:59091", + "value": "192.168.1.12:59091" + }, "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "b9564da7-2fba-447a-934e-e0217745974e" }, "definition": "label_values(instance)", "description": "The Instance that we want to monitor", @@ -188,13 +773,12 @@ ] }, "time": { - "from": "now-15m", + "from": "now-5m", "to": 
"now" }, "timepicker": {}, "timezone": "", "title": "KrakenD OTEL", - "uid": "d7e631a4-afbc-4267-91e8-b9492bcd801b", - "version": 2, + "version": 9, "weekStart": "" }} diff --git a/example/docker_compose/conf.local/krakend_back/configuration.json b/example/docker_compose/conf.local/krakend_back/configuration.json index 9df30db..78536ad 100644 --- a/example/docker_compose/conf.local/krakend_back/configuration.json +++ b/example/docker_compose/conf.local/krakend_back/configuration.json @@ -104,14 +104,16 @@ { "name": "local_tempo", "host": "127.0.0.1", - "port": 54317, - "use_http": false + "port": 53317, + "use_http": false, + "disable_metrics": true }, { "name": "local_jaeger", "host": "127.0.0.1", "port": 55317, - "use_http": false + "use_http": false, + "disable_metrics": true } ] }, diff --git a/example/docker_compose/conf.local/krakend_front/configuration.json b/example/docker_compose/conf.local/krakend_front/configuration.json index 2f6d3ad..e757d3d 100644 --- a/example/docker_compose/conf.local/krakend_front/configuration.json +++ b/example/docker_compose/conf.local/krakend_front/configuration.json @@ -98,7 +98,7 @@ "detailed_connection": true, "static_attributes":[ { - "name": "my_metric_attr", + "key": "my_metric_attr", "value": "my_metric_val" } ] @@ -110,7 +110,7 @@ "detailed_connection": true, "static_attributes": [ { - "name": "my_trace_attr", + "key": "my_trace_attr", "value": "my_trace_val" } ] @@ -130,15 +130,17 @@ { "name": "local_tempo", "host": "127.0.0.1", - "port": 54317, - "use_http": false + "port": 53317, + "use_http": false, + "disable_metrics": true }, { "name": "local_jaeger", "kind": "opentelemetry", "host": "127.0.0.1", "port": 55317, - "use_http": false + "use_http": false, + "disable_metrics": true } ] } diff --git a/example/docker_compose/conf.local/krakend_middle/configuration.json b/example/docker_compose/conf.local/krakend_middle/configuration.json index 7a7bd49..1170b97 100644 --- 
a/example/docker_compose/conf.local/krakend_middle/configuration.json +++ b/example/docker_compose/conf.local/krakend_middle/configuration.json @@ -84,14 +84,16 @@ { "name": "local_tempo", "host": "127.0.0.1", - "port": 54317, - "use_http": false + "port": 53317, + "use_http": false, + "disable_metrics": true }, { "name": "local_jaeger", "host": "127.0.0.1", "port": 55317, - "use_http": false + "use_http": false, + "disable_metrics": true } ] } diff --git a/example/make_requests.sh b/example/make_requests.sh index 0165123..fd7301a 100644 --- a/example/make_requests.sh +++ b/example/make_requests.sh @@ -1,17 +1,21 @@ #!/bin/bash -for i in {1..1} -# for i in {1..1000} -do - # curl localhost:54444/fake/fsf - # curl localhost:54444/combination/2 - curl localhost:54444/combination/1 - # curl localhost:54444/direct/slow - sleep 0.1 - # curl localhost:54444/direct/delayed - # curl localhost:54444/direct/drop -done +docker run --rm -i -v $PWD:/app -w /app grafana/k6 new -# curl localhost:44444/fake/fsf | jq -# echo -e "\n" -# curl localhost:44444/combination/23 | jq + + +## for i in {1..1} +## # for i in {1..1000} +## do +## # curl localhost:54444/fake/fsf +## # curl localhost:54444/combination/2 +## curl localhost:54444/combination/1 +## # curl localhost:54444/direct/slow +## sleep 0.1 +## # curl localhost:54444/direct/delayed +## # curl localhost:54444/direct/drop +## done +## +## # curl localhost:44444/fake/fsf | jq +## # echo -e "\n" +## # curl localhost:44444/combination/23 | jq diff --git a/http/client/transport.go b/http/client/transport.go index 21eb6cf..b17a7d9 100644 --- a/http/client/transport.go +++ b/http/client/transport.go @@ -97,7 +97,7 @@ func readWrapperBuilder(metricsOpts *TransportMetricsOptions, tracesOpts *Transp t = tracer } - irf := otelio.NewInstrumentedReaderFactory(attrT, attrM, t, m) + irf := otelio.NewInstrumentedReaderFactory("http.client.request.read.", attrT, attrM, t, m) return func(r io.Reader, ctx context.Context) io.ReadCloser { return 
irf(r, ctx) } diff --git a/http/client/transport_metrics.go b/http/client/transport_metrics.go index bd0d0d5..58ecc9a 100644 --- a/http/client/transport_metrics.go +++ b/http/client/transport_metrics.go @@ -76,21 +76,21 @@ func newTransportMetrics(metricsOpts *TransportMetricsOptions, meter metric.Mete } var tm transportMetrics - tm.requestsStarted, _ = meter.Int64Counter("requests-started-count") // number of reqs started - tm.requestsFailed, _ = meter.Int64Counter("requests-failed-count") // number of reqs failed - tm.requestsCanceled, _ = meter.Int64Counter("requests-canceled-count") // number of canceled requests - tm.requestsTimedOut, _ = meter.Int64Counter("requests-timedout-count") // numer of timedout request (inclued in failed) + tm.requestsStarted, _ = meter.Int64Counter("http.client.requests.started.count") // number of reqs started + tm.requestsFailed, _ = meter.Int64Counter("http.client.requests.failed.count") // number of reqs failed + tm.requestsCanceled, _ = meter.Int64Counter("http.client.requests.canceled.count") // number of canceled requests + tm.requestsTimedOut, _ = meter.Int64Counter("http.client.requests.timedout.count") // number of timed-out requests (included in failed) - tm.requestContentLength, _ = meter.Int64Counter("requests-content-length") // the value of the Content-Length header for the request + tm.requestContentLength, _ = meter.Int64Counter("http.client.requests.content-length") // the value of the Content-Length header for the request - tm.responseLatency, _ = meter.Float64Histogram("response-latency", timeBucketsOpt) + tm.responseLatency, _ = meter.Float64Histogram("http.client.duration", timeBucketsOpt) - tm.responseContentLength, _ = meter.Int64Histogram("response-content-length", sizeBucketsOpt) + tm.responseContentLength, _ = meter.Int64Histogram("http.client.response.content-length", sizeBucketsOpt) tm.detailsEnabled = metricsOpts.DetailedConnection - tm.getConnLatency, _ = meter.Float64Histogram("request-get-conn-latency",
timeBucketsOpt) - tm.dnsLatency, _ = meter.Float64Histogram("request-dns-latency", timeBucketsOpt) - tm.tlsLatency, _ = meter.Float64Histogram("request-tls-latency", timeBucketsOpt) + tm.getConnLatency, _ = meter.Float64Histogram("http.client.request.get-conn-duration", timeBucketsOpt) + tm.dnsLatency, _ = meter.Float64Histogram("http.client.request.dns-duration", timeBucketsOpt) + tm.tlsLatency, _ = meter.Float64Histogram("http.client.request.tls-duration", timeBucketsOpt) return &tm } diff --git a/http/server/metrics.go b/http/server/metrics.go index faca7d3..e3da7e6 100644 --- a/http/server/metrics.go +++ b/http/server/metrics.go @@ -32,8 +32,8 @@ type metricsHTTP struct { func newMetricsHTTP(meter metric.Meter, attrs []attribute.KeyValue) *metricsHTTP { var m metricsHTTP - m.latency, _ = meter.Float64Histogram("global-response-latency", timeBucketsOpt) - m.size, _ = meter.Int64Histogram("global-response-size", sizeBucketsOpt) + m.latency, _ = meter.Float64Histogram("http.server.duration", timeBucketsOpt) + m.size, _ = meter.Int64Histogram("http.server.response.size", sizeBucketsOpt) if len(attrs) > 0 { m.fixedAttrs = make([]attribute.KeyValue, len(attrs)) copy(m.fixedAttrs, attrs) @@ -47,6 +47,7 @@ func (m *metricsHTTP) report(t *tracking) { return } dynAttrsOpts := metric.WithAttributes( + semconv.URLPath(t.endpointPattern), semconv.HTTPResponseStatusCode(t.responseStatus), ) m.latency.Record(t.ctx, t.latencyInSecs, m.fixedAttrsOpts, dynAttrsOpts) diff --git a/io/instruments.go b/io/instruments.go index df3be41..38e91d4 100644 --- a/io/instruments.go +++ b/io/instruments.go @@ -4,7 +4,6 @@ package otelio import ( "fmt" - "strings" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" @@ -62,10 +61,7 @@ func newInstruments(prefix string, tracer trace.Tracer, meter metric.Meter, ) *instruments { if prefix == "" { - prefix = "io-" - } - if !strings.HasSuffix(prefix, "-") { - prefix = prefix + "-" + prefix = "io." 
} strSizeCount := fmt.Sprintf("%ssize", prefix) strSizeHistogram := fmt.Sprintf("%ssize-hist", prefix) @@ -74,7 +70,7 @@ func newInstruments(prefix string, strErrorsMeter := fmt.Sprintf("%serrors", prefix) nopMProvider := noopmetric.NewMeterProvider() - nopM := nopMProvider.Meter(fmt.Sprintf("%s-nop-tracker", prefix)) + nopM := nopMProvider.Meter(fmt.Sprintf("%snop-tracker", prefix)) sizeCount, _ := nopM.Int64Counter(strSizeCount) sizeHistogram, _ := nopM.Int64Histogram(strSizeHistogram, sizeBucketsOpt) timeCount, _ := nopM.Float64Counter(strTimeCount) diff --git a/io/reader.go b/io/reader.go index 759ddff..23233a8 100644 --- a/io/reader.go +++ b/io/reader.go @@ -22,10 +22,13 @@ type instrumentedReader struct { // NewInstrumentedReaderFactory creates a function that can wrap a reader with // an instrumented reader. Is better than the [NewIntrumentedReader] call because // the instruments here are only created once. -func NewInstrumentedReaderFactory(attrT []attribute.KeyValue, attrM []attribute.KeyValue, +func NewInstrumentedReaderFactory(prefix string, attrT []attribute.KeyValue, attrM []attribute.KeyValue, tracer trace.Tracer, meter metric.Meter, ) func(io.Reader, context.Context) *instrumentedReader { - instr := newInstruments("read-", attrT, attrM, tracer, meter) + if prefix == "" { + prefix = "read." + } + instr := newInstruments(prefix, attrT, attrM, tracer, meter) return func(r io.Reader, ctx context.Context) *instrumentedReader { rc, ok := r.(io.ReadCloser) @@ -44,11 +47,11 @@ func NewInstrumentedReaderFactory(attrT []attribute.KeyValue, attrM []attribute. // NewInstrumentedReader wraps a reader with an instrumented reader. // Is better to use [NewInstrumentedReaderFactory]. 
-func NewInstrumentedReader(r io.Reader, ctx context.Context, +func NewInstrumentedReader(prefix string, r io.Reader, ctx context.Context, attrT []attribute.KeyValue, attrM []attribute.KeyValue, tracer trace.Tracer, meter metric.Meter, ) *instrumentedReader { - return NewInstrumentedReaderFactory(attrT, attrM, tracer, meter)(r, ctx) + return NewInstrumentedReaderFactory(prefix, attrT, attrM, tracer, meter)(r, ctx) } // Read wraps the reader's Read and keeps track of the diff --git a/io/writer.go b/io/writer.go index 61799de..6368a6e 100644 --- a/io/writer.go +++ b/io/writer.go @@ -31,10 +31,13 @@ type instrumentedWriter struct { // NewInstrumentedWriter creates a function that can wrap a writer with // an instrumented writer. Is better than the [NewIntrumentedWriter] call because // the instruments here are only created once. -func NewInstrumentedWriterFactory(attrT []attribute.KeyValue, attrM []attribute.KeyValue, +func NewInstrumentedWriterFactory(prefix string, attrT []attribute.KeyValue, attrM []attribute.KeyValue, tracer trace.Tracer, meter metric.Meter, ) func(io.Writer, context.Context) *instrumentedWriter { - instr := newInstruments("written-", attrT, attrM, tracer, meter) + if prefix == "" { + prefix = "written." + } + instr := newInstruments(prefix, attrT, attrM, tracer, meter) return func(w io.Writer, ctx context.Context) *instrumentedWriter { return &instrumentedWriter{ @@ -49,11 +52,11 @@ func NewInstrumentedWriterFactory(attrT []attribute.KeyValue, attrM []attribute. // NewInstrumentedWriter wraps a writer with an instrumented writer. // Is better to use [NewInstrumentedWriterFactory]. 
-func NewInstrumentedWriter(w io.Writer, ctx context.Context, +func NewInstrumentedWriter(prefix string, w io.Writer, ctx context.Context, attrT []attribute.KeyValue, attrM []attribute.KeyValue, tracer trace.Tracer, meter metric.Meter, ) *instrumentedWriter { - return NewInstrumentedWriterFactory(attrT, attrM, tracer, meter)(w, ctx) + return NewInstrumentedWriterFactory(prefix, attrT, attrM, tracer, meter)(w, ctx) } // Write wraps the writer Write and keeps track of the diff --git a/lura/attributes.go b/lura/attributes.go index 497e5d0..aa9a75a 100644 --- a/lura/attributes.go +++ b/lura/attributes.go @@ -18,24 +18,14 @@ import ( // - the method: one of the `GET`, `POST`, `PUT` .. etc // - the "path" , that is actually the path "template" to not have different values // for different params but the same endpoint. -// - the krakend stage, that can be one of -// - router: includes from the very point of receiving a request until -// a response is returned to the client. -// - pipe: includes all the processing that is performed -// for the endpoint part of a request (like merging and grouping -// responses from different backends). -// - backend: includes all middlewares and processing that is done for -// a given backend. 
-// - backend-request: when reporting the request to the backends // - server address: the host for the request -func backendConfigAttributes(cfg *config.Backend, stage string) []attribute.KeyValue { +func backendConfigAttributes(cfg *config.Backend) []attribute.KeyValue { urlPattern := kotelconfig.NormalizeURLPattern(cfg.URLPattern) parentEndpoint := kotelconfig.NormalizeURLPattern(cfg.ParentEndpoint) attrs := []attribute.KeyValue{ semconv.HTTPRequestMethodOriginal(cfg.Method), semconv.URLPath(urlPattern), // <- for traces we can use URLFull to not have the matched path - attribute.String("krakend.stage", stage), attribute.String("krakend.endpoint", parentEndpoint), } numHosts := len(cfg.Host) diff --git a/lura/backend.go b/lura/backend.go index 5e6579d..c2e663c 100644 --- a/lura/backend.go +++ b/lura/backend.go @@ -68,35 +68,29 @@ func InstrumentedHTTPClientFactory(clientFactory transport.HTTPClientFactory, } urlPattern := otelconfig.NormalizeURLPattern(cfg.URLPattern) - - // we set a basic list of attributes that will be set for both traces and - // metrics, as those are expected to have low cardinality - // - the method: one of the `GET`, `POST`, `PUT` .. etc - // - the "path" , that is actually the path "template" to not have different values - // for different params but the same endpoint. - // - the krakend stage, that can be one of - // - router: includes from the very point of receiving a request until - // a response is returned to the client. - // - pipe: includes all the processing that is performed - // for the endpoint part of a request (like merging and grouping - // responses from different backends). - // - backend: includes all middlewares and processing that is done for - // a given backend. 
- // - backend-request: when reporting the request to the backends - attrs := backendConfigAttributes(cfg, "backend-request") + attrs := backendConfigAttributes(cfg) metricAttrs := attrs if len(opts.Metrics.StaticAttributes) > 0 { for _, kv := range opts.Metrics.StaticAttributes { - metricAttrs = append(metricAttrs, attribute.String(kv.Key, kv.Value)) + if len(kv.Key) > 0 && len(kv.Value) > 0 { + metricAttrs = append(metricAttrs, attribute.String(kv.Key, kv.Value)) + } } } - traceAttrs := attrs + + traceAttrs := make([]attribute.KeyValue, len(attrs), + len(attrs)+1+len(opts.Traces.StaticAttributes)) + copy(traceAttrs, attrs) + traceAttrs = append(traceAttrs, attribute.String("krakend.stage", "backend-request")) if len(opts.Traces.StaticAttributes) > 0 { - for _, kv := range opts.Metrics.StaticAttributes { - traceAttrs = append(traceAttrs, attribute.String(kv.Key, kv.Value)) + for _, kv := range opts.Traces.StaticAttributes { + if len(kv.Key) > 0 && len(kv.Value) > 0 { + traceAttrs = append(traceAttrs, attribute.String(kv.Key, kv.Value)) + } } } + t := clienthttp.TransportOptions{ MetricsOpts: clienthttp.TransportMetricsOptions{ RoundTrip: opts.Metrics.RoundTrip, diff --git a/lura/proxy.go b/lura/proxy.go index 5de6fbe..df4ab14 100644 --- a/lura/proxy.go +++ b/lura/proxy.go @@ -35,13 +35,27 @@ var ( // Middleware creates a proxy that instruments the proxy it wraps by creating an span if enabled, // and report the duration of this stage in metrics if enabled. 
-func Middleware(staticAttrs []attribute.KeyValue, gsf state.GetterFn, metricsEnabled bool, tracesEnabled bool, - spanName string, -) proxy.Middleware { +func Middleware(gsf state.GetterFn, metricsEnabled bool, tracesEnabled bool, + stageName string, urlPattern string, staticAttrs []attribute.KeyValue) proxy.Middleware { + if gsf == nil { gsf = state.GlobalState } + mAttrs := make([]attribute.KeyValue, 0, len(staticAttrs)+1) + tAttrs := make([]attribute.KeyValue, 0, len(staticAttrs)+1) + + mAttrs = append(mAttrs, semconv.URLPath(urlPattern)) + tAttrs = append(tAttrs, attribute.String("krakend.stage", stageName)) + + for _, sa := range staticAttrs { + mAttrs = append(mAttrs, sa) + tAttrs = append(tAttrs, sa) + } + + metricAttrs := metric.WithAttributes(mAttrs...) + metricPrefix := "krakend." + stageName + return func(next ...proxy.Proxy) proxy.Proxy { if len(next) > 1 { panic(proxy.ErrTooManyProxies) @@ -58,19 +72,19 @@ func Middleware(staticAttrs []attribute.KeyValue, gsf state.GetterFn, metricsEna // measure the time it takes to process all reportMetrics := metricsEnabled meter := gs.Meter() - duration, err := meter.Float64Histogram("stage-duration", timeBucketsOpt) + duration, err := meter.Float64Histogram(metricPrefix+".duration", timeBucketsOpt) if err != nil { reportMetrics = false } - metricAttrs := metric.WithAttributes(staticAttrs...) reportTrace := tracesEnabled tracer := gs.Tracer() + return func(ctx context.Context, req *proxy.Request) (*proxy.Response, error) { var span trace.Span if reportTrace { - ctx, span = tracer.Start(ctx, spanName) - span.SetAttributes(staticAttrs...) + ctx, span = tracer.Start(ctx, urlPattern) + span.SetAttributes(tAttrs...) 
} startedAt := time.Now() @@ -127,11 +141,8 @@ func ProxyFactory(pf proxy.Factory, gsfn state.GetterFn, opts *kotelconfig.PipeO } urlPattern := kotelconfig.NormalizeURLPattern(cfg.Endpoint) - staticAttrs := []attribute.KeyValue{ - semconv.URLPath(urlPattern), - attribute.String("krakend.stage", "pipe"), - } - return Middleware(staticAttrs, gsfn, metricsEnabled, tracesEnabled, urlPattern)(next), nil + return Middleware(gsfn, metricsEnabled, tracesEnabled, "pipe", urlPattern, + []attribute.KeyValue{})(next), nil } } @@ -156,9 +167,8 @@ func OTELBackendFactory(bf proxy.BackendFactory, gsfn state.GetterFn, metricsEna return next } } - staticAttrs := backendConfigAttributes(cfg, "backend") - + staticAttrs := backendConfigAttributes(cfg) urlPattern := kotelconfig.NormalizeURLPattern(cfg.URLPattern) - return Middleware(staticAttrs, gsfn, metricsEnabled, tracesEnabled, urlPattern)(next) + return Middleware(gsfn, metricsEnabled, tracesEnabled, "backend", urlPattern, staticAttrs)(next) } }