Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

Block to submit accounting events #1010

Merged
merged 6 commits into from
Aug 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 143 additions & 11 deletions dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"gnetId": null,
"graphTooltip": 1,
"id": null,
"iteration": 1535621129449,
"iteration": 1535648268402,
"links": [],
"panels": [
{
Expand Down Expand Up @@ -3613,8 +3613,8 @@
"fill": 0,
"grid": {},
"gridPos": {
"h": 7,
"w": 12,
"h": 6,
"w": 8,
"x": 0,
"y": 74
},
Expand Down Expand Up @@ -3712,6 +3712,138 @@
"alignLevel": null
}
},
{
"aliasColors": {
"add-latency-p90": "#c15c17",
"max": "#890F02",
"used": "#3F6833",
"utilisation": "#6ed0e0"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"editable": true,
"error": false,
"fill": 0,
"grid": {},
"gridPos": {
"h": 6,
"w": 7,
"x": 8,
"y": 74
},
"id": 50,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "used",
"lines": false,
"pointradius": 1,
"points": true,
"yaxis": 2
},
{
"alias": "max",
"lines": false,
"pointradius": 1,
"points": true,
"yaxis": 2
},
{
"alias": "/latency/",
"fill": 2,
"linewidth": 0
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"refCount": 1,
"refId": "A",
"target": "alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.max.gauge64), 'max')",
"textEditor": false
},
{
"refCount": 1,
"refId": "B",
"target": "alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.used.max.gauge32), 'used')",
"textEditor": false
},
{
"refCount": 1,
"refId": "C",
"target": "alias(averageSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.add.latency.p90.gauge32), 'add-latency-p90')",
"textEditor": false
},
{
"hide": true,
"refCount": 0,
"refId": "D",
"target": "alias(divideSeries(#A,#B),'utilisation')",
"targetFull": "alias(divideSeries(alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.max.gauge64), 'max'),alias(sumSeries(metrictank.stats.$environment.$instance.cache.accounting.queue.size.used.max.gauge32), 'used')),'utilisation')",
"textEditor": true
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "accounting",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {
"add": "#629E51",
Expand All @@ -3731,9 +3863,9 @@
"fill": 10,
"grid": {},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"h": 6,
"w": 9,
"x": 15,
"y": 74
},
"id": 28,
Expand Down Expand Up @@ -3844,7 +3976,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 81
"y": 80
},
"id": 48,
"panels": [],
Expand All @@ -3871,7 +4003,7 @@
"h": 7,
"w": 12,
"x": 0,
"y": 82
"y": 81
},
"id": 32,
"legend": {
Expand Down Expand Up @@ -3993,7 +4125,7 @@
"h": 7,
"w": 12,
"x": 12,
"y": 82
"y": 81
},
"id": 15,
"legend": {
Expand Down Expand Up @@ -4173,5 +4305,5 @@
"timezone": "browser",
"title": "Metrictank",
"uid": "tQW3QShiz",
"version": 1
}
"version": 5
}
3 changes: 2 additions & 1 deletion docs/operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ If you expect consistent or predictable load, you may also want to monitor:

* `metrictank.stats.$environment.$instance.store.cassandra.chunk_operations.save_ok.counter32`: number of saved chunks (based on your chunkspan settings)
* `metrictank.stats.$environment.$instance.api.request_handle.values.rate32` : rate per second of render requests
* `metrictank.stats.$environment.$instance.input.*.*.received.counter32`: input counter (derive with perSecond(
* `metrictank.stats.$environment.$instance.input.*.*.received.counter32`: input counter (derive with perSecond)
* `metrictank.stats.$environment.$instance.cache.accounting.queue.size.used.max.gauge32`: accounting queue size. If this queue fills up, it will slow down requests (compare to `metrictank.stats.$environment.$instance.cache.accounting.queue.size.max.gauge64`)


## Crash
Expand Down
13 changes: 6 additions & 7 deletions mdata/cache/accnt/flat_accnt.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package accnt

import (
"sort"
"time"

"github.com/grafana/metrictank/mdata/chunk"
"github.com/raintank/schema"
"github.com/raintank/worldping-api/pkg/log"
)

const evictQSize = 1000
Expand Down Expand Up @@ -110,6 +110,7 @@ func NewFlatAccnt(maxSize uint64) *FlatAccnt {
eventQ: make(chan FlatAccntEvent, EventQSize),
}
cacheSizeMax.SetUint64(maxSize)
accntEventQueueMax.SetUint64(uint64(EventQSize))

go accnt.eventLoop()
return &accnt
Expand Down Expand Up @@ -157,12 +158,10 @@ func (a *FlatAccnt) act(eType eventType, payload interface{}) {
pl: payload,
}

select {
// we never want to block for accounting, rather just let it miss some events and print an error
case a.eventQ <- event:
default:
log.Error(3, "Failed to submit event to accounting, channel was blocked")
}
pre := time.Now()
a.eventQ <- event
accntEventAddDuration.Value(time.Now().Sub(pre))
accntEventQueueUsed.Value(len(a.eventQ))
}

func (a *FlatAccnt) eventLoop() {
Expand Down
7 changes: 5 additions & 2 deletions mdata/cache/accnt/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ var (
// metric cache.ops.chunk.evict is how many chunks were evicted from the cache
cacheChunkEvict = stats.NewCounter32("cache.ops.chunk.evict")

cacheSizeMax = stats.NewGauge64("cache.size.max")
cacheSizeUsed = stats.NewGauge64("cache.size.used")
cacheSizeMax = stats.NewGauge64("cache.size.max")
cacheSizeUsed = stats.NewGauge64("cache.size.used")
accntEventAddDuration = stats.NewLatencyHistogram15s32("cache.accounting.queue.add")
accntEventQueueUsed = stats.NewRange32("cache.accounting.queue.size.used")
accntEventQueueMax = stats.NewGauge64("cache.accounting.queue.size.max")
)