Skip to content

Commit

Permalink
Add metric for debug traces (#796)
Browse files Browse the repository at this point in the history
  • Loading branch information
black-adder authored May 25, 2018
1 parent 70dd38f commit 037875b
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 52 deletions.
87 changes: 46 additions & 41 deletions cmd/collector/app/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

const (
maxServiceNames = 2000
otherServices = "other-services"
)

// SpanProcessorMetrics contains all the necessary metrics for the SpanProcessor
Expand All @@ -47,15 +48,16 @@ type SpanProcessorMetrics struct { //TODO - initialize metrics in the traditiona
}

type countsBySvc struct {
counts map[string]metrics.Counter // counters per service
factory metrics.Factory
lock *sync.Mutex
counts map[string]metrics.Counter // counters per service
debugCounts map[string]metrics.Counter // debug counters per service
factory metrics.Factory
lock *sync.Mutex
maxServiceNames int
}

type metricsBySvc struct {
spans countsBySvc // number of spans received per service
debugSpans countsBySvc // number of debug spans received per service
traces countsBySvc // number of traces originated per service
spans countsBySvc // number of spans received per service
traces countsBySvc // number of traces originated per service
}

// CountsBySpanType measures received, rejected, and receivedByService metrics for a format type
Expand Down Expand Up @@ -92,22 +94,25 @@ func NewSpanProcessorMetrics(serviceMetrics metrics.Factory, hostMetrics metrics
}

func newMetricsBySvc(factory metrics.Factory, category string) metricsBySvc {
spansFactory := factory.Namespace("spans."+category, nil)
tracesFactory := factory.Namespace("traces."+category, nil)
return metricsBySvc{
spans: countsBySvc{
counts: make(map[string]metrics.Counter),
factory: factory.Namespace("spans."+category, nil),
lock: &sync.Mutex{},
},
debugSpans: countsBySvc{
counts: make(map[string]metrics.Counter),
factory: factory.Namespace("debug-spans."+category, nil),
lock: &sync.Mutex{},
spans: newCountsBySvc(spansFactory, maxServiceNames),
traces: newCountsBySvc(tracesFactory, maxServiceNames),
}
}

func newCountsBySvc(factory metrics.Factory, maxServiceNames int) countsBySvc {
return countsBySvc{
counts: map[string]metrics.Counter{
otherServices: factory.Counter("", map[string]string{"service": otherServices, "debug": "false"}),
},
traces: countsBySvc{
counts: make(map[string]metrics.Counter),
factory: factory.Namespace("traces."+category, nil),
lock: &sync.Mutex{},
debugCounts: map[string]metrics.Counter{
otherServices: factory.Counter("", map[string]string{"service": otherServices, "debug": "true"}),
},
factory: factory,
lock: &sync.Mutex{},
maxServiceNames: maxServiceNames,
}
}

Expand All @@ -134,29 +139,21 @@ func (m metricsBySvc) ReportServiceNameForSpan(span *model.Span) {
if serviceName == "" {
return
}
m.countSpansByServiceName(serviceName)
if span.Flags.IsDebug() {
m.countDebugSpansByServiceName(serviceName)
}
m.countSpansByServiceName(serviceName, span.Flags.IsDebug())
if span.ParentSpanID() == 0 {
m.countTracesByServiceName(serviceName)
m.countTracesByServiceName(serviceName, span.Flags.IsDebug())
}
}

// countSpansByServiceName counts how many spans are received per service.
func (m metricsBySvc) countSpansByServiceName(serviceName string) {
m.spans.countByServiceName(serviceName)
}

// countDebugSpansByServiceName counts how many debug spans are received per service.
func (m metricsBySvc) countDebugSpansByServiceName(serviceName string) {
m.debugSpans.countByServiceName(serviceName)
func (m metricsBySvc) countSpansByServiceName(serviceName string, isDebug bool) {
m.spans.countByServiceName(serviceName, isDebug)
}

// countTracesByServiceName counts how many traces are received per service,
// i.e. the counter is only incremented for the root spans.
func (m metricsBySvc) countTracesByServiceName(serviceName string) {
m.traces.countByServiceName(serviceName)
func (m metricsBySvc) countTracesByServiceName(serviceName string, isDebug bool) {
m.traces.countByServiceName(serviceName, isDebug)
}

// countByServiceName maintains a map of counters for each service name it's
Expand All @@ -169,19 +166,27 @@ func (m metricsBySvc) countTracesByServiceName(serviceName string) {
// total number of stored counters, so if it exceeds say the 90% threshold
// an alert should be raised to investigate what's causing so many unique
// service names.
func (m *countsBySvc) countByServiceName(serviceName string) {
func (m *countsBySvc) countByServiceName(serviceName string, isDebug bool) {
serviceName = NormalizeServiceName(serviceName)
counts := m.counts
if isDebug {
counts = m.debugCounts
}
var counter metrics.Counter
m.lock.Lock()
if c, ok := m.counts[serviceName]; ok {
if c, ok := counts[serviceName]; ok {
counter = c
} else if len(m.counts) < maxServiceNames {
c := m.factory.Counter("", map[string]string{"service": serviceName})
m.counts[serviceName] = c
} else if len(counts) < m.maxServiceNames {
debugStr := "false"
if isDebug {
debugStr = "true"
}
c := m.factory.Counter("", map[string]string{"service": serviceName, "debug": debugStr})
counts[serviceName] = c
counter = c
} else {
counter = counts[otherServices]
}
m.lock.Unlock()
if counter != nil {
counter.Inc(1)
}
counter.Inc(1)
}
33 changes: 30 additions & 3 deletions cmd/collector/app/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,39 @@ func TestProcessorMetrics(t *testing.T) {
}
jFormat.ReceivedBySvc.ReportServiceNameForSpan(&mSpan)
mSpan.Flags.SetDebug()
jFormat.ReceivedBySvc.ReportServiceNameForSpan(&mSpan)
mSpan.ReplaceParentID(1234)
jFormat.ReceivedBySvc.ReportServiceNameForSpan(&mSpan)
counters, gauges := baseMetrics.LocalBackend.Snapshot()

assert.EqualValues(t, 2, counters["service.spans.received|format=jaeger|service=fry"])
assert.EqualValues(t, 1, counters["service.traces.received|format=jaeger|service=fry"])
assert.EqualValues(t, 1, counters["service.debug-spans.received|format=jaeger|service=fry"])
assert.EqualValues(t, 1, counters["service.spans.received|debug=false|format=jaeger|service=fry"])
assert.EqualValues(t, 2, counters["service.spans.received|debug=true|format=jaeger|service=fry"])
assert.EqualValues(t, 1, counters["service.traces.received|debug=false|format=jaeger|service=fry"])
assert.EqualValues(t, 1, counters["service.traces.received|debug=true|format=jaeger|service=fry"])
assert.Empty(t, gauges)
}

// TestNewCountsBySvc exercises the per-service counter cap of countsBySvc.
//
// The counters are created with a limit of 3. Because the "other-services"
// catch-all counter is pre-registered in each map and counts toward that
// limit, only the first two distinct service names get a dedicated counter;
// every later name is folded into "other-services". The same scenario is run
// for non-debug and for debug counts, which are tracked in separate maps.
func TestNewCountsBySvc(t *testing.T) {
	factory := jaegerM.NewLocalFactory(time.Hour)
	bySvc := newCountsBySvc(factory, 3)

	// Non-debug: "fry" and "leela" get their own counters, the remaining two
	// names overflow into the catch-all counter.
	for _, svc := range []string{"fry", "leela", "bender", "zoidberg"} {
		bySvc.countByServiceName(svc, false)
	}

	snapshot, _ := factory.LocalBackend.Snapshot()
	assert.EqualValues(t, 1, snapshot["|debug=false|service=fry"])
	assert.EqualValues(t, 1, snapshot["|debug=false|service=leela"])
	assert.EqualValues(t, 2, snapshot["|debug=false|service=other-services"])

	// Debug: names arrive in the opposite order, so "zoidberg" and "bender"
	// claim the dedicated counters this time.
	for _, svc := range []string{"zoidberg", "bender", "leela", "fry"} {
		bySvc.countByServiceName(svc, true)
	}

	snapshot, _ = factory.LocalBackend.Snapshot()
	assert.EqualValues(t, 1, snapshot["|debug=true|service=zoidberg"])
	assert.EqualValues(t, 1, snapshot["|debug=true|service=bender"])
	assert.EqualValues(t, 2, snapshot["|debug=true|service=other-services"])
}
24 changes: 16 additions & 8 deletions cmd/collector/app/span_processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,19 +109,27 @@ func TestBySvcMetrics(t *testing.T) {
} else {
panic("Unknown format")
}
expected := []metricsTest.ExpectedMetric{
{Name: metricPrefix + ".spans.received|format=" + format + "|service=" + test.serviceName, Value: 2},
}
expected := []metricsTest.ExpectedMetric{}
if test.debug {
expected = append(expected, metricsTest.ExpectedMetric{
Name: metricPrefix + ".debug-spans.received|format=" + format + "|service=" + test.serviceName, Value: 2,
Name: metricPrefix + ".spans.received|debug=true|format=" + format + "|service=" + test.serviceName, Value: 2,
})
}
if test.rootSpan {
} else {
expected = append(expected, metricsTest.ExpectedMetric{
Name: metricPrefix + ".traces.received|format=" + format + "|service=" + test.serviceName, Value: 2,
Name: metricPrefix + ".spans.received|debug=false|format=" + format + "|service=" + test.serviceName, Value: 2,
})
}
if test.rootSpan {
if test.debug {
expected = append(expected, metricsTest.ExpectedMetric{
Name: metricPrefix + ".traces.received|debug=true|format=" + format + "|service=" + test.serviceName, Value: 2,
})
} else {
expected = append(expected, metricsTest.ExpectedMetric{
Name: metricPrefix + ".traces.received|debug=false|format=" + format + "|service=" + test.serviceName, Value: 2,
})
}
}
if test.serviceName != blackListedService || test.debug {
// "error.busy" and "spans.dropped" are both equivalent to a span being accepted,
// because both are emitted when attempting to add span to the queue, and since
Expand All @@ -134,7 +142,7 @@ func TestBySvcMetrics(t *testing.T) {
})
} else {
expected = append(expected, metricsTest.ExpectedMetric{
Name: metricPrefix + ".spans.rejected|format=" + format + "|service=" + test.serviceName, Value: 2,
Name: metricPrefix + ".spans.rejected|debug=false|format=" + format + "|service=" + test.serviceName, Value: 2,
})
}
metricsTest.AssertCounterMetrics(t, mb, expected...)
Expand Down

0 comments on commit 037875b

Please sign in to comment.