-
Notifications
You must be signed in to change notification settings - Fork 137
Aggregation support for InfluxDBStore. #136
Changes from 1 commit
a8e5351
3a9d290
c065587
1ddd075
2fa0477
daf43b0
4dbd6c0
af53b2a
dbb4f3a
f484bba
65d9492
958b511
8b57bf4
ed37632
4e8ef32
094b223
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
package appdash | ||
|
||
import ( | ||
"encoding/json" | ||
"errors" | ||
"fmt" | ||
"net/url" | ||
|
@@ -65,12 +66,39 @@ func (in *InfluxDBStore) Collect(id SpanID, anns ...Annotation) error { | |
"parent_id": id.Parent.String(), | ||
} | ||
|
||
// Find the start and end time of the span. | ||
var events []Event | ||
if err := UnmarshalEvents(anns, &events); err != nil { | ||
return err | ||
} | ||
var ( | ||
foundItems int | ||
name string | ||
duration time.Duration | ||
) | ||
for _, ev := range events { | ||
switch v := ev.(type) { | ||
case spanName: | ||
foundItems++ | ||
name = v.Name | ||
case TimespanEvent: | ||
foundItems++ | ||
duration = v.End().Sub(v.Start()) | ||
} | ||
} | ||
|
||
// Annotations `anns` are set as fields(InfluxDB does not index fields). | ||
fields := make(map[string]interface{}, len(anns)) | ||
for _, ann := range anns { | ||
fields[ann.Key] = string(ann.Value) | ||
} | ||
|
||
// If we have span name and duration, set them as a tag and field. | ||
if foundItems == 2 { | ||
tags["name"] = name | ||
fields["duration"] = float64(duration) / float64(time.Second) | ||
} | ||
|
||
// `schemasFieldName` field contains all the schemas found on `anns`. | ||
// Eg. fields[schemasFieldName] = "HTTPClient,HTTPServer" | ||
fields[schemasFieldName] = schemasFromAnnotations(anns) | ||
|
@@ -145,6 +173,85 @@ func (in *InfluxDBStore) Trace(id ID) (*Trace, error) { | |
return trace, nil | ||
} | ||
|
||
func mustJSONFloat64(x interface{}) float64 { | ||
n := x.(json.Number) | ||
v, err := n.Float64() | ||
if err != nil { | ||
panic(err) | ||
} | ||
return v | ||
} | ||
|
||
func mustJSONInt64(x interface{}) int64 { | ||
n := x.(json.Number) | ||
v, err := n.Int64() | ||
if err != nil { | ||
panic(err) | ||
} | ||
return v | ||
} | ||
|
||
// Aggregate implements the Aggregator interface. | ||
func (in *InfluxDBStore) Aggregate(start, end time.Duration) ([]*AggregatedResult, error) { | ||
// Find the mean (average), minimum, maximum, std. deviation, and count of | ||
// all spans. | ||
q := `SELECT MEAN("duration"),MIN("duration"),MAX("duration"),STDDEV("duration"),COUNT("duration") from spans` | ||
q += fmt.Sprintf( | ||
" WHERE time >= '%s' AND time <= '%s'", | ||
time.Now().Add(start).UTC().Format(time.RFC3339Nano), | ||
time.Now().Add(end).UTC().Format(time.RFC3339Nano), | ||
) | ||
q += ` GROUP BY "name"` | ||
result, err := in.executeOneQuery(q) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// Populate the results. | ||
results := make([]*AggregatedResult, len(result.Series)) | ||
for i, row := range result.Series { | ||
v := row.Values[0] | ||
mean, min, max, stddev, count := v[1], v[2], v[3], v[4], v[5] | ||
results[i] = &AggregatedResult{ | ||
RootSpanName: row.Tags["name"], | ||
Average: time.Duration(mustJSONFloat64(mean) * float64(time.Second)), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we replace |
||
Min: time.Duration(mustJSONFloat64(min) * float64(time.Second)), | ||
Max: time.Duration(mustJSONFloat64(max) * float64(time.Second)), | ||
StdDev: time.Duration(mustJSONFloat64(stddev) * float64(time.Second)), | ||
Samples: mustJSONInt64(count), | ||
} | ||
} | ||
if len(result.Series) == 0 { | ||
return nil, nil | ||
} | ||
|
||
n := 5 | ||
if n > len(result.Series) { | ||
n = len(result.Series) | ||
} | ||
|
||
// Add in the N-slowest trace IDs for each span. | ||
// | ||
// TODO(slimsag): make N a pagination parameter instead. | ||
result, err = in.executeOneQuery(fmt.Sprintf(`SELECT TOP("duration",%d),trace_id FROM spans GROUP BY "name"`, n)) | ||
if err != nil { | ||
return nil, err | ||
} | ||
for i, row := range result.Series { | ||
if row.Tags["name"] != results[i].RootSpanName { | ||
panic("expectation violated") // never happens, just for sanity. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we just return an error instead of panic?(as well in line 247); in order to avoid lib-users taking care about this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could, but if this ever happens ideally we would have some "very angry user" reporting it. That is, this should never ever happen and, in the event that it does, it is 100% a bug in Appdash. I would hate for an error like that to bubble up somewhere and have someone think it is their responsibility to handle it. |
||
} | ||
for _, vals := range row.Values { | ||
id, err := ParseID(vals[2].(string)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here should be relying on a fixed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Very good idea! I will fix this. |
||
if err != nil { | ||
panic(err) // never happens, just for sanity. | ||
} | ||
results[i].Slowest = append(results[i].Slowest, id) | ||
} | ||
} | ||
return results, nil | ||
} | ||
|
||
func (in *InfluxDBStore) Traces(opts TracesOpts) ([]*Trace, error) { | ||
traces := make([]*Trace, 0) | ||
rootSpansQuery := fmt.Sprintf("SELECT * FROM spans WHERE parent_id='%s'", zeroID) | ||
|
@@ -598,6 +705,8 @@ func spansFromRow(row influxDBModels.Row) ([]*Span, error) { | |
|
||
// Checks if current column is some span's ID, if so set to the span & continue with next field. | ||
switch column { | ||
case "name", "duration": | ||
continue // aggregation | ||
case "trace_id": | ||
traceID, err := fieldToSpanID(field, errFieldType) | ||
if err != nil { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great to see InfluxDB aggregation features landing here! I think this is a great working starting point 🌟
Also, I'd like to mention that continuous queries give us interesting room for performance improvements here: we could down-sample the N-slowest spans at a fixed interval, saving those down-sampled spans to a new measurement, perhaps called
slowest_spans
; so we would end-up querying spans on theslowest_spans
measurement, I think we can handle this in a new PR.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I appreciate the notes on continuous queries! I think I originally saw these but didn't understand exactly what they did, but your explanation makes it much more clear. Certainly a good idea to investigate in a follow-up PR.