This repository was archived by the owner on Aug 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 107
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1680 from grafana/parrot-init
mt-parrot: continuous validation by sending dummy stats and querying them back
- Loading branch information
Showing
11 changed files
with
530 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"github.com/grafana/metrictank/clock" | ||
"github.com/grafana/metrictank/schema" | ||
log "github.com/sirupsen/logrus" | ||
"sync/atomic" | ||
) | ||
|
||
func produceTestMetrics(metrics []*schema.MetricData) { | ||
for tick := range clock.AlignedTickLossless(testMetricsInterval) { | ||
for _, metric := range metrics { | ||
metric.Time = tick.Unix() | ||
metric.Value = float64(tick.Unix()) | ||
} | ||
publisher.Flush(metrics) | ||
atomic.StoreInt64(&lastPublish, tick.Unix()) | ||
log.Infof("flushed metrics for ts %d", tick.Unix()) | ||
} | ||
} | ||
|
||
//generateMetrics generates a MetricData that hashes to each of numPartitions partitions | ||
func generateMetrics(numPartitions int32) []*schema.MetricData { | ||
var metrics []*schema.MetricData | ||
for i := int32(0); i < numPartitions; i++ { | ||
metrics = append(metrics, generateMetric(i)) | ||
} | ||
return metrics | ||
} | ||
|
||
//generateMetric generates a single MetricData that hashes to the given partition | ||
func generateMetric(desiredPartition int32) *schema.MetricData { | ||
metric := schema.MetricData{ | ||
OrgId: orgId, | ||
Unit: "partyparrots", | ||
Mtype: "gauge", | ||
Interval: int(testMetricsInterval.Seconds()), | ||
} | ||
|
||
for i := 1; true; i++ { | ||
metric.Name = fmt.Sprintf("parrot.testdata.%d.identity.%s", desiredPartition, generatePartitionSuffix(i)) | ||
id, err := metric.PartitionID(partitionMethod, partitionCount) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
if id == desiredPartition { | ||
log.Infof("metric for partition %d: %s", desiredPartition, metric.Name) | ||
return &metric | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// alphabet holds the lowercase letters used to build name suffixes.
var alphabet = []rune("abcdefghijklmnopqrstuvwxyz")

// generatePartitionSuffix deterministically maps an index to a lowercase
// suffix: 0..25 give "a".."z", 26 gives "aa", 27 gives "ab", and so on, with
// 702 being the first three-letter suffix ("aaa"). Callers can step through
// successive indexes to try distinct suffixes.
func generatePartitionSuffix(i int) string {
	// The last letter comes from the lowest "digit"; each further letter is
	// prepended after shifting the index down by one position.
	suffix := string(alphabet[i%26])
	for i > 25 {
		i = i/26 - 1
		suffix = string(alphabet[i%26]) + suffix
	}
	return suffix
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
package main | ||
|
||
import ( | ||
"os" | ||
"strings" | ||
"time" | ||
|
||
"github.com/grafana/metrictank/cmd/mt-fakemetrics/out" | ||
"github.com/grafana/metrictank/cmd/mt-fakemetrics/out/gnet" | ||
"github.com/grafana/metrictank/logger" | ||
"github.com/grafana/metrictank/schema" | ||
"github.com/grafana/metrictank/stats" | ||
"github.com/raintank/met/statsd" | ||
log "github.com/sirupsen/logrus" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
var ( | ||
gatewayAddress string | ||
gatewayKey string | ||
orgId int | ||
partitionCount int32 | ||
partitionMethodString string | ||
testMetricsInterval time.Duration | ||
queryInterval time.Duration | ||
lookbackPeriod time.Duration | ||
logLevel string | ||
lastPublish int64 | ||
|
||
statsGraphite *stats.Graphite | ||
statsPrefix string | ||
statsAddr string | ||
statsBufferSize int | ||
statsTimeout time.Duration | ||
|
||
partitionMethod schema.PartitionByMethod | ||
publisher out.Out | ||
) | ||
|
||
func init() { | ||
parrotCmd.Flags().StringVar(&gatewayAddress, "gateway-address", "http://localhost:6059", "the url of the metrics gateway") | ||
parrotCmd.Flags().StringVar(&gatewayKey, "gateway-key", "", "the bearer token to include with gateway requests") | ||
parrotCmd.Flags().IntVar(&orgId, "org-id", 1, "org id to publish parrot metrics to") | ||
parrotCmd.Flags().Int32Var(&partitionCount, "partition-count", 8, "number of kafka partitions in use") | ||
parrotCmd.Flags().StringVar(&partitionMethodString, "partition-method", "bySeries", "the partition method in use on the gateway, must be one of bySeries|bySeriesWithTags|bySeriesWithTagsFnv") | ||
parrotCmd.Flags().DurationVar(&testMetricsInterval, "test-metrics-interval", 10*time.Second, "interval to send test metrics") | ||
parrotCmd.Flags().DurationVar(&queryInterval, "query-interval", 10*time.Second, "interval to query to validate metrics") | ||
parrotCmd.Flags().DurationVar(&lookbackPeriod, "lookback-period", 5*time.Minute, "how far to look back when validating metrics") | ||
parrotCmd.Flags().StringVar(&statsPrefix, "stats-prefix", "", "stats prefix (will add trailing dot automatically if needed)") | ||
parrotCmd.Flags().StringVar(&statsAddr, "stats-address", "localhost:2003", "address to send monitoring statistics to") | ||
parrotCmd.Flags().IntVar(&statsBufferSize, "stats-buffer-size", 20000, "how many messages (holding all measurements from one interval) to buffer up in case graphite endpoint is unavailable.") | ||
parrotCmd.Flags().DurationVar(&statsTimeout, "stats-timeout", time.Second*10, "timeout after which a write is considered not successful") | ||
|
||
parrotCmd.Flags().StringVar(&logLevel, "log-level", "info", "log level. panic|fatal|error|warning|info|debug") | ||
|
||
formatter := &logger.TextFormatter{} | ||
formatter.TimestampFormat = "2006-01-02 15:04:05.000" | ||
log.SetFormatter(formatter) | ||
} | ||
|
||
func main() { | ||
err := parrotCmd.Execute() | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
} | ||
|
||
var parrotCmd = &cobra.Command{ | ||
Use: "mt-parrot", | ||
Short: `generate deterministic metrics for each metrictank partition, query them back and report on correctness and performance | ||
Correctness: | ||
Monitor the parrot.monitoring.error series. There's 3 potential issues: | ||
* parrot.monitoring.error;error=http // could not execute http request | ||
* parrot.monitoring.error;error=decode // could not decode http response | ||
* parrot.monitoring.error;error=invalid // any other problem with the response itself | ||
Performance: | ||
In addition to these black-and-white measurements above, there are also more subjective measurements | ||
* parrot.monitoring.lag // how far the response is lagging behind | ||
* parrot.monitoring.nans // number of nans included in the response | ||
`, | ||
Run: func(cmd *cobra.Command, args []string) { | ||
lvl, err := log.ParseLevel(logLevel) | ||
if err != nil { | ||
log.Fatalf("failed to parse log-level, %s", err.Error()) | ||
} | ||
|
||
validateDurationsInSeconds() | ||
|
||
log.SetLevel(lvl) | ||
mustParsePartitionMethod() | ||
initGateway() | ||
initStats() | ||
|
||
metrics := generateMetrics(partitionCount) | ||
go produceTestMetrics(metrics) | ||
|
||
monitor() | ||
}, | ||
} | ||
|
||
// mustParsePartitionMethod parses the partitionScheme cli flag, | ||
// exiting if an invalid partition schema is entered or if org based partitioning is used (not currently allowed by parrot). | ||
func mustParsePartitionMethod() { | ||
var err error | ||
partitionMethod, err = schema.PartitonMethodFromString(partitionMethodString) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
if partitionMethod == schema.PartitionByOrg { | ||
log.Fatal("byOrg not supported") | ||
} | ||
} | ||
|
||
func initGateway() { | ||
var err error | ||
backend, _ := statsd.New(false, "", "") | ||
publisher, err = gnet.New(gatewayAddress+"/metrics", gatewayKey, backend) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
log.Info("gateway initialized") | ||
} | ||
|
||
func initStats() { | ||
hostname, _ := os.Hostname() | ||
prefix := strings.Replace(statsPrefix, "$hostname", strings.Replace(hostname, ".", "_", -1), -1) | ||
//need to use a negative interval so we can manually set the report timestamps | ||
statsGraphite = stats.NewGraphite(prefix, statsAddr, -1, statsBufferSize, statsTimeout) | ||
} | ||
|
||
func validateDurationsInSeconds() { | ||
if testMetricsInterval%time.Second != 0 { | ||
log.Fatal("test-metrics-interval must be in seconds") | ||
} | ||
if queryInterval%time.Second != 0 { | ||
log.Fatal("query-interval must be in seconds") | ||
} | ||
if lookbackPeriod%time.Second != 0 { | ||
log.Fatal("lookback-period must be in seconds") | ||
} | ||
} |
Oops, something went wrong.