Skip to content
This repository has been archived by the owner on Sep 9, 2020. It is now read-only.

Add Prometheus provider for external metrics check during autoscaling #104

Merged
merged 4 commits into from
Nov 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions cmd/server/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func RegisterCommand(rootCmd *cobra.Command) error {
serverCfg.RegisterTLSConfig(cmd)
serverCfg.RegisterTelemetryConfig(cmd)
serverCfg.RegisterClusterConfig(cmd)
serverCfg.RegisterMetricProviderConfig(cmd)
logCfg.RegisterConfig(cmd)
rootCmd.AddCommand(cmd)

Expand All @@ -37,6 +38,7 @@ func runServer(_ *cobra.Command, _ []string) {
tlsConfig := serverCfg.GetTLSConfig()
telemetryConfig := serverCfg.GetTelemetryConfig()
clusterConfig := serverCfg.GetClusterConfig()
metricProviderConfig := serverCfg.GetMetricProviderConfig()

if err := verifyServerConfig(serverConfig); err != nil {
fmt.Println(err)
Expand All @@ -51,10 +53,11 @@ func runServer(_ *cobra.Command, _ []string) {
}

cfg := &server.Config{
Cluster: &clusterConfig,
Server: &serverConfig,
TLS: &tlsConfig,
Telemetry: &telemetryConfig,
Cluster: &clusterConfig,
MetricProvider: metricProviderConfig,
Server: &serverConfig,
TLS: &tlsConfig,
Telemetry: &telemetryConfig,
}
srv := server.New(log.Logger, cfg)

Expand Down
1 change: 1 addition & 0 deletions docs/configuration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The Sherpa server can be configured by supplying either CLI flags or using envir
* `--log-format` (string: "auto") - Specify the log format ("auto", "zerolog" or "human").
* `--log-level` (string: "info") - Change the level used for logging.
* `--log-use-color` (bool: true) - Use ANSI colors in logging output.
* `--metric-provider-prometheus-addr` (string: "") - The address of the Prometheus endpoint in the form `<protocol>://<addr>:<port>`.
* `--policy-engine-api-enabled` (bool: true) - Enable the Sherpa API to manage scaling policies.
* `--policy-engine-nomad-meta-enabled` (bool: false) - Enable Nomad job meta lookups to manage scaling policies.
* `--policy-engine-strict-checking-enabled` (bool: true) - When enabled, all scaling activities must pass through policy checks.
Expand Down
32 changes: 22 additions & 10 deletions docs/configuration/telemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,16 +278,10 @@ Autoscale metrics allow operators to get insight into how the autoscaler is func
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.autoscale.evaluation.error`</td>
<td>Number of autoscaling evaluation errors across all jobs</td>
<td>Number of errors</td>
<td>Counter</td>
</tr>
<tr>
<td>`sherpa.autoscale.{job}.evaluation.error`</td>
<td>Number of autoscaling evaluation errors for the job named {job}</td>
<td>Number of errors</td>
<td>Counter</td>
<td>`sherpa.autoscale.{job}.{group}.evaluation`</td>
<td>The time taken to perform the autoscaling evaluation for the job named {job} and group named {group}</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.autoscale.trigger.error`</td>
Expand All @@ -313,4 +307,22 @@ Autoscale metrics allow operators to get insight into how the autoscaler is func
<td>Number of successes</td>
<td>Counter</td>
</tr>
<tr>
<td>`sherpa.autoscale.prometheus.get_value`</td>
<td>The time taken to query Prometheus for a metric value</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.autoscale.prometheus.error`</td>
<td>Number of errors querying Prometheus for a metric value</td>
<td>Number of errors</td>
<td>Counter</td>
</tr>
<tr>
<td>`sherpa.autoscale.prometheus.success`</td>
<td>Number of successful queries of Prometheus for a metric value</td>
<td>Number of successes</td>
<td>Counter</td>
</tr>
</table>
19 changes: 15 additions & 4 deletions pkg/api/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,21 @@ type JobGroupPolicy struct {
MinCount int
ScaleOutCount int
ScaleInCount int
ScaleOutCPUPercentageThreshold int
ScaleOutMemoryPercentageThreshold int
ScaleInCPUPercentageThreshold int
ScaleInMemoryPercentageThreshold int
ScaleOutCPUPercentageThreshold *int
ScaleOutMemoryPercentageThreshold *int
ScaleInCPUPercentageThreshold *int
ScaleInMemoryPercentageThreshold *int
ExternalChecks map[string]*ExternalCheck
}

// ExternalCheck represents an individual external check within a group scaling policy.
//
// Instances are referenced from JobGroupPolicy.ExternalChecks, a map keyed by
// string. NOTE(review): the field descriptions below are inferred from the
// field names only; the code that evaluates a check is not visible here, so
// confirm them against the autoscaler implementation.
type ExternalCheck struct {
// Enabled presumably controls whether this check is evaluated at all.
Enabled bool
// Provider presumably names the external metrics backend to query
// (e.g. Prometheus) — TODO confirm the accepted values.
Provider string
// Query presumably holds the provider-specific query whose result is compared.
Query string
// ComparisonOperator presumably selects how the query result is compared
// against ComparisonValue — TODO confirm the accepted operator strings.
ComparisonOperator string
// ComparisonValue is the integer operand for the comparison; presumably the
// threshold the query result is checked against.
ComparisonValue int
// Action presumably names the scaling action to take when the comparison
// holds — TODO confirm the accepted values.
Action string
}

func (p *Policies) List() (*map[string]map[string]*JobGroupPolicy, error) {
Expand Down
Loading