Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
vendor
jsonnetfile.lock.json
*.zip
.worktrees
12 changes: 8 additions & 4 deletions squid-mixin/.lint
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
exclusions:
template-job-rule:
reason: "Prometheus datasource variable is being named as prometheus_datasource now while linter expects 'datasource'"
panel-datasource-rule:
reason: "Modern mixins use signal-based architecture where datasource references are handled by the framework"
Comment on lines +2 to +5
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is wrong. Ideally, our refactor shouldn't really touch the .lint file.

template-datasource-rule:
reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'"
template-instance-rule:
reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'"
panel-units-rule:
reason: "Custom units are used for better user experience in these panels"
entries:
- panel: "Client request errors"
- panel: "Server request errors"
template-datasource-rule:
reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'"
template-instance-rule:
reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'"
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
prometheusAlerts+:: {
groups+: [
new(this): {
groups: [
{
name: 'squid',
name: this.config.uid + '-alerts',
rules: [
{
alert: 'SquidHighPercentageOfHTTPServerRequestErrors',
alert: 'SquidHighHTTPServerRequestErrors',
expr: |||
rate(squid_server_http_errors_total[5m]) / clamp_min(rate(squid_server_http_requests_total[5m]),1) * 100 > %(alertsCriticalHighPercentageRequestErrors)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -17,14 +17,14 @@
summary: 'There are a high number of HTTP server errors.',
description: |||
The percentage of HTTP server request errors is {{ printf "%%.0f" $value }} over the last 5m on {{ $labels.instance }} which is above the threshold of %(alertsCriticalHighPercentageRequestErrors)s.
||| % $._config,
||| % this.config,
},
},
{
alert: 'SquidHighPercentageOfFTPServerRequestErrors',
alert: 'SquidHighFTPServerRequestErrors',
expr: |||
rate(squid_server_ftp_errors_total[5m]) / clamp_min(rate(squid_server_ftp_requests_total[5m]),1) * 100 > %(alertsCriticalHighPercentageRequestErrors)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -33,14 +33,14 @@
summary: 'There are a high number of FTP server request errors.',
description: |||
The percentage of FTP server request errors is {{ printf "%%.0f" $value }} over the last 5m on {{ $labels.instance }} which is above the threshold of %(alertsCriticalHighPercentageRequestErrors)s.
||| % $._config,
||| % this.config,
},
},
{
alert: 'SquidHighPercentageOfOtherServerRequestErrors',
alert: 'SquidHighOtherServerRequestErrors',
expr: |||
rate(squid_server_other_errors_total[5m]) / clamp_min(rate(squid_server_other_requests_total[5m]),1) * 100 > %(alertsCriticalHighPercentageRequestErrors)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -49,14 +49,14 @@
summary: 'There are a high number of other server request errors.',
description: |||
The percentage of other server request errors is {{ printf "%%.0f" $value }} over the last 5m on {{ $labels.instance }} which is above the threshold of %(alertsCriticalHighPercentageRequestErrors)s.
||| % $._config,
||| % this.config,
},
},
{
alert: 'SquidHighPercentageOfClientRequestErrors',
alert: 'SquidHighClientRequestErrors',
expr: |||
rate(squid_client_http_errors_total[5m]) / clamp_min(rate(squid_client_http_requests_total[5m]),1) * 100 > %(alertsCriticalHighPercentageRequestErrors)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -65,14 +65,14 @@
summary: 'There are a high number of HTTP client request errors.',
description: |||
The percentage of HTTP client request errors is {{ printf "%%.0f" $value }} over the last 5m on {{ $labels.instance }} which is above the threshold of %(alertsCriticalHighPercentageRequestErrors)s.
||| % $._config,
||| % this.config,
},
},
{
alert: 'SquidLowCacheHitRatio',
expr: |||
rate(squid_client_http_hits_total[10m]) / clamp_min(rate(squid_client_http_requests_total[10m]),1) * 100 < %(alertsWarningLowCacheHitRatio)s
||| % $._config,
||| % this.config,
'for': '10m',
labels: {
severity: 'warning',
Expand All @@ -81,7 +81,7 @@
summary: 'The cache hit ratio has fallen below the configured threshold (%).',
description: |||
The cache hit ratio is {{ printf "%%.0f" $value }} over the last 10m on {{ $labels.instance }} which is below the threshold of %(alertsWarningLowCacheHitRatio)s.
||| % $._config,
||| % this.config,
},
},
],
Expand Down
49 changes: 37 additions & 12 deletions squid-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,16 +1,41 @@
{
_config+:: {
dashboardTags: ['squid'],
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
local this = self,

// alerts thresholds
alertsCriticalHighPercentageRequestErrors: 5,
alertsWarningLowCacheHitRatio: 85,
enableLokiLogs: true,
enableMultiCluster: false,
multiclusterSelector: 'job=~"$job"',
squidSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
// Basic filtering
filteringSelector: 'job=~"$job", instance=~"$instance"',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the incorrect filtering selector. I believe its default should probably be:

Suggested change
filteringSelector: 'job=~"$job", instance=~"$instance"',
filteringSelector: 'job="integrations/squid"',

groupLabels: ['job'],
instanceLabels: ['instance'],

// Dashboard settings
dashboardTags: ['squid'],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dashboardTags: ['squid'],
dashboardTags: [self.uid],

dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
uid: 'squid',
dashboardNamePrefix: 'Squid',

// Logs configuration
enableLokiLogs: true,
logLabels: ['job', 'instance', 'filename'],
extraLogLabels: [],
logsVolumeGroupBy: 'level',
showLogsVolume: true,

// Multi-cluster support
enableMultiCluster: false,
multiclusterSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',

// Alert thresholds
alertsCriticalHighPercentageRequestErrors: 5, // %
alertsWarningLowCacheHitRatio: 85, // %

// Metrics source
metricsSource: 'prometheus',

// Signal definitions
signals: {
client: (import './signals/client.libsonnet')(this),
server: (import './signals/server.libsonnet')(this),
serviceTime: (import './signals/service_time.libsonnet')(this),
},
}
64 changes: 64 additions & 0 deletions squid-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
local g = import './g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';

{
local root = self,
new(this):
local prefix = this.config.dashboardNamePrefix;
local links = this.grafana.links;
local tags = this.config.dashboardTags;
local uid = this.config.uid;
local vars = commonlib.variables.new(
filteringSelector=this.config.filteringSelector,
groupLabels=this.config.groupLabels,
instanceLabels=this.config.instanceLabels,
varMetric='squid_server_http_requests_total',
customAllValue='.+',
enableLokiLogs=this.config.enableLokiLogs,
);
local annotations = {};
local refresh = this.config.dashboardRefresh;
local period = this.config.dashboardPeriod;
local timezone = this.config.dashboardTimezone;

{
'squid-overview.json':
g.dashboard.new(prefix + ' overview')
+ g.dashboard.withDescription('')
+ g.dashboard.withPanels(
g.util.panel.resolveCollapsedFlagOnRows(
g.util.grid.wrapPanels(
[
this.grafana.rows.clientRow,
this.grafana.rows.serverRow,
]
+
if this.config.enableLokiLogs then
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should be using the logs-lib library here instead of combined logs/metrics panels.

[this.grafana.rows.logsRow]
else
[]
)
)
)
+ root.applyCommon(
vars.multiInstance,
uid + '-overview',
tags,
links,
annotations,
timezone,
refresh,
period
),
},

// Returns the dashboard settings that `new` merges into a dashboard:
// tags, uid, links, timezone, refresh, time-range start, variables and annotations.
//   vars        - value handed to g.dashboard.withVariables
//   uid         - dashboard uid
//   tags        - dashboard tags
//   links       - object; each field value must expose asDashboardLink()
//   annotations - object; its field values are passed on as a list
//   timezone    - value handed to g.dashboard.withTimezone
//   refresh     - value handed to g.dashboard.withRefresh
//   period      - value handed to g.dashboard.time.withFrom
applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
  // One dashboard link per field of `links`, in std.objectFields order.
  local dashboardLinks = std.map(
    function(name) links[name].asDashboardLink(),
    std.objectFields(links)
  );
  local annotationList = std.objectValues(annotations);

  g.dashboard.withTags(tags)
  + g.dashboard.withUid(uid)
  + g.dashboard.withLinks(dashboardLinks)
  + g.dashboard.withTimezone(timezone)
  + g.dashboard.withRefresh(refresh)
  + g.dashboard.time.withFrom(period)
  + g.dashboard.withVariables(vars)
  + g.dashboard.withAnnotations(annotationList),
}
1 change: 0 additions & 1 deletion squid-mixin/dashboards/dashboards.libsonnet

This file was deleted.

Loading
Loading