Skip to content
Merged
1 change: 1 addition & 0 deletions src/k8s-extension/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Release History
1.6.5
++++++++++++++++++
* microsoft.dataprotection.kubernetes: Add support for 'DisableInformerCache' configuration.
* microsoft.azuremonitor.containers.metrics: Simplify logic and enable correct recording rule groups for the managed Prometheus extension.

1.6.4
++++++++++++++++++
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,19 @@ def get_recording_rules_template(cmd, azure_monitor_workspace_resource_id):
url = f"{armendpoint}{azure_monitor_workspace_resource_id}/providers/microsoft.alertsManagement/alertRuleRecommendations?api-version={ALERTS_API}"
r = send_raw_request(cmd.cli_ctx, "GET", url, headers=headers)
data = json.loads(r.text)
return data['value']

filtered_templates = [
template for template in data.get('value', [])
# pylint: disable=line-too-long
if template.get("properties", {}).get("alertRuleType", "").lower() == "microsoft.alertsmanagement/prometheusrulegroups" and isinstance(template.get("properties", {}).get("rulesArmTemplate", {}).get("resources"), list) and all(
isinstance(rule, dict) and "record" in rule and "expression" in rule
for resource in template["properties"]["rulesArmTemplate"]["resources"]
if resource.get("type", "").lower() == "microsoft.alertsmanagement/prometheusrulegroups"
for rule in resource.get("properties", {}).get("rules", [])
)
]

return filtered_templates


# pylint: disable=line-too-long
Expand All @@ -39,8 +51,7 @@ def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, a
for _ in range(3):
try:
headers = ['User-Agent=arc-azuremonitormetrics.put_rules.' + default_rule_group_name]
send_raw_request(cmd.cli_ctx, "PUT", url,
body=body, headers=headers)
send_raw_request(cmd.cli_ctx, "PUT", url, body=body, headers=headers)
break
except CLIError as e:
error = e
Expand All @@ -51,28 +62,28 @@ def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, a
# pylint: disable=line-too-long
def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region):
    """Create a Prometheus recording rule group for each recommended template.

    The templates are fetched with ``get_recording_rules_template`` and one
    ``put_rules`` call is issued per template. Each rule group is named
    ``<template name>-<cluster_name>`` and is addressed under the cluster's
    subscription and resource group.

    Templates whose name contains "win" (case-insensitive) are skipped.

    Rule groups are derived solely from the fetched templates; nothing is
    created from hard-coded names or fixed template positions, so every rule
    group is PUT exactly once.

    :param cmd: CLI command context; provides the ARM endpoint via
        ``cmd.cli_ctx.cloud.endpoints.resource_manager``.
    :param cluster_subscription: Subscription ID used in the rule group ID.
    :param cluster_resource_group_name: Resource group used in the rule group ID.
    :param cluster_name: Cluster name; appended to every rule group name.
    :param azure_monitor_workspace_resource_id: Azure Monitor workspace resource
        ID, used to fetch the templates and forwarded to ``put_rules``.
    :param mac_region: Forwarded unchanged to ``put_rules``.
    """
    default_rules_template = get_recording_rules_template(cmd, azure_monitor_workspace_resource_id)
    resource_manager = cmd.cli_ctx.cloud.endpoints.resource_manager

    # ``index`` is the position in the *full* template list: put_rules receives
    # the whole list together with this position, so skipped templates must
    # still be counted (hence enumerate over the unfiltered list).
    for index, rule_template in enumerate(default_rules_template):
        rule_name = rule_template["name"]

        # Skip Windows recording rules: the Arc metrics extension doesn't have Windows support.
        if "win" in rule_name.lower():
            continue

        rule_group_name = f"{rule_name}-{cluster_name}"
        rule_group_id = (
            f"/subscriptions/{cluster_subscription}"
            f"/resourceGroups/{cluster_resource_group_name}"
            f"/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{rule_group_name}"
        )
        url = f"{resource_manager}{rule_group_id}?api-version={RULES_API}"

        put_rules(
            cmd,
            rule_group_id,
            rule_group_name,
            mac_region,
            azure_monitor_workspace_resource_id,
            cluster_name,
            default_rules_template,
            url,
            True,
            index
        )
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,10 @@ def delete_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster
cluster_resource_group_name,
"KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name)
)
# NOTE(review): this name is built with " - " (spaces around the hyphen), while
# the KubernetesRecordingRulesRuleGroup name passed to delete_rule just above
# uses "-" with no spaces. Confirm this matches the name the UX rule group is
# actually created with; otherwise this call targets a non-existent resource.
delete_rule(
cmd,
cluster_subscription,
cluster_resource_group_name,
"UXRecordingRulesRuleGroup - {0}".format(cluster_name)
)

18 changes: 18 additions & 0 deletions testing/test/extensions/public/AzureMonitorMetrics.Tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Describe 'Azure Monitor Metrics Testing' {
$extensionName = "azuremonitor-metrics"
$extensionAgentName = "ama-metrics"
$extensionAgentNamespace = "kube-system"
$workspaceResourceGroup = $null # Initialize here for shared scope

. $PSScriptRoot/../../helper/Constants.ps1
. $PSScriptRoot/../../helper/Helper.ps1
Expand Down Expand Up @@ -47,6 +48,23 @@ Describe 'Azure Monitor Metrics Testing' {
$extensionExists | Should -Not -BeNullOrEmpty
}

It 'Verifies rule groups were created' {
    # Names of the recording rule groups expected to exist for this cluster.
    $clusterName = $ENVCONFIG.arcClusterName
    $expectedRuleGroupNames = @(
        "KubernetesRecordingRulesRuleGroup-$clusterName",
        "NodeRecordingRulesRuleGroup-$clusterName"
    )

    # Capture the CLI output before parsing it so that a failing `az` call is
    # reported as a CLI failure rather than as "no rule groups were created".
    $output = az resource list --resource-group $($ENVCONFIG.resourceGroup) --resource-type "Microsoft.AlertsManagement/prometheusRuleGroups" --query "[].{name:name, location:location, id:id}"
    $? | Should -BeTrue

    $ruleGroups = $output | ConvertFrom-Json
    $ruleGroups | Should -Not -BeNullOrEmpty -Because "Rule groups may take time to be created after extension onboarding"

    # Every expected rule group must be present among the listed resources.
    foreach ($expectedName in $expectedRuleGroupNames) {
        $matchingGroup = $ruleGroups | Where-Object { $_.name -eq $expectedName }
        $matchingGroup | Should -Not -BeNullOrEmpty -Because "Rule group '$expectedName' should have been created by create.py"
    }
}


It "Deletes the extension from the cluster" {
$output = az $Env:K8sExtensionName delete -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --force
$? | Should -BeTrue
Expand Down
Loading