diff --git a/src/k8s-extension/HISTORY.rst b/src/k8s-extension/HISTORY.rst index 8400bf6416a..09a13348d04 100644 --- a/src/k8s-extension/HISTORY.rst +++ b/src/k8s-extension/HISTORY.rst @@ -6,6 +6,7 @@ Release History 1.6.5 ++++++++++++++++++ * microsoft.dataprotection.kubernetes: Add support for 'DisableInformerCache' configuration. +* microsoft.azuremonitor.containers.metrics: Simplify logic and enable correct recording rule groups for managed prom extension 1.6.4 ++++++++++++++++++ diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/create.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/create.py index 5f022b6bb88..dfd53d0fa89 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/create.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/create.py @@ -15,7 +15,19 @@ def get_recording_rules_template(cmd, azure_monitor_workspace_resource_id): url = f"{armendpoint}{azure_monitor_workspace_resource_id}/providers/microsoft.alertsManagement/alertRuleRecommendations?api-version={ALERTS_API}" r = send_raw_request(cmd.cli_ctx, "GET", url, headers=headers) data = json.loads(r.text) - return data['value'] + + filtered_templates = [ + template for template in data.get('value', []) + # pylint: disable=line-too-long + if template.get("properties", {}).get("alertRuleType", "").lower() == "microsoft.alertsmanagement/prometheusrulegroups" and isinstance(template.get("properties", {}).get("rulesArmTemplate", {}).get("resources"), list) and all( + isinstance(rule, dict) and "record" in rule and "expression" in rule + for resource in template["properties"]["rulesArmTemplate"]["resources"] + if resource.get("type", "").lower() == "microsoft.alertsmanagement/prometheusrulegroups" + for rule in resource.get("properties", {}).get("rules", []) + ) + ] + + return filtered_templates # 
pylint: disable=line-too-long @@ -39,8 +51,7 @@ def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, a for _ in range(3): try: headers = ['User-Agent=arc-azuremonitormetrics.put_rules.' + default_rule_group_name] - send_raw_request(cmd.cli_ctx, "PUT", url, - body=body, headers=headers) + send_raw_request(cmd.cli_ctx, "PUT", url, body=body, headers=headers) break except CLIError as e: error = e @@ -51,28 +62,28 @@ def put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, a # pylint: disable=line-too-long def create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, mac_region): default_rules_template = get_recording_rules_template(cmd, azure_monitor_workspace_resource_id) - default_rule_group_name = "NodeRecordingRulesRuleGroup-{0}".format(cluster_name) - default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format( - cluster_subscription, - cluster_resource_group_name, - default_rule_group_name - ) - url = "{0}{1}?api-version={2}".format( - cmd.cli_ctx.cloud.endpoints.resource_manager, - default_rule_group_id, - RULES_API - ) - put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, True, 0) - default_rule_group_name = "KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name) - default_rule_group_id = "/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{2}".format( - cluster_subscription, - cluster_resource_group_name, - default_rule_group_name - ) - url = "{0}{1}?api-version={2}".format( - cmd.cli_ctx.cloud.endpoints.resource_manager, - default_rule_group_id, - RULES_API - ) - put_rules(cmd, default_rule_group_id, default_rule_group_name, mac_region, azure_monitor_workspace_resource_id, cluster_name, default_rules_template, url, True, 1) + 
for index, rule_template in enumerate(default_rules_template): + rule_name = rule_template["name"] + is_windows_rule = "win" in rule_name.lower() + + # Skip Windows recording rules because the ARC metrics extension doesn't have Windows support + if is_windows_rule: + continue + + rule_group_name = f"{rule_template['name']}-{cluster_name}" + rule_group_id = f"/subscriptions/{cluster_subscription}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.AlertsManagement/prometheusRuleGroups/{rule_group_name}" + url = f"{cmd.cli_ctx.cloud.endpoints.resource_manager}{rule_group_id}?api-version={RULES_API}" + + put_rules( + cmd, + rule_group_id, + rule_group_name, + mac_region, + azure_monitor_workspace_resource_id, + cluster_name, + default_rules_template, + url, + True, + index + ) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/delete.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/delete.py index ac945fbaed8..622d1e0fbca 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/delete.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/azuremonitormetrics/recordingrules/delete.py @@ -35,3 +35,10 @@ def delete_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster cluster_resource_group_name, "KubernetesRecordingRulesRuleGroup-{0}".format(cluster_name) ) + delete_rule( + cmd, + cluster_subscription, + cluster_resource_group_name, + "UXRecordingRulesRuleGroup - {0}".format(cluster_name) + ) + \ No newline at end of file diff --git a/testing/test/extensions/public/AzureMonitorMetrics.Tests.ps1 b/testing/test/extensions/public/AzureMonitorMetrics.Tests.ps1 index d1eca16ddb8..92d2b265b50 100644 --- a/testing/test/extensions/public/AzureMonitorMetrics.Tests.ps1 +++ b/testing/test/extensions/public/AzureMonitorMetrics.Tests.ps1 @@ -5,6 +5,7 @@ Describe 'Azure Monitor Metrics Testing' {
$extensionName = "azuremonitor-metrics" $extensionAgentName = "ama-metrics" $extensionAgentNamespace = "kube-system" + $workspaceResourceGroup = $null # Initialize here for shared scope . $PSScriptRoot/../../helper/Constants.ps1 . $PSScriptRoot/../../helper/Helper.ps1 @@ -47,6 +48,23 @@ Describe 'Azure Monitor Metrics Testing' { $extensionExists | Should -Not -BeNullOrEmpty } + It 'Verifies rule groups were created' { + $clusterName = $ENVCONFIG.arcClusterName + $expectedRuleGroupNames = @( + "KubernetesRecordingRulesRuleGroup-$clusterName", + "NodeRecordingRulesRuleGroup-$clusterName" + ) + + $ruleGroups = az resource list --resource-group $($ENVCONFIG.resourceGroup) --resource-type "Microsoft.AlertsManagement/prometheusRuleGroups" --query "[].{name:name, location:location, id:id}" | ConvertFrom-Json + $ruleGroups | Should -Not -BeNullOrEmpty -Because "Rule groups may take time to be created after extension onboarding" + + foreach ($expectedName in $expectedRuleGroupNames) { + $matchingGroup = $ruleGroups | Where-Object { $_.name -eq $expectedName } + $matchingGroup | Should -Not -BeNullOrEmpty -Because "Rule group '$expectedName' should have been created by create.py" + } + } + + It "Deletes the extension from the cluster" { $output = az $Env:K8sExtensionName delete -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --force $? | Should -BeTrue