Skip to content

Commit 18dcdfb

Browse files
bavneetsingh16Bavneet Singhbragi92wanlonghenry
authored
[k8s-extension] Update extension CLI to v1.6.7 (#9043)
* add pester tests for k8s-extension * fix testcases for nodepool image issues (#5) * update readme and version release notes (#6) * fix: simplify logic and enable correct recording rule groups for managed prom extension (#7) * update readme and version release notes (#6) * fix: simplify logic and enable correct recording rule groups for managed prom extension (#7) * Extend ContainerInsights Extension for high log scale mode support (#9) * update python version to 3.13 (#10) * add pester tests for k8s-extension * fix testcases for nodepool image issues (#5) * update readme and version release notes (#6) * fix: simplify logic and enable correct recording rule groups for managed prom extension (#7) * update readme and version release notes (#6) * fix: simplify logic and enable correct recording rule groups for managed prom extension (#7) * Extend ContainerInsights Extension for high log scale mode support (#9) * update python version to 3.13 (#10) * update readme and version release notes (#12) * remove extension specific pester tests --------- Co-authored-by: Bavneet Singh <[email protected]> Co-authored-by: bragi92 <[email protected]> Co-authored-by: Long Wan <[email protected]>
1 parent 31b0ff3 commit 18dcdfb

File tree

3 files changed

+166
-3
lines changed

3 files changed

+166
-3
lines changed

src/k8s-extension/HISTORY.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
Release History
44
===============
55

6+
1.6.7
7+
+++++++++++++++++++
8+
* microsoft.azuremonitor.containers: Extend ContainerInsights Extension for high log scale mode support.
9+
610
1.6.6
711
++++++++++++++++++
812
* microsoft.entraworkloadiam: Remove code that is no longer in use.

src/k8s-extension/azext_k8s_extension/partner_extensions/ContainerInsights.py

Lines changed: 161 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,21 @@
3333
logger = get_logger(__name__)
3434
DCR_API_VERSION = "2022-06-01"
3535

36+
ContainerInsightsStreams = [
37+
"Microsoft-ContainerLog",
38+
"Microsoft-ContainerLogV2-HighScale",
39+
"Microsoft-KubeEvents",
40+
"Microsoft-KubePodInventory",
41+
"Microsoft-KubeNodeInventory",
42+
"Microsoft-KubePVInventory",
43+
"Microsoft-KubeServices",
44+
"Microsoft-KubeMonAgentEvents",
45+
"Microsoft-InsightsMetrics",
46+
"Microsoft-ContainerInventory",
47+
"Microsoft-ContainerNodeInventory",
48+
"Microsoft-Perf",
49+
]
50+
3651

3752
class ContainerInsights(DefaultExtension):
3853
def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_type, cluster_rp,
@@ -83,6 +98,7 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
8398
# Delete DCR-A if it exists incase of MSI Auth
8499
useAADAuth = False
85100
isDCRAExists = False
101+
enable_high_log_scale_mode = False
86102
cluster_rp, _ = get_cluster_rp_api_version(cluster_type=cluster_type, cluster_rp=cluster_rp)
87103
try:
88104
extension = client.get(resource_group_name, cluster_rp, cluster_type, cluster_name, name)
@@ -95,10 +111,15 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
95111
return
96112

97113
subscription_id = get_subscription_id(cmd.cli_ctx)
114+
resources = cf_resources(cmd.cli_ctx, subscription_id)
98115
# handle cluster type here
99116
cluster_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/{2}/{3}/{4}'.format(subscription_id, resource_group_name, cluster_rp, cluster_type, cluster_name)
117+
workspace_resource_id = None
100118
if (extension is not None) and (extension.configuration_settings is not None):
101119
configSettings = extension.configuration_settings
120+
# Extract workspace resource ID if present
121+
if 'logAnalyticsWorkspaceResourceID' in configSettings:
122+
workspace_resource_id = configSettings['logAnalyticsWorkspaceResourceID']
102123
# omsagent is being renamed to ama-logs. Check for both for compatibility
103124
if 'omsagent.useAADAuth' in configSettings:
104125
useAADAuthSetting = configSettings['omsagent.useAADAuth']
@@ -108,6 +129,16 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
108129
useAADAuthSetting = configSettings['amalogs.useAADAuth']
109130
if (isinstance(useAADAuthSetting, str) and str(useAADAuthSetting).lower() == "true") or (isinstance(useAADAuthSetting, bool) and useAADAuthSetting):
110131
useAADAuth = True
132+
133+
# Check if high log scale mode was enabled
134+
if useAADAuth and 'amalogs.enableHighLogScaleMode' in configSettings:
135+
highLogScaleSetting = configSettings['amalogs.enableHighLogScaleMode']
136+
if isinstance(highLogScaleSetting, str):
137+
enable_high_log_scale_mode = (highLogScaleSetting.lower() == "true")
138+
elif isinstance(highLogScaleSetting, bool):
139+
enable_high_log_scale_mode = highLogScaleSetting
140+
else:
141+
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true/false or boolean type')
111142
if useAADAuth:
112143
association_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension?api-version={DCR_API_VERSION}"
113144
for _ in range(3):
@@ -131,6 +162,41 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
131162
except Exception:
132163
pass # its OK to ignore the exception since MSI auth in preview
133164

165+
if useAADAuth:
166+
# Get the workspace region if workspace_resource_id is available
167+
workspace_region = None
168+
if workspace_resource_id:
169+
try:
170+
workspace_resource = resources.get_by_id(workspace_resource_id, '2015-11-01-preview')
171+
workspace_region = workspace_resource.location.replace(" ", "").lower()
172+
except Exception as ex:
173+
logger.warning("Skipping DCR and DCE deletion due to inability to determine workspace region")
174+
return
175+
# If workspace_region still couldn't be determined, skip deletion
176+
if not workspace_region:
177+
logger.warning("Workspace region could not be determined. Skipping DCR and DCE deletion.")
178+
return
179+
180+
# Use workspace_region for DCR name to match creation logic
181+
dcr_name = f"MSCI-{workspace_region}-{cluster_name}"
182+
dcr_name = dcr_name[0:64]
183+
184+
dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dcr_name}"
185+
dcr_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dcr_resource_id}?api-version={DCR_API_VERSION}"
186+
response = send_raw_request(cmd.cli_ctx, "GET", dcr_url)
187+
dcr_config = json.loads(response.text)
188+
# Delete the DCR
189+
for _ in range(3):
190+
try:
191+
send_raw_request(cmd.cli_ctx, "DELETE", dcr_url,)
192+
logger.info(f"Successfully deleted DCR: {dcr_name}")
193+
break
194+
except Exception as ex:
195+
logger.warning(f"Error deleting DCR: {str(ex)}")
196+
pass
197+
198+
if enable_high_log_scale_mode:
199+
_delete_dce_for_dcr(cmd, subscription_id, resource_group_name, dcr_config)
134200

135201
# Custom Validation Logic for Container Insights
136202

@@ -464,6 +530,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
464530
subscription_id = get_subscription_id(cmd.cli_ctx)
465531
workspace_resource_id = ''
466532
useAADAuth = True
533+
enableHighLogScaleMode = False # Default value
467534
if 'amalogs.useAADAuth' not in configuration_settings:
468535
configuration_settings['amalogs.useAADAuth'] = "true"
469536
extensionSettings = {}
@@ -520,6 +587,16 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
520587
raise InvalidArgumentValueError('streams must be an array type')
521588
extensionSettings["dataCollectionSettings"] = dataCollectionSettings
522589

590+
if useAADAuth and 'amalogs.enableHighLogScaleMode' in configuration_settings:
591+
enableHighLogScaleMode = configuration_settings['amalogs.enableHighLogScaleMode']
592+
if isinstance(enableHighLogScaleMode, str):
593+
enableHighLogScaleMode_str = enableHighLogScaleMode.lower()
594+
if enableHighLogScaleMode_str not in ["true", "false"]:
595+
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false')
596+
enableHighLogScaleMode = (enableHighLogScaleMode_str == "true")
597+
elif not isinstance(enableHighLogScaleMode, bool):
598+
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false')
599+
523600
workspace_resource_id = workspace_resource_id.strip()
524601

525602
if configuration_protected_settings is not None:
@@ -548,7 +625,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
548625
if is_ci_extension_type:
549626
if useAADAuth:
550627
logger.info("creating data collection rule and association")
551-
_ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings)
628+
_ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enableHighLogScaleMode)
552629
elif not _is_container_insights_solution_exists(cmd, workspace_resource_id):
553630
logger.info("Creating ContainerInsights solution resource, since it doesn't exist and it is using legacy authentication")
554631
_ensure_container_insights_for_monitoring(cmd, workspace_resource_id).result()
@@ -597,6 +674,37 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
597674
configuration_settings['amalogs.domain'] = 'opinsights.azure.microsoft.scloud'
598675

599676

677+
def _delete_dce_for_dcr(cmd, subscription_id, cluster_resource_group_name, dcr_config):
678+
"""Delete Data Collection Endpoint associated with a DCR if it exists"""
679+
try:
680+
if ("properties" in dcr_config and
681+
"dataCollectionEndpointId" in dcr_config["properties"] and
682+
dcr_config["properties"]["dataCollectionEndpointId"]):
683+
684+
dce_id = dcr_config["properties"]["dataCollectionEndpointId"]
685+
dce_parts = dce_id.split('/')
686+
687+
if len(dce_parts) > 0:
688+
dce_name = dce_parts[-1]
689+
dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{dce_name}"
690+
dce_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dce_resource_id}?api-version=2022-06-01"
691+
# Try to delete up to 3 times
692+
for retry in range(3):
693+
try:
694+
send_raw_request(cmd.cli_ctx, "DELETE", dce_url)
695+
logger.info("Successfully deleted DCE: %s", dce_name)
696+
return True
697+
except CLIError as e:
698+
if "ResourceNotFound" in str(e):
699+
return True
700+
if retry == 2:
701+
logger.warning("Failed to delete DCE: %s - %s", dce_name, str(e))
702+
return False
703+
logger.info("Retrying DCE deletion after error: %s", str(e))
704+
except CLIError:
705+
pass
706+
return True
707+
600708
def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url):
601709
tags = {}
602710
_MAX_RETRY_TIMES = 3
@@ -617,7 +725,7 @@ def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url):
617725
return tags
618726

619727

620-
def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings):
728+
def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enable_high_log_scale_mode):
621729
from azure.core.exceptions import HttpResponseError
622730

623731
cluster_region = ''
@@ -652,6 +760,18 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
652760
dataCollectionRuleName = dataCollectionRuleName[0:64]
653761
dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}"
654762

763+
# ingestion DCE MUST be in workspace region
764+
ingestionDataCollectionEndpointName = f"MSCI-ingest-{workspace_region}-{cluster_name}"
765+
# Max length of the DCE name is 43 chars
766+
ingestionDataCollectionEndpointName = _trim_suffix_if_needed(ingestionDataCollectionEndpointName[0:43])
767+
ingestion_dce_resource_id = None
768+
769+
# create ingestion DCE if high log scale mode enabled
770+
if enable_high_log_scale_mode:
771+
ingestion_dce_resource_id = create_data_collection_endpoint(
772+
cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName
773+
)
774+
655775
# first get the association between region display names and region IDs (because for some reason
656776
# the "which RPs are available in which regions" check returns region display names)
657777
region_names_to_id = {}
@@ -677,6 +797,8 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
677797
# get existing tags on the container insights extension DCR if the customer added any
678798
existing_tags = get_existing_container_insights_extension_dcr_tags(cmd, dcr_url)
679799
streams = ["Microsoft-ContainerInsights-Group-Default"]
800+
if enable_high_log_scale_mode:
801+
streams = ContainerInsightsStreams
680802
if extensionSettings is None:
681803
extensionSettings = {}
682804
if 'dataCollectionSettings' in extensionSettings.keys():
@@ -691,6 +813,11 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
691813
}
692814
extensionSettings["dataCollectionSettings"] = dataCollectionSettings
693815

816+
if enable_high_log_scale_mode:
817+
for i, v in enumerate(streams):
818+
if v == "Microsoft-ContainerLogV2":
819+
streams[i] = "Microsoft-ContainerLogV2-HighScale"
820+
694821
# create the DCR
695822
dcr_creation_body = json.dumps(
696823
{
@@ -722,6 +849,7 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
722849
}
723850
]
724851
},
852+
"dataCollectionEndpointId": ingestion_dce_resource_id
725853
},
726854
}
727855
)
@@ -755,3 +883,34 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
755883
error = e
756884
else:
757885
raise error
886+
887+
888+
def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName):
889+
# create the ingestion DCE
890+
ingestion_dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{ingestionDataCollectionEndpointName}"
891+
ingestion_dce_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{ingestion_dce_resource_id}?api-version=2022-06-01"
892+
ingestion_dce_creation_body = json.dumps({
893+
"location": workspace_region,
894+
"kind": "Linux",
895+
"properties": {
896+
"networkAcls": {
897+
"publicNetworkAccess": "Enabled"
898+
}
899+
}
900+
})
901+
error = None
902+
for _ in range(3):
903+
try:
904+
send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body)
905+
return ingestion_dce_resource_id
906+
except AzCLIError as e:
907+
error = e
908+
if error:
909+
raise error
910+
return ingestion_dce_resource_id
911+
912+
913+
def _trim_suffix_if_needed(s, suffix="-"):
914+
if s.endswith(suffix):
915+
s = s[:-len(suffix)]
916+
return s

src/k8s-extension/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
# TODO: Add any additional SDK dependencies here
3434
DEPENDENCIES = []
3535

36-
VERSION = "1.6.6"
36+
VERSION = "1.6.7"
3737

3838
with open("README.rst", "r", encoding="utf-8") as f:
3939
README = f.read()

0 commit comments

Comments
 (0)