Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/k8s-extension/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
Release History
===============

1.6.6
++++++++
* microsoft.azuremonitor.containers: Extend ContainerInsights Extension for high log scale mode support.

1.6.5
++++++++++++++++++
* microsoft.dataprotection.kubernetes: Add support for 'DisableInformerCache' configuration.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@
logger = get_logger(__name__)
DCR_API_VERSION = "2022-06-01"

# Individual Container Insights stream names. When high log scale mode is
# enabled these replace the default grouped stream in the DCR stream list.
# This must remain a list: it is handed to callers as-is and used as the
# stream list itself.
ContainerInsightsStreams = [
    "Microsoft-ContainerLog",
    "Microsoft-ContainerLogV2-HighScale",
    "Microsoft-KubeEvents",
    "Microsoft-KubePodInventory",
    "Microsoft-KubeNodeInventory",
    "Microsoft-KubePVInventory",
    "Microsoft-KubeServices",
    "Microsoft-KubeMonAgentEvents",
    "Microsoft-InsightsMetrics",
    "Microsoft-ContainerInventory",
    "Microsoft-ContainerNodeInventory",
    "Microsoft-Perf",
]


class ContainerInsights(DefaultExtension):
def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_type, cluster_rp,
Expand Down Expand Up @@ -83,6 +98,7 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
# Delete DCR-A if it exists incase of MSI Auth
useAADAuth = False
isDCRAExists = False
enable_high_log_scale_mode = False
cluster_rp, _ = get_cluster_rp_api_version(cluster_type=cluster_type, cluster_rp=cluster_rp)
try:
extension = client.get(resource_group_name, cluster_rp, cluster_type, cluster_name, name)
Expand All @@ -95,6 +111,7 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
return

subscription_id = get_subscription_id(cmd.cli_ctx)
resources = cf_resources(cmd.cli_ctx, subscription_id)
# handle cluster type here
cluster_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/{2}/{3}/{4}'.format(subscription_id, resource_group_name, cluster_rp, cluster_type, cluster_name)
if (extension is not None) and (extension.configuration_settings is not None):
Expand All @@ -108,6 +125,14 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
useAADAuthSetting = configSettings['amalogs.useAADAuth']
if (isinstance(useAADAuthSetting, str) and str(useAADAuthSetting).lower() == "true") or (isinstance(useAADAuthSetting, bool) and useAADAuthSetting):
useAADAuth = True

# Check if high log scale mode was enabled
if useAADAuth and 'amalogs.enableHighLogScaleMode' in configSettings:
highLogScaleSetting = configSettings['amalogs.enableHighLogScaleMode']
if isinstance(highLogScaleSetting, str):
enable_high_log_scale_mode = (highLogScaleSetting.lower() == "true")
elif isinstance(highLogScaleSetting, bool):
enable_high_log_scale_mode = highLogScaleSetting
if useAADAuth:
association_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension?api-version={DCR_API_VERSION}"
for _ in range(3):
Expand All @@ -131,6 +156,28 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
except Exception:
pass # its OK to ignore the exception since MSI auth in preview

if useAADAuth:
resource = resources.get_by_id(cluster_resource_id, '2020-01-01-preview')
cluster_location = resource.location.lower()
dcr_name = f"MSCI-{cluster_location}-{cluster_name}"
dcr_name = dcr_name[0:64]

dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dcr_name}"
dcr_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dcr_resource_id}?api-version={DCR_API_VERSION}"
response = send_raw_request(cmd.cli_ctx, "GET", dcr_url)
dcr_config = json.loads(response.text)
# Delete the DCR
for _ in range(3):
try:
send_raw_request(cmd.cli_ctx, "DELETE", dcr_url,)
logger.info(f"Successfully deleted DCR: {dcr_name}")
break
except Exception as ex:
logger.warning(f"Error deleting DCR: {str(ex)}")
pass

if enable_high_log_scale_mode:
_delete_dce_for_dcr(cmd, subscription_id, resource_group_name, dcr_config)

# Custom Validation Logic for Container Insights

Expand Down Expand Up @@ -464,6 +511,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
subscription_id = get_subscription_id(cmd.cli_ctx)
workspace_resource_id = ''
useAADAuth = True
enableHighLogScaleMode = False # Default value
if 'amalogs.useAADAuth' not in configuration_settings:
configuration_settings['amalogs.useAADAuth'] = "true"
extensionSettings = {}
Expand Down Expand Up @@ -520,6 +568,16 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
raise InvalidArgumentValueError('streams must be an array type')
extensionSettings["dataCollectionSettings"] = dataCollectionSettings

if useAADAuth and 'amalogs.enableHighLogScaleMode' in configuration_settings:
enableHighLogScaleMode = configuration_settings['amalogs.enableHighLogScaleMode']
if isinstance(enableHighLogScaleMode, str):
enableHighLogScaleMode_str = enableHighLogScaleMode.lower()
if enableHighLogScaleMode_str not in ["true", "false"]:
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false')
enableHighLogScaleMode = (enableHighLogScaleMode_str == "true")
elif not isinstance(enableHighLogScaleMode, bool):
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false')

workspace_resource_id = workspace_resource_id.strip()

if configuration_protected_settings is not None:
Expand Down Expand Up @@ -548,7 +606,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
if is_ci_extension_type:
if useAADAuth:
logger.info("creating data collection rule and association")
_ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings)
_ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enableHighLogScaleMode)
elif not _is_container_insights_solution_exists(cmd, workspace_resource_id):
logger.info("Creating ContainerInsights solution resource, since it doesn't exist and it is using legacy authentication")
_ensure_container_insights_for_monitoring(cmd, workspace_resource_id).result()
Expand Down Expand Up @@ -597,6 +655,37 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
configuration_settings['amalogs.domain'] = 'opinsights.azure.microsoft.scloud'


def _delete_dce_for_dcr(cmd, subscription_id, cluster_resource_group_name, dcr_config):
"""Delete Data Collection Endpoint associated with a DCR if it exists"""
try:
if ("properties" in dcr_config and
"dataCollectionEndpointId" in dcr_config["properties"] and
dcr_config["properties"]["dataCollectionEndpointId"]):

dce_id = dcr_config["properties"]["dataCollectionEndpointId"]
dce_parts = dce_id.split('/')

if len(dce_parts) > 0:
dce_name = dce_parts[-1]
dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{dce_name}"
dce_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dce_resource_id}?api-version=2022-06-01"
# Try to delete up to 3 times
for retry in range(3):
try:
send_raw_request(cmd.cli_ctx, "DELETE", dce_url)
logger.info("Successfully deleted DCE: %s", dce_name)
return True
except CLIError as e:
if "ResourceNotFound" in str(e):
return True
if retry == 2:
logger.warning("Failed to delete DCE: %s - %s", dce_name, str(e))
return False
logger.info("Retrying DCE deletion after error: %s", str(e))
except CLIError:
pass
return True

def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url):
tags = {}
_MAX_RETRY_TIMES = 3
Expand All @@ -617,7 +706,7 @@ def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url):
return tags


def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings):
def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enable_high_log_scale_mode):
from azure.core.exceptions import HttpResponseError

cluster_region = ''
Expand Down Expand Up @@ -652,6 +741,18 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
dataCollectionRuleName = dataCollectionRuleName[0:64]
dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}"

# ingestion DCE MUST be in workspace region
ingestionDataCollectionEndpointName = f"MSCI-ingest-{workspace_region}-{cluster_name}"
# Max length of the DCE name is 43 chars
ingestionDataCollectionEndpointName = _trim_suffix_if_needed(ingestionDataCollectionEndpointName[0:43])
ingestion_dce_resource_id = None

# create ingestion DCE if high log scale mode enabled
if enable_high_log_scale_mode:
ingestion_dce_resource_id = create_data_collection_endpoint(
cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName
)

# first get the association between region display names and region IDs (because for some reason
# the "which RPs are available in which regions" check returns region display names)
region_names_to_id = {}
Expand All @@ -677,6 +778,8 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
# get existing tags on the container insights extension DCR if the customer added any
existing_tags = get_existing_container_insights_extension_dcr_tags(cmd, dcr_url)
streams = ["Microsoft-ContainerInsights-Group-Default"]
if enable_high_log_scale_mode:
streams = ContainerInsightsStreams
if extensionSettings is None:
extensionSettings = {}
if 'dataCollectionSettings' in extensionSettings.keys():
Expand All @@ -691,6 +794,11 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
}
extensionSettings["dataCollectionSettings"] = dataCollectionSettings

if enable_high_log_scale_mode:
for i, v in enumerate(streams):
if v == "Microsoft-ContainerLogV2":
streams[i] = "Microsoft-ContainerLogV2-HighScale"

# create the DCR
dcr_creation_body = json.dumps(
{
Expand Down Expand Up @@ -722,6 +830,7 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
}
]
},
"dataCollectionEndpointId": ingestion_dce_resource_id
},
}
)
Expand Down Expand Up @@ -755,3 +864,34 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
error = e
else:
raise error


def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName):
    """Create (PUT) the ingestion Data Collection Endpoint and return its id.

    The endpoint is created in ``cluster_resource_group_name`` with location
    ``workspace_region``, kind ``Linux`` and public network access enabled.
    The PUT is attempted up to three times.

    :return: ARM resource id of the endpoint.
    :raises AzCLIError: the error from the last attempt when all three fail.
    """
    dce_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{ingestionDataCollectionEndpointName}"
    request_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dce_id}?api-version=2022-06-01"

    network_acls = {"publicNetworkAccess": "Enabled"}
    payload = json.dumps(
        {
            "location": workspace_region,
            "kind": "Linux",
            "properties": {"networkAcls": network_acls},
        }
    )

    last_failure = None
    attempts_remaining = 3
    while attempts_remaining > 0:
        attempts_remaining -= 1
        try:
            send_raw_request(cmd.cli_ctx, "PUT", request_url, body=payload)
        except AzCLIError as exc:
            last_failure = exc
        else:
            return dce_id

    # Reaching this point means every attempt failed.
    raise last_failure


def _trim_suffix_if_needed(s, suffix="-"):
if s.endswith(suffix):
s = s[:-len(suffix)]
return s
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
interactions:
- request:
body: null
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
CommandName:
- k8s-extension create
Connection:
- keep-alive
ParameterSetName:
- -g -n -c --cluster-type --extension-type --configuration-settings
User-Agent:
- AZURECLI/2.75.0 azsdk-python-core/1.31.0 Python/3.10.12 (Linux-6.6.87.2-microsoft-standard-WSL2-x86_64-with-glibc2.35)
method: GET
uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/azurecli-tests/providers/Microsoft.Kubernetes/connectedClusters/arc-cluster?api-version=2020-01-01-preview
response:
body:
string: '{"error":{"code":"ResourceGroupNotFound","message":"Resource group
''azurecli-tests'' could not be found."}}'
headers:
cache-control:
- no-cache
content-length:
- '106'
content-type:
- application/json; charset=utf-8
date:
- Tue, 01 Jul 2025 18:42:31 GMT
expires:
- '-1'
pragma:
- no-cache
strict-transport-security:
- max-age=31536000; includeSubDomains
x-cache:
- CONFIG_NOCACHE
x-content-type-options:
- nosniff
x-ms-failure-cause:
- gateway
x-msedge-ref:
- 'Ref A: 43ED95238AFF4CEA964D74A2529E4C51 Ref B: BL2AA2011005060 Ref C: 2025-07-01T18:42:32Z'
status:
code: 404
message: Not Found
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
interactions:
- request:
body: null
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
CommandName:
- k8s-extension create
Connection:
- keep-alive
ParameterSetName:
- -g -n -c --cluster-type --extension-type --configuration-settings
User-Agent:
- AZURECLI/2.75.0 azsdk-python-core/1.31.0 Python/3.10.12 (Linux-6.6.87.2-microsoft-standard-WSL2-x86_64-with-glibc2.35)
method: GET
uri: https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/azurecli-tests/providers/Microsoft.Kubernetes/connectedClusters/arc-cluster?api-version=2020-01-01-preview
response:
body:
string: '{"error":{"code":"ResourceGroupNotFound","message":"Resource group
''azurecli-tests'' could not be found."}}'
headers:
cache-control:
- no-cache
content-length:
- '106'
content-type:
- application/json; charset=utf-8
date:
- Tue, 01 Jul 2025 18:42:31 GMT
expires:
- '-1'
pragma:
- no-cache
strict-transport-security:
- max-age=31536000; includeSubDomains
x-cache:
- CONFIG_NOCACHE
x-content-type-options:
- nosniff
x-ms-failure-cause:
- gateway
x-msedge-ref:
- 'Ref A: BEEC6FA3AAA44D3B923946D0F5FF610C Ref B: BL2AA2011006062 Ref C: 2025-07-01T18:42:32Z'
status:
code: 404
message: Not Found
version: 1
Loading
Loading