From a7a397afbb0785736ccdbbfa5597b34e65e4717e Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 15 Oct 2024 12:55:30 -0700 Subject: [PATCH 01/27] Updating section separator regex to support --- packages/website/src/app/workshop/workshop.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/website/src/app/workshop/workshop.ts b/packages/website/src/app/workshop/workshop.ts index 28920950..cc2db699 100644 --- a/packages/website/src/app/workshop/workshop.ts +++ b/packages/website/src/app/workshop/workshop.ts @@ -2,7 +2,7 @@ import { getCurrentUrlWithQueryParams, MenuLink } from '../shared/link'; import { FileContents, LoaderOptions, loadFile } from '../shared/loader'; import { MarkdownHeading, getHeadings } from '../shared/markdown'; -const sectionSeparator = /(?:\n\n|\r\n\r\n)---(?:\n\n|\r\n\r\n)/; +const sectionSeparator = /(?:\n\n|\r\n\r\n)(?:---|===)(?:\n\n|\r\n\r\n)/; export interface WorkshopSection { title: string; From b649b5bc9caff0197b77cd2772298193bbac7d8b Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 15 Oct 2024 12:55:53 -0700 Subject: [PATCH 02/27] new workshop outline --- workshops/operating-aks-automatic/workshop.md | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 workshops/operating-aks-automatic/workshop.md diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md new file mode 100644 index 00000000..0d2c0e7b --- /dev/null +++ b/workshops/operating-aks-automatic/workshop.md @@ -0,0 +1,104 @@ +--- +published: true # Optional. Set to true to publish the workshop (default: false) +type: workshop # Required. +title: Streamline operations and developer onboarding with AKS Automatic # Required. Full title of the workshop +short_title: Operating AKS Automatic # Optional. Short title displayed in the header +description: Are you looking for ways to automate many administrative tasks in Kubernetes and make it easier for development teams to deploy their apps while maintaining security and compliance? Azure Kubernetes Service (AKS) Automatic is a new mode of operation for AKS that simplifies cluster management, reduces manual tasks, and builds in enterprise-grade best practices and policy enforcement. This session is perfect for platform operators and DevOps engineers looking to get started with AKS Automatic. # Required. +level: beginner # Required. Can be 'beginner', 'intermediate' or 'advanced' +authors: # Required. You can add as many authors as needed + - "Paul Yu" +contacts: # Required. Must match the number of authors + - "@pauldotyu" +duration_minutes: 90 # Required. Estimated duration in minutes +tags: kubernetes, azure, aks # Required. Tags for filtering and searching +wt_id: WT.mc_id=containers-153036-pauyu +--- + +## Overview + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +### Objectives + +- One +- Two +- Three +- Four + +### Prerequisites + +- One +- Two +- Three +- Four +- Five + +=== + +## Security + +Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. 
Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras. + +Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus. + +Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus. + +Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? + +Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula. + +=== + +## Secrets and config management + +Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras. + +Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus. + +Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. 
Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus. + +Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? + +Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula. + +=== + +## Scaling + +Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras. + +Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus. + +Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus. + +Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? + +Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. 
Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula.

===

## Troubleshooting

Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras.

Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus.

Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus.

Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi?

Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula.

===

## Summary

Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras.

Egestas scelerisque primis; suspendisse senectus platea adipiscing.
Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus. + +Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus. + +Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? + +Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula. From 2ced2771c72e73fc17799e619c0f63e4e77db18b Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Mon, 21 Oct 2024 22:18:27 -0700 Subject: [PATCH 03/27] security rbac --- workshops/operating-aks-automatic/workshop.md | 112 ++++++++++-------- 1 file changed, 65 insertions(+), 47 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index 0d2c0e7b..909356c6 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -9,96 +9,114 @@ authors: # Required. You can add as many authors as needed - "Paul Yu" contacts: # Required. Must match the number of authors - "@pauldotyu" -duration_minutes: 90 # Required. Estimated duration in minutes +duration_minutes: 75 # Required. Estimated duration in minutes tags: kubernetes, azure, aks # Required. Tags for filtering and searching wt_id: WT.mc_id=containers-153036-pauyu --- ## Overview -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +This lab is meant to be a hands-on experience for platform operators and DevOps engineers looking to get started with AKS Automatic. You will learn how to automate many administrative tasks in Kubernetes and make it easier for development teams to deploy their apps while maintaining security and compliance. 
AKS Automatic is a new mode of operation for Azure Kubernetes Service (AKS) that simplifies cluster management, reduces manual tasks, and builds in enterprise-grade best practices and policy enforcement. ### Objectives -- One -- Two -- Three -- Four +- ### Prerequisites -- One -- Two -- Three -- Four -- Five +The lab environment has been pre-configured for you with an AKS Automatic cluster pre-provisioned with monitoring and logging enabled. -=== +You will need the Azure CLI installed on your local machine. You can install it from [here](https://docs.microsoft.com/cli/azure/install-azure-cli). -## Security +With the Azure CLI installed, you will need to install the **aks-preview** extension to leverage preview features in AKS. -Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras. +Open a terminal and install the AKS preview extension with the following command: -Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus. +```bash +az extension add --name aks-preview +``` -Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus. +=== -Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? +## Security -Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula. +Security above all else. 
This section aims to get you comfortable with managing user access to the Kubernetes API, implementing container security, and practicing upgrade management within the cluster, covering both node OS image and Kubernetes version upgrades.

### Granting permissions to the AKS cluster

The first thing you need to do is grant the necessary permissions to the AKS cluster. AKS Automatic clusters are Azure RBAC enabled, which means you can assign roles to users, groups, and service principals to manage access to the cluster. When users try to execute kubectl commands against the cluster, they will be instructed to log in with their Microsoft Entra ID credentials for authentication, and their assigned roles will determine what they can do within the cluster.

To grant permissions to the AKS cluster, you will need to assign a role. The following built-in roles for Azure RBAC-enabled clusters are available to assign to users.

- [Azure Kubernetes Service RBAC Admin](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-admin)
- [Azure Kubernetes Service RBAC Cluster Admin](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-cluster-admin)
- [Azure Kubernetes Service RBAC Reader](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-reader)
- [Azure Kubernetes Service RBAC Writer](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-writer)

Using Azure Cloud Shell, run the following command to get the AKS cluster credentials.

```bash
az aks get-credentials --resource-group myResourceGroup --name myAKSCluster
```

Create a namespace for the developer to use.

```bash
kubectl create namespace blue
```

Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. After you have logged in, the command to create the namespace should be successful.

> The kubelogin plugin is used to authenticate with Microsoft Entra ID and can be easily installed with the following command.

```bash
az aks install-cli
```

Run the following command to get the AKS cluster ID and the developer user principal ID.

```bash
AKS_ID=$(az aks show --resource-group myResourceGroup --name myAKSCluster --query id --output tsv)
DEV_USER_PRINCIPAL_ID=$(az ad user show --id @lab.CloudPortalCredential(User2).Username --query id --output tsv)
```

Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to a developer, scoped to the **blue** namespace.

```bash
az role assignment create --role "Azure Kubernetes Service RBAC Writer" --assignee $DEV_USER_PRINCIPAL_ID --scope $AKS_ID/namespaces/blue
```

The kubelogin plugin stores the OIDC token in the `~/.kube/cache/kubelogin` directory. In order to test the permissions with a different user, you will need to clear the cached credentials.

Instead of deleting the credentials altogether, we can simply move them to a different directory. Run the following command to move the cached credentials to the parent directory.

```bash
mv ~/.kube/cache/kubelogin/*.json ..
```

> https://github.com/int128/kubelogin/issues/29

Run a kubectl command to trigger a new login.

```bash
kubectl get namespace blue
```

Since the cached credentials have been moved, you will be prompted to log in again. This time, log in using the User2 credentials. After logging in, you should see the blue namespace. Now let's see if we can create a pod in the blue namespace.

```bash
kubectl auth can-i create pods --namespace blue
```

To confirm the developer cannot create pods in the default namespace, run the following command.

```bash
kubectl auth can-i create pods --namespace default
```

After testing the permissions, delete the developer user's cached credentials, then move the admin's cached credentials back to the `~/.kube/cache/kubelogin` directory.

```bash
rm ~/.kube/cache/kubelogin/*.json
mv ../*.json ~/.kube/cache/kubelogin/
```

===

## Secrets and config management

Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras.

Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus.

Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus.

Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam.
Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? +```bash +az aks get-credentials --resource-group myResourceGroup --name myAKSCluster +``` -Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula. +Create a namespace for the developer to use. -=== - -## Scaling +```bash +kubectl create namespace blue +``` -Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras. +Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. After you have logged in, the command to create the namespace should be successful. -Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus. +> The kubelogin plugin is used to authenticate with Microsoft Entra ID and can be easily installed with the following command. -Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus. +```bash +az aks install-cli +``` -Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? +Run the following command to get the AKS cluster ID and the developer user principal ID -Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. 
Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula.

===

## Troubleshooting

Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras.

Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus.

Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus.

Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi?

Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio.
Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula. +Run a kukbectl command to trigger a new login -=== +```bash +kubectl get namespace blue +``` -## Summary +Since the cached credentials have been moved, you will be prompted to log in again. This time, login using the User2 credentials. After logging in, you should see the blue namespace. Now let's see if we can create a pod in the blue namespace. -Lorem ipsum odor amet, consectetuer adipiscing elit. Velit ullamcorper at vivamus egestas nostra potenti curabitur. Venenatis imperdiet class mus urna platea et felis. Posuere justo sodales sodales phasellus porttitor risus pellentesque. Mi fermentum urna sapien venenatis maximus magna. Erat est morbi felis, id porttitor ac. Senectus dapibus scelerisque convallis duis eros in urna. Dictum vitae porttitor faucibus fusce nunc donec fusce mus. Dolor adipiscing pharetra auctor vulputate, in ridiculus mus parturient donec. Placerat commodo non himenaeos class nulla pharetra feugiat vulputate cras. +```bash +kubectl auth can-i create pods --namespace blue +``` -Egestas scelerisque primis; suspendisse senectus platea adipiscing. Felis vehicula ornare turpis vitae malesuada molestie augue sociosqu. Torquent turpis adipiscing tellus nisl dignissim elementum. Interdum congue etiam nibh euismod aliquam ac turpis. Faucibus class sed placerat senectus neque fusce nisl porttitor. Purus elementum quis turpis integer porttitor; maecenas felis. Eu at mollis quam cubilia blandit, inceptos sagittis cursus. +To confirm the developer cannot create pods in the default namespace, run the following command -Cras consectetur nascetur conubia porttitor vivamus ad nullam. Tortor habitant sociosqu convallis commodo et magnis commodo senectus. Elit tristique eleifend nunc sodales odio. Rutrum volutpat vivamus vitae pulvinar integer. Metus phasellus porttitor tempor lobortis magnis; donec vulputate. Platea aliquet ultricies nam orci nulla semper dignissim. Amet viverra leo nunc curae litora. At rutrum cras eget aliquam class facilisis finibus tellus taciti. Nostra class finibus est netus finibus. +```bash +kubectl auth can-i create pods --namespace default +``` -Vehicula nascetur eleifend malesuada rutrum pharetra ante. Nullam fusce turpis malesuada rutrum morbi quisque quis quis id. Aliquam cubilia nibh cubilia laoreet vel; tempor diam id. Nisl lobortis justo maximus amet tempus malesuada lectus sodales quam. Mollis felis dictumst ultrices tincidunt consectetur aenean magna malesuada potenti. Vitae nullam egestas mi facilisis erat luctus. Efficitur cursus malesuada semper mauris senectus convallis facilisi? +After testing the permissions, delete the developer user's cached credentials, then move the admin's cached credentials back to the `~/.kube/cache/kubelogin` directory. -Egestas convallis non ac cras mollis pharetra sociosqu vehicula. Condimentum odio maecenas habitant aliquet tempus a. Fames nisl nunc enim interdum lacus luctus adipiscing. Conubia ipsum ligula commodo sit pharetra porttitor odio. Primis dolor suscipit proin dignissim phasellus nibh. Rutrum posuere nibh ridiculus, lacus nulla luctus. Ipsum montes elementum condimentum dictum facilisi vestibulum nascetur. Taciti ornare ultrices feugiat cursus molestie viverra ligula cubilia vehicula. 
+```bash +rm ~/.kube/cache/kubelogin/*.json +mv ../*.json ~/.kube/cache/kubelogin/ +``` From 1a1a8c0777b06130c4dbc02aca1c52610aa83f96 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 22 Oct 2024 12:22:38 -0700 Subject: [PATCH 04/27] config management --- workshops/operating-aks-automatic/workshop.md | 327 +++++++++++++++++- 1 file changed, 310 insertions(+), 17 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index 909356c6..d966ae6e 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -30,17 +30,24 @@ You will need the Azure CLI installed on your local machine. You can install it With the Azure CLI installed, you will need to install the **aks-preview** extension to leverage preview features in AKS. -Open a terminal and install the AKS preview extension with the following command: +Open a terminal, log into Azure, and install the AKS preview extension with the following command: ```bash +az login az extension add --name aks-preview ``` +Set the default location for resources we will create in this lab. + +```bash +az configure --defaults location=$(az group show -n myResourceGroup --query location -o tsv) +``` + === ## Security -Security above all else. This section aims to get you comfortable with managing user access to the Kubernetes API, implementing container security, and practice managing upgrades within the cluster, both node OS images and Kubernetes version upgrades. +Security above all else is the mantra! With AKS Automatic, you can leverage Microsoft Entra ID for authentication and authorization right out of the box. This means that setting up Kubernetes Role-Based Access Control (RBAC) is as simple as assigning roles to users, groups, and service principals to manage access to the cluster. When users try to execute kubectl commands against the cluster, they will be instructed to log in with their Microsoft Entra ID credentials for authentication and their assigned roles will determine what they can do within the cluster. ### Granting permissions to the AKS cluster @@ -62,7 +69,7 @@ az aks get-credentials --resource-group myResourceGroup --name myAKSCluster Create a namespace for the developer to use. ```bash -kubectl create namespace blue +kubectl create namespace dev ``` Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. After you have logged in, the command to create the namespace should be successful. @@ -73,50 +80,336 @@ Since this is the first time you are running a kubectl command, you will be prom az aks install-cli ``` -Run the following command to get the AKS cluster ID and the developer user principal ID +Run the following command to get the AKS cluster's resource ID and the developer's user principal ID. ```bash AKS_ID=$(az aks show --resource-group myResourceGroup --name myAKSCluster --query id --output tsv) DEV_USER_PRINCIPAL_ID=$(az ad user show --id @lab.CloudPortalCredential(User2).Username --query id --output tsv) ``` -Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to a developer scoped to the **blue** namespace. +Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. 
```bash
az role assignment create --role "Azure Kubernetes Service RBAC Writer" --assignee $DEV_USER_PRINCIPAL_ID --scope $AKS_ID/namespaces/dev
```

When you logged in to access the Kubernetes API via the kubectl command, you were prompted to log in with your Microsoft Entra ID. The kubelogin plugin stored the OIDC token in the **~/.kube/cache/kubelogin** directory. In order to test the permissions with a different user, you can simply move it to a different directory.

Run the following command to move the cached credentials to the parent directory.

```bash
mv ~/.kube/cache/kubelogin/*.json ~/.kube/cache/
```

Run a kubectl command to trigger a new login and authenticate with the developer's user account.

```bash
kubectl get namespace dev
```

After logging in, you should see the **dev** namespace. Next, check to see if the developer can create a Pod in the **dev** namespace by running the following command.

```bash
kubectl auth can-i create pods --namespace dev
```

You should see the output **yes**, which means the developer has the necessary permissions to create Pods in the **dev** namespace. Before testing the default namespace, let's put those permissions to the test and deploy a sample application in the assigned namespace using Helm.

```bash
helm repo add aks-store-demo https://azure-samples.github.io/aks-store-demo
helm install demo aks-store-demo/aks-store-demo-chart --namespace dev --set aiService.create=true
```

You should see that the application deploys successfully in the **dev** namespace.

> [!NOTE]
> The application will take a few minutes to deploy. We will come back to this later.

Now, check to see if the developer can create a Pod in the default namespace by running the following command.

```bash
kubectl auth can-i create pods --namespace default
```

You should see the output **no**. This means the developer does not have the necessary permissions to create Pods in the default namespace.

Great job! You have successfully granted permissions to the AKS cluster.
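If you want to double-check what was granted, you can also inspect the role assignments at the namespace scope directly. This is just a quick sanity check using the same variables from above; the output columns may vary slightly by Azure CLI version.

```bash
# List role assignments scoped to the dev namespace of the AKS cluster
az role assignment list --scope $AKS_ID/namespaces/dev --output table
```

After testing the permissions, delete the developer user's cached credentials, then move the admin user's cached credentials back to the **~/.kube/cache/kubelogin** directory by running the following commands.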
```bash
rm ~/.kube/cache/kubelogin/*.json
mv ~/.kube/cache/*.json ~/.kube/cache/kubelogin/
```

### Deployment Safeguards

As you unleash your developers to deploy their applications in the AKS cluster, you want to ensure that they are following best practices and policies. Deployment Safeguards is a feature in AKS Automatic that helps enforce best practices and policies for your AKS clusters. It is implemented via Azure Policy, and a set of policies known as an initiative is applied to your AKS cluster to ensure that resources running within it are secure, compliant, and well-managed. With AKS Automatic, it is enabled in Warning mode.

The policies that are included with Deployment Safeguards are documented [here](https://learn.microsoft.com/azure/aks/deployment-safeguards#deployment-safeguards-policies). Read carefully through each policy description, the targeted resource, and the mutation that can be applied when the feature is set to Enforcement mode. When in Enforcement mode, resources will be mutated to comply with the policies, so it is important to understand the impact of each policy.

Try deploying a Pod without any best practices in place.

```bash
kubectl run mynginx --image=nginx:latest
```

You should see warning messages similar to the following in the output.

```text
Warning: [azurepolicy-k8sazurev2containerenforceprob-e00c7e64611b1137ed2b] Container <mynginx> in your Pod <mynginx> has no <livenessProbe>. Required probes: ["readinessProbe", "livenessProbe"]
Warning: [azurepolicy-k8sazurev2containerenforceprob-e00c7e64611b1137ed2b] Container <mynginx> in your Pod <mynginx> has no <readinessProbe>. Required probes: ["readinessProbe", "livenessProbe"]
Warning: [azurepolicy-k8sazurev3containerlimits-4e7bbc2617e5447639a7] container <mynginx> has no resource limits
Warning: [azurepolicy-k8sazurev1containerrestrictedi-88f886218244b623dd93] mynginx in default does not have imagePullSecrets. Unauthenticated image pulls are not recommended.
pod/mynginx created
```

These warnings are here to remind you of the best practices that should be followed when deploying Pods in the AKS cluster. You can see that there are warnings about not having a [livenessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request), [readinessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes), [resource limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits), and [imagePullSecrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret).

Run the following command to delete the Pod.

```bash
kubectl delete pod mynginx
```

Now, deploy the Pod with some best practices in place. A manifest along the following lines, with probes and resource limits defined, will clear most of the warnings.

```bash
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: mynginx
spec:
  containers:
    - name: mynginx
      image: nginx:latest
      ports:
        - containerPort: 80
      resources:
        # resource requests and limits satisfy the container limits policy
        requests:
          cpu: 100m
          memory: 128Mi
        limits:
          cpu: 250m
          memory: 256Mi
      # liveness and readiness probes satisfy the enforce-probes policy
      livenessProbe:
        tcpSocket:
          port: 80
        initialDelaySeconds: 5
        periodSeconds: 10
      readinessProbe:
        tcpSocket:
          port: 80
        initialDelaySeconds: 3
        periodSeconds: 5
EOF
```

This time, the Pod should be created with far fewer warnings.

### Custom policy enforcement

Deployment Safeguards cover a common set of best practices, but you may also have organization-specific rules to enforce, such as only allowing container images from approved registries. Azure Policy for Kubernetes is built on OPA Gatekeeper, so you can turn a Gatekeeper ConstraintTemplate into a custom Azure Policy definition. The [Azure Policy extension for Visual Studio Code](https://learn.microsoft.com/azure/governance/policy/how-to/extension-for-vscode) helps with this translation, so install it in Visual Studio Code if it is not already installed.

Open Visual Studio Code, press **Ctrl+Shift+P**, and type **Azure: Sign In** to sign in to your Azure account.

> [!NOTE]
> If you see multiple sign-in options, choose the one that has `azure-account.login` next to it.

Next, press **Ctrl+Shift+P** again and type **Azure: Select Subscriptions** then select the subscription that contains the AKS cluster.

> [!NOTE]
> If you see multiple subscriptions, choose the one that has `azure-account.selectSubscriptions` next to it.
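Before downloading the ConstraintTemplate, it helps to know what you are looking at. A ConstraintTemplate pairs a CRD-style parameter schema with Rego logic that evaluates admission requests. A minimal allowed-repos template, in the spirit of the gatekeeper-library `k8sallowedrepos` example (the name and parameters here are illustrative), looks something like this:

```yaml
apiVersion: templates.gatekeeper.sh/v1
kind: ConstraintTemplate
metadata:
  name: k8sallowedrepos
spec:
  crd:
    spec:
      names:
        kind: K8sAllowedRepos
      validation:
        openAPIV3Schema:
          type: object
          properties:
            repos: # list of allowed image prefixes, e.g. ["mcr.microsoft.com/"]
              type: array
              items:
                type: string
  targets:
    - target: admission.k8s.gatekeeper.sh
      rego: |
        package k8sallowedrepos
        violation[{"msg": msg}] {
          container := input.review.object.spec.containers[_]
          satisfied := [good | repo = input.parameters.repos[_]; good = startswith(container.image, repo)]
          not any(satisfied)
          msg := sprintf("container <%v> has an invalid image repo <%v>", [container.name, container.image])
        }
```

Open the terminal in Visual Studio Code and download the ConstraintTemplate file to your local machine, then open the file in Visual Studio Code by running the following commands.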
```bash
wget https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml
code constrainttemplate.yaml
```

With the constrainttemplate.yaml file open in Visual Studio Code, press **Ctrl+Shift+P** and type **Azure Policy for Kubernetes: Create Policy Definition from a Constraint Template** then select the **Base64Encoded** option.

This will generate a new Azure Policy definition in JSON format. You will need to fill in details everywhere you see the text `/* EDIT HERE */`. For the **apiGroups** field, you can use the value `[""]` to target all API groups, and for the **kind** field, you can use the value `["Pod"]` to target Pods.

Here is what the finished JSON should look like: https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate-as-policy.json

Let's deploy the custom policy definition and assign it to the AKS cluster.

Navigate to the Azure Portal and search for **Policy** in the search bar.

Click on **Azure Policy** and then click on **Definitions** under the **Authoring** section.

Click on **+ Policy definition** then enter the following details:

- **Definition location**: Click the button next to the textbox, then select your subscription
- **Name**: Enter `AKS Approved registries only`
- **Description**: Enter `This policy requires that all containers in an AKS cluster are sourced from approved container registries.`
- **Category**: Click **Use existing** then select **Kubernetes** from the dropdown
- **Policy rule**: Copy and paste the JSON from the sample policy definition file [here](https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate-as-policy.json)

Click **Save**

Next, click on the **Assign policy** button, and for **Scope**, you can optionally click the button next to the textbox, then select the resource group that contains the AKS cluster.

Click **Next** then uncheck the **Only show parameters that need input or review** checkbox. This will enable you to change the **Effect** to **Deny**.

In the **Image registry** parameter, enter the value `mcr.microsoft.com/` then click **Review + create**.

Click the **Create** button to assign the policy to the AKS cluster.

The policy assignment can take up to 20 minutes to take effect. We will come back to this later.

For more information on how to create a policy definition from a ConstraintTemplate or MutationTemplate, refer to the following documentation links:

- [Create policy definition from a constraint template or mutation template](https://learn.microsoft.com/azure/governance/policy/how-to/extension-for-vscode#create-policy-definition-from-a-constraint-template-or-mutation-template)
- [Understand Azure Policy for Kubernetes clusters](https://learn.microsoft.com/azure/governance/policy/concepts/policy-for-kubernetes)
- [OPA Gatekeeper Library](https://github.com/open-policy-agent/gatekeeper-library/)

===

## Secrets and config management

Developers need a way to integrate their workloads with Azure services and make the configs available to their workloads in the cluster. They also need to ensure passwordless authentication with Microsoft Entra ID is leveraged as much as possible. This section aims to get AKS operators comfortable with setting up a centralized configuration store, syncing configs to the cluster as Kubernetes ConfigMaps, and setting up connectors to integrate with other Azure services.
### Syncing configurations to the cluster

Azure Key Vault is a cloud service for securely storing and accessing secrets. A secret is anything that you want to tightly control access to, such as API keys, passwords, or certificates. Azure App Configuration is a managed service that helps developers centralize their application configurations. It provides a service to store, manage, and retrieve application settings and feature flags.

We can leverage these two services to store our application configurations and secrets and make them available to our workloads running in the AKS cluster.

Let's start by creating an Azure App Configuration store.

```bash
AC_NAME=$(az appconfig create --name myAppConfig$RANDOM --resource-group myResourceGroup --query name -o tsv)
```

It's best practice to create a User-Assigned Managed Identity to access the Azure App Configuration store. This way, you can control access to the store and ensure that only the workloads that need the configurations can access them.

```bash
AC_ID=$(az identity create --name $AC_NAME-identity --resource-group myResourceGroup --query id -o tsv)
```

AKS offers an extension called the Azure App Configuration Provider for Kubernetes that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default, but you can leverage the AKS Service Connector to install it.

In the Azure Portal, navigate to the AKS cluster and click on **Service Connector (Preview)** under the **Settings** section.

Click on the **+ Create** button to create a new Service Connector.

In the **Basics** tab, enter the following details:

- **Kubernetes namespace**: Enter `dev`
- **Service type**: Select **App Configuration**
- **Enable App Configuration extension on Kubernetes**: Check the box
- **Connection name**: Leave as default
- **App Configuration**: Select the Azure App Configuration store you created earlier

Click **Next: Authentication**

In the **Authentication** tab, leave **Workload Identity** selected, then select the **User-assigned managed identity** option and select the managed identity you created earlier.

Click **Next: Networking** then click **Next: Review + create** and finally click **Create** once the validation check has passed.

> [!NOTE]
> This can take up to 5 minutes to complete.

After the Service Connector has been created, you can verify that the Azure App Configuration Provider for Kubernetes extension has been installed in the AKS cluster.

```bash
kubectl get pods -n azappconfig-system
```

The Azure App Configuration Provider for Kubernetes extension also installed new Custom Resource Definitions (CRDs) which you can use to sync configurations from Azure App Configuration to Kubernetes ConfigMaps.

Before you deploy the sync configuration manifest, let's create some configurations that one of the applications will use.

```bash
az appconfig kv set --name $AC_NAME --key Key1 --value Value1 --yes
az appconfig kv set --name $AC_NAME --key Key2 --value Value2 --yes
az appconfig kv set --name $AC_NAME --key Key3 --value Value3 --yes
az appconfig kv set --name $AC_NAME --key Key4 --value Value4 --yes
az appconfig kv set --name $AC_NAME --key Key5 --value Value5 --yes
```
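Before moving on, you can optionally confirm that the key-values landed in the store. This is just a quick sanity check using the same store name from above:

```bash
# List the key-values stored in the App Configuration store
az appconfig kv list --name $AC_NAME --output table
```

We can now deploy a sync configuration manifest to sync the configurations from Azure App Configuration to Kubernetes ConfigMaps. But first, we will need some values for the manifest.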
Run the following command to get the Azure App Configuration store's endpoint.

```bash
AC_ENDPOINT=$(az appconfig show -n $AC_NAME --query endpoint --output tsv)
```

To connect to the Azure App Configuration store, it is best to use Workload Identity. The AKS Automatic cluster is already configured with Workload Identity, and you created the Azure App Configuration connection using the User-Assigned Managed Identity that you created earlier. The Service Connector created a Kubernetes ServiceAccount that you can use to sync the configurations.

Run the following command to get the Kubernetes ServiceAccount name.

```bash
SA_NAME=$(kubectl get sa -n dev -o jsonpath='{.items[?(@.metadata.name!="default")].metadata.name}')
```

Using the values you collected, create a sync configuration manifest. The AzureAppConfigurationProvider resource below is a minimal sketch: it points the provider at the store endpoint, names the target ConfigMap, and authenticates with the ServiceAccount created by the Service Connector.

```bash
kubectl apply -n dev -f - <<EOF
apiVersion: azconfig.io/v1
kind: AzureAppConfigurationProvider
metadata:
  name: myappconfig
spec:
  endpoint: $AC_ENDPOINT
  target:
    configMapName: myconfigmap
  auth:
    workloadIdentity:
      serviceAccountName: $SA_NAME
EOF
```

From: Paul Yu
Date: Tue, 22 Oct 2024 17:04:58 -0700
Subject: [PATCH 05/27] scaling

---
 workshops/operating-aks-automatic/workshop.md | 219 +++++++++++++++---
 1 file changed, 193 insertions(+), 26 deletions(-)

diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md
index d966ae6e..bdac76fc 100644
--- a/workshops/operating-aks-automatic/workshop.md
+++ b/workshops/operating-aks-automatic/workshop.md
@@ -16,33 +16,58 @@

## Overview

This lab is meant to be a hands-on experience for Azure administrators and DevOps engineers looking to get started with AKS Automatic. You will learn how to automate many administrative tasks in Kubernetes and make it easier for development teams to deploy their apps while maintaining security and compliance. AKS Automatic is a new mode of operation for Azure Kubernetes Service (AKS) that simplifies cluster management, reduces manual tasks, and builds in enterprise-grade best practices and policy enforcement.

### Objectives

By the end of this lab you will be able to:

- Administer user access to the AKS cluster
- Ensure security best practices with Azure Policy and Deployment Safeguards
- Sync configurations to the cluster with Azure App Configuration Provider for Kubernetes
- Leverage AKS Service Connector for passwordless integration with Azure services
- Appropriately scale workloads across nodes with AKS Node Autoprovision
- Review workload scheduling best practices
- Troubleshoot workload failures with monitoring tools and Microsoft Copilot for Azure

### Prerequisites

The lab environment has been pre-configured for you with the following Azure resources:

- AKS Automatic cluster with monitoring enabled
- Azure Container Registry
- Azure Log Analytics Workspace
- Azure Managed Prometheus
- Azure Managed Grafana

You will also need the following tools:

You will need the Azure CLI installed on your local machine.
You can install it from [here](https://docs.microsoft.com/cli/azure/install-azure-cli).

- [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli)
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
- [Helm](https://helm.sh/docs/intro/install/)

All command-line instructions in this lab should be executed in a Bash shell. If you are using Windows, you can use the Windows Subsystem for Linux (WSL) or Azure Cloud Shell.

Before you get started, you should log in to the Azure CLI with the following command:

```bash
az login
```

You will also need to install the **aks-preview** extension to leverage preview features in AKS.

```bash
az extension add --name aks-preview
```

Finally, set the default location for resources that you will create in this lab using the Azure CLI.

```bash
az configure --defaults location=$(az group show -n myResourceGroup --query location -o tsv)
```

You are now ready to get started with the lab.

===

@@ -310,35 +335,39 @@ It's best practice to create a User-Assigned Managed Identity to access the Azur

AC_ID=$(az identity create --name $AC_NAME-identity --resource-group myResourceGroup --query id -o tsv)
```

AKS offers an extension called the Azure App Configuration Provider for Kubernetes that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually.

```bash
az k8s-extension create \
  --cluster-type managedClusters \
  --cluster-name myAKSCluster \
  --resource-group myResourceGroup \
  --name appconfigurationkubernetesprovider \
  --extension-type Microsoft.AppConfiguration \
  --auto-upgrade false \
  --version 2.0.0
```

> [!NOTE]
> This can take up to 5 minutes to complete.

After the extension has been created, you can verify that the Pods are running.
```bash kubectl get pods -n azappconfig-system ``` +We also want to establish a passwordless connection between the AKS cluster and the Azure App Configuration store. We can do this by leveraging the AKS Service Connector. The AKS Service Connector is a managed service that allows you to connect your AKS cluster to other Azure services. It will take care of manual tasks like setting up the necessary Azure RBAC roles and federated credentials for autentication, creating the necesary Kubernetes Service Account, and creating any firewall rules needed to allow the AKS cluster to communicate with the Azure service. + +```bash +az aks connection create appconfig \ + --resource-group myResourceGroup \ + --name myAKSCluster \ + --tg myResourceGroup \ + --app-config $AC_NAME \ + --workload-identity $AC_ID +``` + The Azure App Configuration Provider for Kubernetes extension also installed new Custom Resource Definitions (CRDs) which you can use to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. Before you deploy the sync configuration manifest, let's create some configurations that one of the applications will use. @@ -413,3 +442,141 @@ kubectl get cm -n dev myconfigmap -o jsonpath='{.data}' | jq ``` Great job! You have successfully synced configurations from Azure App Configuration to Kubernetes ConfigMaps. + +=== + +## Scaling + +One key differentiator of Kubernetes is its ability to scale workloads. One key differentiator of Kubernetes in the cloud is its ability to scale nodes to handle workload scale out. This section aims to get AKS operators comfortable with managing AKS Node Autoprovision, implementing workload scheduling best practices and scaling workloads with KEDA. + +### AKS Node Autoprovision + +With AKS Automatic, the Node Autoprovision feature is enabled by default. This feature allows the AKS cluster to automatically scale the number of nodes in the cluster based on the workload requirements. The cluster will scale up when there are pending Pods that cannot be scheduled due to insufficient resources and scale down when there are nodes that are underutilized. It will also scale down and try to consolidate workloads to fewer nodes to save costs. This is something that you should account for when planning for high availability for your workloads. + +AKS Node Autoprovision is built on top of the Karpenter project which was developed by friends at AWS and is now part of the CNCF. Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster. There are a few key concepts to understand when working with Karpenter: + +- **NodeClasses**: A NodeClass is a set of constraints that define the type of node that should be provisioned. For example, you can define a NodeClass that specifies the type of VM, the region, the availability zone, and the maximum number of nodes that can be provisioned. +- **NodePool**: A NodePool is a set of nodes that are provisioned based on a NodeClass. You can have multiple NodePools in a cluster, each with its own set of constraints. +- **NodeClaims**: A NodeClaim is a request for a node that matches a set of constraints. When a NodeClaim is created, Karpenter will provision a node that matches the constraints specified in the NodeClaim. + +In the AKS Automatic cluster, the default NodeClass and default NodePool are created for you. So you can start deploying workloads right away. The default NodeClass is fairly generic and should be able to handle most workloads. 
However, you can create additional NodePools with specific constraints if you have workloads that require specific VM attributes. + +You can view the default NodePool by running the following command. + +```bash +kubectl get nodepools default -o yaml +``` + +Let's create a new NodePool with specific constraints. Run the following command to create a new NodePool. + +```bash +kubectl apply -f - < Date: Tue, 22 Oct 2024 18:31:17 -0700 Subject: [PATCH 06/27] some cleanup --- workshops/operating-aks-automatic/workshop.md | 590 ++++++++++++++---- 1 file changed, 460 insertions(+), 130 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index bdac76fc..cab2b3e5 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -16,7 +16,9 @@ wt_id: WT.mc_id=containers-153036-pauyu ## Overview -This lab is meant to be a hands-on experience for Azure administrators and DevOps engineers looking to get started with AKS Automatic. You will learn how to automate many administrative tasks in Kubernetes and make it easier for development teams to deploy their apps while maintaining security and compliance. AKS Automatic is a new mode of operation for Azure Kubernetes Service (AKS) that simplifies cluster management, reduces manual tasks, and builds in enterprise-grade best practices and policy enforcement. +AKS Automatic is a new mode of operation for Azure Kubernetes Service (AKS) that simplifies cluster management, reduces manual tasks, and builds in enterprise-grade best practices and policy enforcement. This lab is meant to be a hands-on experience for Azure administrators and DevOps engineers looking to get started with AKS Automatic. You will learn how to automate many administrative tasks in Kubernetes and make it easier for development teams to deploy their apps while maintaining security and compliance. + +Many of the features you will be working with in this workshop are in preview and may not be recommended for production workloads. However, the AKS engineering team is working hard to bring these features to general availability and will be great learning opportunities for you to understand options to support developers and streamline operations. This is not platform engineering, but it is a step in the right direction to automate many of the tasks that platform engineers do today. 
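Because several of the capabilities used in this lab sit behind preview flags, it can be helpful to see which preview features are already registered in your subscription before you begin. The following is an optional sketch that assumes you are logged in with the Azure CLI; it lists registered preview features under the AKS resource provider namespace.

```bash
# List registered preview features for the Microsoft.ContainerService namespace
az feature list --namespace Microsoft.ContainerService --query "[?properties.state=='Registered'].name" --output tsv
```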
### Objectives @@ -26,7 +28,7 @@ By the end of this lab you will be able to: - Ensure security best practices with Azure Policy and Deployment Safeguards - Sync configurations to the cluster with Azure App Configuration Provider for Kubernetes - Leverage AKS Service Connector for passwordless integration with Azure services -- Appropirately scale workloads across nodes with AKS Node Autoprovision +- Appropriately scale workloads across nodes with AKS Node Autoprovision - Review workload scheduling best practices - Troubleshoot workload failures with monitoring tools and Microsoft Copilot for Azure @@ -34,36 +36,45 @@ By the end of this lab you will be able to: The lab environment has been pre-configured for you with the following Azure resources: -- AKS Automatic cluster with monitoring enabled -- Azure Container Registry -- Azure Log Analytics Workspace -- Azure Managed Prometheus -- Azure Managed Grafana +- [AKS Automatic](https://learn.microsoft.com/azure/aks/intro-aks-automatic) cluster with monitoring enabled +- [Azure Container Registry](https://learn.microsoft.com/azure/container-registry/container-registry-intro) +- [Azure Log Analytics Workspace](https://learn.microsoft.com/azure/azure-monitor/logs/log-analytics-workspace-overview) +- [Azure Managed Prometheus](https://learn.microsoft.com/azure/azure-monitor/essentials/prometheus-metrics-overview) +- [Azure Managed Grafana](https://learn.microsoft.com/azure/managed-grafana/overview) + +> [!HELP] +> The Bicep template used to deploy the lab environment can be found [here](https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/aks.bicep) You will also need the following tools: +- [Visual Studio Code](https://code.visualstudio.com/) - [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli) - [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) +- [kubelogin](https://learn.microsoft.com/azure/aks/kubelogin-authentication) - [Helm](https://helm.sh/docs/intro/install/) -All command-line instructions in this lab should be executed in a Bash shell. If you are using Windows, you can use the Windows Subsystem for Linux (WSL) or Azure Cloud Shell. +> [!NOTE] +> All command-line instructions in this lab should be executed in a Bash shell. If you are using Windows, you can use the Windows Subsystem for Linux (WSL) or Azure Cloud Shell. -Before you get started, you should log in to the Azure CLI with the following command: +Before you get started, open a Bash shell and log in to your Azure subscription with the following command: ```bash -az login +az login --use-device-code ``` -You will also need to install the **aks-preview** extension to leverage preview features in AKS. +You will be prompted to open a browser and log in with your Azure credentials. Copy the code that is displayed and paste it in the browser to authenticate. + +You will also need to install the **aks-preview** and **k8s-extension** extensions to leverage preview features in AKS and install AKS extensions. ```bash az extension add --name aks-preview +az extension add --name k8s-extension ``` Finally set the default location for resources that you will create in this lab using Azure CLI. ```bash -az configure --defaults location=$(az group show -n myResourceGroup --query location -o tsv) +az configure --defaults location=$(az group show -n myresourcegroup --query location -o tsv) ``` You are now ready to get started with the lab. @@ -72,11 +83,11 @@ You are now ready to get started with the lab. 
## Security -Security above all else is the mantra! With AKS Automatic, you can leverage Microsoft Entra ID for authentication and authorization right out of the box. This means that setting up Kubernetes Role-Based Access Control (RBAC) is as simple as assigning roles to users, groups, and service principals to manage access to the cluster. When users try to execute kubectl commands against the cluster, they will be instructed to log in with their Microsoft Entra ID credentials for authentication and their assigned roles will determine what they can do within the cluster. +Security above all else! The AKS Automatic cluster is configured with Azure Role-Based Access Control (RBAC) authentication and authorization, Azure Policy, and Deployment Safeguards enabled out of the box. This section aims to get AKS operators comfortable with administering user access to the AKS cluster, ensuring security best practices with Azure Policy and Deployment Safeguards. ### Granting permissions to the AKS cluster -The first thing you need to do is grant the necessary permissions to the AKS cluster. AKS Automatic clusters are Azure RBAC enabled, which means you can assign roles to users, groups, and service principals to manage access to the cluster. When users try to execute kubectl commands against the cluster, they will be instructed to log in with their Microsoft Entra ID credentials for authentication and their assigned roles will determine what they can do within the cluster. +With Azure RBAC enabled on the AKS cluster granting users access to the cluster is as simple as assigning roles to users, groups, and/or service principals. Users will need to run the normal **az aks get-credentials** command to download the kubeconfig file, but when users attempt to execute kubectl commands against the cluster, they will be instructed to log in with their Microsoft Entra ID credentials and their assigned roles will determine what they can do within the cluster. To grant permissions to the AKS cluster, you will need to assign a role. The following built-in roles for Azure-RBAC enabled clusters are available to assign to users. @@ -88,7 +99,7 @@ To grant permissions to the AKS cluster, you will need to assign a role. The fol Using Azure Cloud Shell, run the following command to get the AKS cluster credentials ```bash -az aks get-credentials --resource-group myResourceGroup --name myAKSCluster +az aks get-credentials --resource-group myresourcegroup --name myakscluster ``` Create a namespace for the developer to use. @@ -97,60 +108,70 @@ Create a namespace for the developer to use. kubectl create namespace dev ``` -Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. After you have logged in, the command to create the namespace should be successful. +Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. You will need to follow the same login process you went through to login into your Azure subscription but since you've already logged in before, you simply need to click through the prompts (no need to re-enter passwords). After you have logged in, the command to create the namespace should be successful. + +> [!KNOWLEDGE] +> The kubelogin plugin is used to authenticate with Microsoft Entra ID and can be easily installed with the following command: `az aks install-cli`. So if you run into an error when trying to log in, you may need to install the plugin. 
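If you ever need to double-check the exact names of these built-in roles before assigning one, you can list them with the Azure CLI. This is an optional sketch; the query simply filters on the shared "Azure Kubernetes Service RBAC" prefix of the role names described above.

```bash
# Optional: list the built-in Azure RBAC roles for AKS data-plane access
az role definition list --query "[?contains(roleName, 'Azure Kubernetes Service RBAC')].roleName" --output tsv
```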
-> The kubelogin plugin is used to authenticate with Microsoft Entra ID and can be easily installed with the following command. +Run the following command to get the AKS cluster's resource ID. ```bash -az aks install-cli +AKS_ID=$(az aks show --resource-group myresourcegroup --name myakscluster --query id --output tsv) ``` -Run the following command to get the AKS cluster's resource ID and the developer's user principal ID. +Run the following command to get the developer's user principal ID. ```bash -AKS_ID=$(az aks show --resource-group myResourceGroup --name myAKSCluster --query id --output tsv) DEV_USER_PRINCIPAL_ID=$(az ad user show --id @lab.CloudPortalCredential(User2).Username --query id --output tsv) ``` -Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. +Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. Scoping the permissions to the namespace ensures that the developer can only access the resources within the namespace and not the entire cluster. ```bash az role assignment create --role "Azure Kubernetes Service RBAC Writer" --assignee $DEV_USER_PRINCIPAL_ID --scope $AKS_ID/namespaces/dev ``` -When you logged in to access the Kubernetes API via the kubectl command, you were prompted to log in with your Microsoft Entra ID. The kubelogin plugin stored the OIDC token in the **~/.kube/cache/kubelogin** directory. In order to test the permissions with a different user, you can simply move it to a different directory. +When you logged in to access the Kubernetes API via the kubectl command, you were prompted to log in with your Microsoft Entra ID. The kubelogin plugin stored the OIDC token in the **~/.kube/cache/kubelogin** directory. In order to quickly test the permissions of a different user, we can simply move the JSON file to a different directory. -Run the following command to move the cached credentials to the parent directory. +Run the following command to move the cached credentials to its parent directory. ```bash mv ~/.kube/cache/kubelogin/*.json ~/.kube/cache/ ``` -Run a kukbectl command to trigger a new login and authenticate with the developer's user account. +Now, run the following command to get the dev namespace. This trigger a new authentication prompt. Proceed to log in with the developer's user account. ```bash kubectl get namespace dev ``` -After logging in, you should see the **dev** namespace. Next, check to see if the developer can create a Pod in the **dev** namespace by running the following command. +After logging in, head back to your terminal. You should see the **dev** namespace. + +Run the following command to check to see if the current user can create a pod in the **dev** namespace. ```bash kubectl auth can-i create pods --namespace dev ``` -You should see the output **yes**. This means the developer has the necessary permissions to create Pods in the **dev** namespace. Next test to see if the developer can create Pods in the default namespace. Let's put it to the test and deploy a sample application in the assigned namespace using Helm. +You should see the output **yes**. This means the developer has the necessary permissions to create pods in the **dev** namespace. + +Let's put this to the test and deploy a sample application in the assigned namespace using Helm. 
+ +Run the following command to add the Helm repository for the AKS Store Demo application. ```bash helm repo add aks-store-demo https://azure-samples.github.io/aks-store-demo -helm install demo aks-store-demo/aks-store-demo-chart --namespace dev --set aiService.create=true ``` -You should the application has successfully deployed in the **dev** namespace. +Run the following command to install the AKS Store Demo application in the **dev** namespace. -> [!NOTE] -> The application will take a few minutes to deploy. We will come back to this later. +```bash +helm install demo aks-store-demo/aks-store-demo-chart --namespace dev +``` + +The helm install command should show a status of "deployed". This means that the application has successfully deployed in the **dev** namespace. But it will take a few minutes to deploy so let's move on. -Now, check to see if the developer can create a Pod in the default namespace by running the following command. +Finally, let's check to see if the developer can create a pod in the **default** namespace. ```bash kubectl auth can-i create pods --namespace default @@ -160,7 +181,8 @@ You should see the output **no**. This means the developer does not have the nec Great job! You have successfully granted permissions to the AKS cluster. -After testing the permissions, delete the developer user's cached credentials, then move the admin user's cached credentials back to the **~/.kube/cache/kubelogin** directory by running the following commands. +> [!IMPORTANT] +> After testing the permissions, delete the developer user's cached credentials, then move the admin user's cached credentials back to the **~/.kube/cache/kubelogin** directory by running the following commands. ```bash rm ~/.kube/cache/kubelogin/*.json @@ -169,11 +191,11 @@ mv ~/.kube/cache/*.json ~/.kube/cache/kubelogin/ ### Deployment Safeguards -As you unleash your developers to deploy their applications in the AKS cluster, you want to ensure that they are following best practices and policies. Deployment Safeguards is a feature in AKS Automatic that helps enforce best practices and policies for your AKS clusters. It is implemented via Azure Policy and a set of policies known as an Initiative is applied to your AKS cluster to ensure that resources running within it are secure, compliant, and well-managed. With AKS Automatic it is enabled in Warning mode. +As you unleash your developers to deploy their applications in the AKS cluster, you want to ensure that they are following best practices and policies. [Deployment Safeguards](https://learn.microsoft.com/azure/aks/deployment-safeguards) is a feature in AKS Automatic that helps enforce best practices and policies for your AKS clusters. It is implemented via [Azure Policy](https://learn.microsoft.com/azure/governance/policy/overview) and a set of policies known as an [initiative](https://learn.microsoft.com/azure/governance/policy/concepts/initiative-definition-structure) is assigned to your AKS cluster to ensure that resources running within it are secure, compliant, and well-managed. The compliance state of the cluster resources are reported back to Azure Policy and can be viewed in the Azure Portal. -The policies that are included with Deployment Safeguards are documented [here](https://learn.microsoft.com/azure/aks/deployment-safeguards#deployment-safeguards-policies). Read carefully through each Policy description, the targeted resource, and the mutation that can be applied when the feature is set to Enforcement mode. 
When in Enforcement mode, resources will be mutated to comply with the policies so it is important to understand the impact of each policy. +The set of policies that are included with Deployment Safeguards are documented [here](https://learn.microsoft.com/azure/aks/deployment-safeguards#deployment-safeguards-policies). Read carefully through each policy description, the targeted resource, and the mutation that can be applied when the feature is set to **Enforcement** mode. AKS Automatic defaults to **Warning** mode which simply displays warnings in the terminal; however, when in Enforcement mode, polices will be strongly enforced by either mutating deployments to comply with the policies or denying deployments that violate policy. Therefore, it is important to understand the impact of each policy before enabling Enforcement mode. -Try deploying a Pod without any best practices in place. +Run the following command to deploy a pod without any best practices in place. ```bash kubectl run mynginx --image=nginx:latest @@ -189,15 +211,15 @@ Warning: [azurepolicy-k8sazurev1containerrestrictedi-88f886218244b623dd93] nginx pod/nginx created ``` -These warnings are here to help remind you of the best practices that should be followed when deploying Pods in the AKS cluster. You can see that there are warnings about not having a [livenessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request), [readinessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes), [resource limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits), and [imagePullSecrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret). +These warnings are here to help remind you of the best practices that should be followed when deploying Pods in the AKS cluster. There are warnings about not having a [livenessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request), [readinessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes), [resource limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits), and [imagePullSecrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret). -Run the following command to delete the Pod. +So let's try this again with some best practices in place. Run the following command to delete the pod that was just created. ```bash kubectl delete pod mynginx ``` -Now, deploy the Pod with some best practices in place. +Run the following command to redeploy the pod with some best practices in place. ```bash kubectl apply -f - < [!NOTE] +> You should now see that we've satisfied all but one best practice, which we'll address later. + +Nice work! Now you know where to expect warnings and how to address some of them. You can also view the compliance state of the cluster resources in the Azure Portal by navigating to the **Policy** blade. To get there, type `policy` in the search bar and click on **Policy** under **Services**. + +In the **Overview** section, you will see the **AKS Deployment Safeguards Policy Assignment**. 
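If you prefer to check compliance from the command line instead of the portal, a summary query like the following can help. This is a sketch that assumes the lab's resource group name, and compliance results may take a while to populate.

```bash
# Summarize Azure Policy compliance for resources in the lab resource group
az policy state summarize --resource-group myresourcegroup
```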
Click on the policy assignment to view the compliance state of the cluster resources.

### Custom policy enforcement

[Azure Policy for AKS](https://learn.microsoft.com/azure/aks/use-azure-policy) was enabled when AKS Automatic assigned the Deployment Safeguards policy initiative. This means you can also leverage other Azure Policy definitions (built-in or custom) to enforce organizational standards and compliance. When the Azure Policy for AKS feature is enabled, [Open Policy Agent (OPA) Gatekeeper](https://kubernetes.io/blog/2019/08/06/opa-gatekeeper-policy-and-governance-for-kubernetes/) is deployed in the AKS cluster. OPA Gatekeeper is a policy engine for Kubernetes that allows you to enforce policies written using [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/), a high-level declarative language.

These Pods are running in the **gatekeeper-system** namespace.

```bash
kubectl get pods -n gatekeeper-system
```

Although OPA Gatekeeper is running in the cluster, it is worth noting that this OPA Gatekeeper cannot be used outside of Azure Policy. If you want to implement a well-known or commonly used [ConstraintTemplate](https://open-policy-agent.github.io/gatekeeper/website/docs/constrainttemplates/), you'll need to translate it to an Azure Policy definition and assign it to the AKS cluster. There are **azure-policy-\*** Pods running in the cluster that are responsible for listening to Azure Policy assignments, translating them to OPA Gatekeeper ConstraintTemplates, and reporting the results back up to Azure Policy.

Let's illustrate this by attempting to deploy a commonly used ConstraintTemplate that limits container images to only those from approved container registries. Run the following command to attempt to deploy the ConstraintTemplate.

```bash
kubectl apply -f https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml
```

You will see a message "_This cluster is governed by Azure Policy. Policies must be created through Azure._"

So we need to translate this ConstraintTemplate to an Azure Policy definition.
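For orientation, the following is a minimal sketch of what such a ConstraintTemplate generally looks like, modeled on the commonly used `K8sAllowedRepos` template from the OPA Gatekeeper library. The actual file referenced above may differ in naming and parameters.

```yaml
apiVersion: templates.gatekeeper.sh/v1
kind: ConstraintTemplate
metadata:
  name: k8sallowedrepos
spec:
  crd:
    spec:
      names:
        kind: K8sAllowedRepos
      validation:
        # Parameters that a Constraint can pass in, such as the allowed registries
        openAPIV3Schema:
          type: object
          properties:
            repos:
              type: array
              items:
                type: string
  targets:
    - target: admission.k8s.gatekeeper.sh
      rego: |
        package k8sallowedrepos

        violation[{"msg": msg}] {
          container := input.review.object.spec.containers[_]
          satisfied := [good | repo = input.parameters.repos[_]; good = startswith(container.image, repo)]
          not any(satisfied)
          msg := sprintf("container <%v> has an invalid image repo <%v>, allowed repos are %v", [container.name, container.image, input.parameters.repos])
        }
```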
The good news is that you can use the [Azure Policy extension for Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension) to help with this process.

Open VS Code and make sure the Azure Policy extension is installed. To activate the extension, press **Ctrl+Shift+P** on your keyboard to open the command palette and type **Azure: Sign in** then use the web browser to authenticate with your admin user account.

> [!NOTE]
> If you see multiple sign-in options, choose the one that has `azure-account.login` next to it.

Next, press **Ctrl+Shift+P** again and type **Azure: Select Subscriptions** then select your subscription.

> [!NOTE]
> If you see multiple subscriptions, choose the one that has `azure-account.selectSubscriptions` next to it.

In VS Code, click the **Azure Policy** icon and you should see subscription resources being loaded.

Using the VS Code terminal, run the following command to download the sample ConstraintTemplate file to your local machine.

```bash
curl -o constrainttemplate.yaml https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml
```

Open the file in VS Code and press **Ctrl+Shift+P** then type **Azure Policy for Kubernetes: Create Policy Definition from a Constraint Template** and select the **Base64Encoded** option. This will generate a new Azure Policy definition in JSON format. You will need to fill in details everywhere you see the text `/* EDIT HERE */`. For the **apiGroups** field, you can use the value `[""]` to target all API groups, and for the **kind** field, you can use the value `["Pod"]` to target Pods.

> [!NOTE]
> The extension process might take a few minutes to complete. If you cannot get the extension to generate the definition, that's okay; you will use a sample JSON file to create the policy definition in the next step.

With the Azure Policy definition written, you can create the policy definition in the Azure Portal. Open this [link](https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate-as-policy.json) and copy the JSON to the clipboard.

Navigate to the Azure Portal and type `policy` in the search bar.

Click on **Policy** under **Services**, then click on **Definitions** under the **Authoring** section.
Click on **+ Policy definition** then enter the following details: - **Definition location**: Click the button next to the textbox, then select your subscription -- **Name**: Enter `AKS Approved registries only` -- **Description**: Enter `This policy requires that all containers in an AKS cluster are sourced from approved container registries.` +- **Name**: `[AKS] Approved registries only` +- **Description**: `This policy requires that all containers in an AKS cluster are sourced from approved container registries.` - **Category**: Click **Use existing** then select **Kubernetes** from the dropdown -- **Policy rule**: Copy and paste the JSON from the the sample policy definition file [here](https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate-as-policy.json) +- **Policy rule**: Paste the JSON you copied from the link above -Click **Save** +Click **Save** then click on **Assign policy** button. -Next, click on **Assign policy** button and for **Scope** you can optionally click the button next to the textbox, then select the resource group that contains the AKS cluster. +In the **Basics** tab, enter the following details: -Click **Next** then uncheck the **Only show parameters that need input or review** checkbox. This will enable you to change the **Effect** to **Deny**. +- **Scope**: Click the button next to the textbox, select the resource group that contains the AKS cluster, and don't forget to click **Select** +- Leave the rest of the fields as default -In the **Image registry** parameter, enter the value of `mcr.microsoft.com/` then click **Review + create**. +Click **Next** -Click the **Create** button to assign the policy to the AKS cluster. +In the **Parameters** enter the following details: -This can take up to 20 minutes to take effect. We will come back to this later. +- Uncheck the **Only show parameters that need input or review** checkbox +- **Effect**: `deny` +- **Namespace exclusions**: `["kube-system","gatekeeper-system","app-routing-system","azappconfig-system"]` +- **Image registry**: Enter your container registry URL, for example `mycontainerregistry.azureci.io/` + +Click **Review + create** then click **Create** + +> [!TIP] +> This policy assignment can take up to 20 minutes to take effect. You can try to speed up the policy scan with the following command: `az policy state trigger-scan --resource-group myresourcegroup --no-wait` For more information on how to create a policy definition from a ConstraintTemplate or MutationTemplate, refer to the following documentation links: @@ -311,6 +357,8 @@ For more information on how to create a policy definition from a ConstraintTempl - [Understand Azure Policy for Kubernetes clusters](https://learn.microsoft.com/azure/governance/policy/concepts/policy-for-kubernetes) - [OPA Gatekeeper Library](https://github.com/open-policy-agent/gatekeeper-library/) +Great job! You have successfully enforced custom policies in the AKS cluster. Once the policy assignment has taken effect, you can try deploying a pod with an image from an unapproved container registry to see the policy in action. + === ## Secrets and config management @@ -323,32 +371,76 @@ Azure Key Vault is a cloud service for securely storing and accessing secrets. A We can leverage these two services to store our application configurations and secrets and make them available to our workloads running in the AKS cluster. -Let's start by creating an Azure App Configuration store. 
+

#### Azure Key Vault

Run the following command to create an Azure Key Vault.

```bash
KV_NAME=$(az keyvault create --name mykeyvault$RANDOM --resource-group myresourcegroup --query name -o tsv)
```

Assign yourself the **Key Vault Administrator** role on the Azure Key Vault so you can create and manage its secrets.

```bash
az role assignment create --role "Key Vault Administrator" --assignee $(az ad signed-in-user show --query id -o tsv) --scope $(az keyvault show --name $KV_NAME --query id -o tsv)
```

Run the following command to create a secret in the Azure Key Vault.

```bash
az keyvault secret set --vault-name $KV_NAME --name MySecret1 --value MySecretValue1
```

#### Azure App Configuration

Run the following command to create an Azure App Configuration store.

```bash
AC_NAME=$(az appconfig create --name myappconfig$RANDOM --resource-group myresourcegroup --assign-identity --query name -o tsv)
```

It's best practice to create a User-Assigned Managed Identity to access the Azure App Configuration store. This way, you can control the access to the store and ensure that only the workloads that need access to the configurations can access them.

```bash
AC_ID=$(az identity create --name $AC_NAME-id --resource-group myresourcegroup --query id -o tsv)
```

> [!KNOWLEDGE]
> You might be wondering why we are creating a User-Assigned Managed Identity for the Azure App Configuration store even though a system-assigned managed identity was created for it. The reason is that the system-assigned managed identity cannot be used for AKS Workload Identity as it does not support federated credentials.

Create some sample key-value pairs in the Azure App Configuration store.

```bash
az appconfig kv set --name $AC_NAME --key MyKey1 --value MyValue1 --yes
```

Now add a key vault reference to the Azure App Configuration store.

```bash
az appconfig kv set-keyvault --name $AC_NAME --key MySecret1 --secret-identifier https://$KV_NAME.vault.azure.net/secrets/MySecret1 --yes
```

The Azure App Configuration store will have a reference to the secret in the Azure Key Vault, and the intent is to use the user-assigned managed identity to access the secret in the key vault. However, this identity needs to be granted access to the key vault first. Run the following command to allow the configuration store's managed identity to read secrets from the key vault.

```bash
az role assignment create --role "Key Vault Secrets User" --assignee $(az identity show --id $AC_ID --query principalId -o tsv) --scope $(az keyvault show --name $KV_NAME --query id -o tsv)
```

#### Azure App Configuration Provider for Kubernetes

AKS offers an extension called the Azure App Configuration Provider for Kubernetes that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually.
```bash az k8s-extension create \ --cluster-type managedClusters \ - --cluster-name myAKSCluster \ - --resource-group myResourceGroup \ + --cluster-name myakscluster \ + --resource-group myresourcegroup \ --name appconfigurationkubernetesprovider \ --extension-type Microsoft.AppConfiguration \ --auto-upgrade false \ --version 2.0.0 ``` -> [!NOTE] > This can take up to 5 minutes to complete. After the extension has been created, you can verify that the Pods are running. @@ -357,28 +449,20 @@ After the extension has been created, you can verify that the Pods are running. kubectl get pods -n azappconfig-system ``` -We also want to establish a passwordless connection between the AKS cluster and the Azure App Configuration store. We can do this by leveraging the AKS Service Connector. The AKS Service Connector is a managed service that allows you to connect your AKS cluster to other Azure services. It will take care of manual tasks like setting up the necessary Azure RBAC roles and federated credentials for autentication, creating the necesary Kubernetes Service Account, and creating any firewall rules needed to allow the AKS cluster to communicate with the Azure service. +#### AKS Service Connector + +We also want to establish a passwordless connection between the AKS cluster and the Azure App Configuration store. We can do this by leveraging the AKS Service Connector. The AKS Service Connector is a managed service that allows you to connect your AKS cluster to other Azure services. It will take care of manual tasks like setting up the necessary Azure RBAC roles and federated credentials for authentication, creating the necessary Kubernetes Service Account, and creating any firewall rules needed to allow the AKS cluster to communicate with the Azure service. ```bash -az aks connection create appconfig \ - --resource-group myResourceGroup \ - --name myAKSCluster \ - --tg myResourceGroup \ - --app-config $AC_NAME \ - --workload-identity $AC_ID +az aks connection create appconfig --kube-namespace dev --name myakscluster --resource-group myresourcegroup --target-resource-group myresourcegroup --app-config $AC_NAME --workload-identity $AC_ID --client-type none ``` -The Azure App Configuration Provider for Kubernetes extension also installed new Custom Resource Definitions (CRDs) which you can use to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. +> [!KNOWLEDGE] +> The AKS Service Connector is used here for the Azure App Configuration Provider for Kubernetes pods to use to authenticate with the Azure App Configuration store. The authentication is handled in a passwordless manner using AKS Workload Identity. The AKS Service Connector can also be used to connect your application pods to other Azure services like Azure Key Vault, Azure Storage, and Azure SQL Database, etc. For more information, refer to the [service connector documentation](https://learn.microsoft.com/azure/service-connector/quickstart-portal-aks-connection?tabs=UMI). -Before you deploy the sync configuration manifest, let's create some configurations that one of the applications will use. 
+#### Syncing configurations -```bash -az appconfig kv set --name $AC_NAME --key Key1 --value Value1 --yes -az appconfig kv set --name $AC_NAME --key Key2 --value Value2 --yes -az appconfig kv set --name $AC_NAME --key Key3 --value Value3 --yes -az appconfig kv set --name $AC_NAME --key Key4 --value Value4 --yes -az appconfig kv set --name $AC_NAME --key Key5 --value Value5 --yes -``` +The Azure App Configuration Provider for Kubernetes extension also installed new Custom Resource Definitions (CRDs) which you can use to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. We can now deploy a sync configuration manifest to sync the configurations from Azure App Configuration to Kubernetes ConfigMaps. But first we will need some values for the manifest. @@ -403,7 +487,7 @@ kubectl apply -n dev -f - < [!KNOWLEDGE] +> While syncing secrets from Azure App Configuration to Kubernetes Secrets is supported, it is not best to keep secrets in Kubernetes Secrets because Kubernetes Secrets are not encrypted but rather base64 encoded. If you need to authenticate with Azure services from your applications, it is best to use AKS Workload Identity and Microsoft Entra ID for passwordless authentication. + The app config sync is set to refresh every 10 seconds and you can choose which key to listen for changes. In this case, we are only listening for changes to the Key1 configuration. If you update the value for Key1 in the Azure App Configuration store, you should see the value updated in the Kubernetes ConfigMap after the next refresh. Run the following command to update the value for Key1 in the Azure App Configuration store. ```bash -az appconfig kv set --name $AC_NAME --key Key1 --value NewValue1 --yes +az appconfig kv set --name $AC_NAME --key MyKey1 --value MyNewValue1 --yes ``` After a minute or so, you can check to see if the configurations have been updated in the Kubernetes ConfigMap. ```bash -kubectl get cm -n dev myconfigmap -o jsonpath='{.data}' | jq +kubectl get cm -n dev myconfigmap -o jsonpath='{.data}' ``` -Great job! You have successfully synced configurations from Azure App Configuration to Kubernetes ConfigMaps. +Great job! You have successfully synced configurations from Azure App Configuration to Kubernetes ConfigMaps and Secrets. === ## Scaling -One key differentiator of Kubernetes is its ability to scale workloads. One key differentiator of Kubernetes in the cloud is its ability to scale nodes to handle workload scale out. This section aims to get AKS operators comfortable with managing AKS Node Autoprovision, implementing workload scheduling best practices and scaling workloads with KEDA. +One key benefit of Kubernetes is its ability to scale workloads across a pool of nodes. One key differentiator of **Kubernetes in the cloud** is its ability to scale the node pool to handle more workloads to meet user demand. This section aims to get you comfortable with all the scaling capabilities of AKS Automatic and understand workload scheduling best practices. ### AKS Node Autoprovision -With AKS Automatic, the Node Autoprovision feature is enabled by default. This feature allows the AKS cluster to automatically scale the number of nodes in the cluster based on the workload requirements. The cluster will scale up when there are pending Pods that cannot be scheduled due to insufficient resources and scale down when there are nodes that are underutilized. It will also scale down and try to consolidate workloads to fewer nodes to save costs. 
This is something that you should account for when planning for high availability for your workloads. +With AKS Automatic, the [Node Autoprovision](https://learn.microsoft.com/azure/aks/node-autoprovision?tabs=azure-cli) feature is enabled by default. AKS Node Autoprovision is the Azure implementation of the [Karpenter project](https://karpenter.sh) which was developed by friends at AWS and has been [donated to the Cloud Native Computing Foundation (CNCF)](https://aws.amazon.com/blogs/containers/karpenter-graduates-to-beta/). In short, Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster. + +> [!NOTE] +> The term **Node Autoprovision** may be used interchangeably with **Karpenter** in this lab. + +The AKS Automatic cluster deploys a system node pool that will run all the system components; things that AKS Automatic will manage. As workloads are deployed to the cluster, Node Autoprovision will automatically scale up a new node on demand. As soon as you deploy an AKS Cluster, there are no user nodes running; just the system node pool. As you deploy workloads, the Node Autoprovision feature will automatically provision a new node to run the workload. Conversely, as you delete workloads, the Node Autoprovision feature will automatically scale down the number of nodes to save costs. But this means that pods will remain in pending state until the newly provisioned node is ready or the workloads will be disrupted as they are moved to other nodes during consolidation events. So you need to account for this when planning for high availability for your workloads. -AKS Node Autoprovision is built on top of the Karpenter project which was developed by friends at AWS and is now part of the CNCF. Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster. There are a few key concepts to understand when working with Karpenter: +There are a few key Karpenter concepts to understand when working with Node Autoprovision. Let's start by understanding the following concepts: -- **NodeClasses**: A NodeClass is a set of constraints that define the type of node that should be provisioned. For example, you can define a NodeClass that specifies the type of VM, the region, the availability zone, and the maximum number of nodes that can be provisioned. -- **NodePool**: A NodePool is a set of nodes that are provisioned based on a NodeClass. You can have multiple NodePools in a cluster, each with its own set of constraints. -- **NodeClaims**: A NodeClaim is a request for a node that matches a set of constraints. When a NodeClaim is created, Karpenter will provision a node that matches the constraints specified in the NodeClaim. +- **NodeClasses**: A [NodeClass](https://karpenter.sh/docs/concepts/nodeclasses/) is a set of constraints that define the type of node that should be provisioned. For example, you can define a NodeClass that specifies the type of VM, the region, the availability zone, and the maximum number of nodes that can be provisioned. In AKS, a default AKSNodeClass is created for you which specifies the OS image (currently [AzureLinux](https://github.com/microsoft/azurelinux)), and OS disk size of 128GB. +- **NodePool**: A [NodePool](https://karpenter.sh/docs/concepts/nodepools/) is a set of nodes that are provisioned based on a NodeClass. You can have multiple NodePools in a cluster, each with its own set of constraints. 
In AKS Automatic, the default NodePool is created for you. You can create additional NodePools with specific constraints if you have workloads that require specific VM attributes. For examples of various NodePool constraints, see the the [examples](https://github.com/Azure/karpenter-provider-azure/tree/main/examples/v1beta1) in the Karpenter Azure provider repository. +- **NodeClaims**: A [NodeClaim](https://karpenter.sh/docs/concepts/nodeclaims/) is a request for a node that matches a set of constraints. When a NodeClaim is created, Karpenter will provision a node that matches the constraints specified in the NodeClaim, and thus, a VM is born! -In the AKS Automatic cluster, the default NodeClass and default NodePool are created for you. So you can start deploying workloads right away. The default NodeClass is fairly generic and should be able to handle most workloads. However, you can create additional NodePools with specific constraints if you have workloads that require specific VM attributes. +As mentioned above, the default NodeClass and default NodePool are created for you. So you can start deploying workloads right away. The default NodeClass is fairly generic and should be able to handle most workloads. You can view the default NodePool by running the following command. @@ -467,7 +573,7 @@ You can view the default NodePool by running the following command. kubectl get nodepools default -o yaml ``` -Let's create a new NodePool with specific constraints. Run the following command to create a new NodePool. +However, you may want to create additional NodePools with specific constraints if you have teams that need to deploy workloads that require specific VM attributes. Let's create a new NodePool with specific constraints. Run the following command to create a new NodePool. ```bash kubectl apply -f - < [!NOTE] +> Remember we created a new policy that only allows images from specified container registries. ```bash -ACR_NAME=$(az acr list --resource-group myResourceGroup --query "[0].name" -o tsv) +ACR_NAME=$(az acr list --resource-group myresourcegroup --query "[0].name" -o tsv) az acr import --name $ACR_NAME --source ghcr.io/azure-samples/aks-store-demo/product-service:1.5.2 --image product-service:1.5.2 ``` -Run the following command to create a Pod that tolerates the taint. +Run the following command to create a pod that tolerates the taint. ```bash kubectl apply -f - < [!IMPORTANT] +> The Vertical Pod Autoscaler will evict pods only if the number of replicas is greater than 1. Otherwise, it will not evict the pod. + +Once you see the pods being restarted, press **Ctrl+C** to exit the watch then run the following command to confirm the resource requests and limits have been set. + +```bash +kubectl describe po -n dev $(kubectl get pod -n dev -l app=product-service -o jsonpath='{.items[0].metadata.name}') | grep -i requests -A2 +``` + +With requests in place, the scheduler can make better decisions about where to place the pod. The Vertical Pod Autoscaler will also adjust the resource requests based on the pod's usage. + +#### Dealing with disruptions + +When deploying workloads to Kubernetes, it is important to ensure that your workloads are highly available and resilient to voluntary and involuntary disruptions. This is especially important when running workloads with Karpenter because nodes can be provisioned and deprovisioned automatically. There are a few best practices to follow to ensure that your workloads are highly available and resilient to disruptions. 
The first thing you can do is to set [PodDisruptionBudgets](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets) for your workloads. PodDisruptionBudgets are used to ensure that a certain number of Pods are available during maintenance or disruptions. By setting PodDisruptionBudgets, you can ensure that your workloads are not abruptly terminated during maintenance or node scale-down events.

The YAML spec for a PodDisruptionBudget is relatively easy to write and understand. But if you are not sure of how to write one, you can use Microsoft Copilot for Azure to generate the YAML for you.

Follow these steps to create a PodDisruptionBudget for the product-service running in the dev namespace.

- Navigate to your AKS cluster in the Azure Portal.
- Under the **Kubernetes resources** section, click on **Workloads**, then click on the **+ Create** button to expand the dropdown.
- Click on the **Apply a YAML** button. Here you will be presented with a blank YAML editor.
- Put your cursor in the editor and press **Alt+I** to open the prompt dialog.
- In the textbox, type the following text and click the **send** button.
  ```text
  create a pod disruption budget for the product-service running in the dev namespace to run at least 1 replica at all times
  ```
- You will see the YAML generated for you. Click **Apply** then the **Add** button to create the PodDisruptionBudget.

Karpenter also supports [Consolidation Policies](https://karpenter.sh/docs/concepts/disruption/#consolidation) which are used to determine when to consolidate nodes. By default, Karpenter will consolidate nodes when they are underutilized. This means that Karpenter will automatically scale down nodes when they are not being used to save costs. You can also set [node disruption budgets](https://karpenter.sh/docs/concepts/disruption/#nodepool-disruption-budgets) in the NodePool manifest to specify the percentage of nodes that can be consolidated at a time. Lastly, if you want to simply prevent a pod or node from being disrupted, you can use the `karpenter.sh/do-not-disrupt: true` annotation at the [pod level](https://karpenter.sh/docs/concepts/disruption/#pod-level-controls) or at the [node level](https://karpenter.sh/docs/concepts/disruption/#node-level-controls).

#### Pod affinity and anti-affinity

Pod affinity and anti-affinity are used to influence the scheduling of Pods in a Kubernetes cluster. We saw an example of this earlier when we deployed a pod with node affinity and tolerations to ensure that the pod was scheduled on a node that matched the criteria. Pod anti-affinity is used to ensure that Pods are not scheduled on the same node. If you noticed, the product-service deployment included 3 replicas, but they were all scheduled on the same node.

You can confirm this by running the following command:

```bash
kubectl get po -n dev -l app=product-service -o wide
```

If the node goes away for whatever reason, all 3 replicas will be disrupted. What's worse, if the node encounters a hardware failure, all 3 replicas will be disrupted at the same time, even with the PodDisruptionBudget in place. To ensure that the replicas are scheduled on different nodes, you can set Pod anti-affinity rules.

Run the following command to update the product-service deployment with Pod anti-affinity rules.
+ +```bash +kubectl apply -f - < [!KNOWLEDGE] +> Pod anti-affinity rules will spread the pods across different nodes, but there's no guarantee that the nodes will be in different availability zones. Pod topology spread constraints will ensure that the pods are spread across different availability zones as the topology key is set to `topology.kubernetes.io/zone`. The `maxSkew` field specifies the maximum difference between the number of pods in any two zones. The `whenUnsatisfiable` field specifies what action to take if the constraint cannot be satisfied. In this case, we set it to `DoNotSchedule` which means that the pod will not be scheduled if the constraint cannot be satisfied. More information on spreading pods across different zones can be found [here](https://learn.microsoft.com/azure/aks/aks-zone-resiliency#ensure-pods-are-spread-across-azs). + +Run the following command and you should start to see the nodes coming up in different availability zones. + +```bash +kubectl get nodes -o custom-columns=NAME:'{.metadata.name}',OS:'{.status.nodeInfo.osImage}',SKU:'{.metadata.labels.karpenter\.azure\.com/sku-name}',ZONE:'{.metadata.labels.topology\.kubernetes\.io/zone}' +``` + +Excellent! You have now know about some of the best practices for workload scheduling in Kubernetes to ensure that your workloads are scheduled efficiently and effectively. From d6af0ed00dfb7a006626da7d391dd8e460e42a62 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Wed, 23 Oct 2024 18:28:58 -0700 Subject: [PATCH 07/27] troubleshooting --- workshops/operating-aks-automatic/workshop.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index cab2b3e5..f0f64332 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -910,3 +910,21 @@ kubectl get nodes -o custom-columns=NAME:'{.metadata.name}',OS:'{.status.nodeInf ``` Excellent! You have now know about some of the best practices for workload scheduling in Kubernetes to ensure that your workloads are scheduled efficiently and effectively. + +## Troubleshooting + +Let’s face it. Applications will fail. Being able to quickly identify and mitigate issues is crucial and in this section, you will become familiar with troubleshooting tools and techniques and lean heavily on Azure Copilot to help uncover and solve problems. + +Be sure to check out the following resources for more information on troubleshooting AKS: + +- [Work with AKS clusters efficiently using Microsoft Copilot in Azure](https://learn.microsoft.com/azure/copilot/work-aks-clusters) +- [Azure Kubernetes Service (AKS) troubleshooting documentation](https://learn.microsoft.com/troubleshoot/azure/azure-kubernetes/welcome-azure-kubernetes) +- [Set up Advanced Network Observability for Azure Kubernetes Service (AKS)](https://learn.microsoft.com/azure/aks/advanced-network-observability-cli?tabs=cilium) + +## Conclusion + +Congratulations! You have completed the workshop on operating AKS Automatic. You have learned how to create an AKS Automatic cluster, enforce custom policies, sync configurations to the cluster, scale workloads, and apply best practices for workload scheduling. You have also learned how to troubleshoot issues in AKS. You are now well-equipped to operate AKS Automatic clusters and ensure that your workloads are running efficiently and effectively. 
+ +This lab is also available at https://aka.ms/aks/labs along with others, so feel free to check them out. + +If you have any feedback or questions, please feel free to reach out to us at https://aka.ms/aks/feedback. From cfec6c472b9aa1b943b8e50cb6253d1020580e71 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Thu, 24 Oct 2024 07:01:38 -0700 Subject: [PATCH 08/27] small edits --- workshops/operating-aks-automatic/workshop.md | 378 ++++++++++-------- 1 file changed, 222 insertions(+), 156 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index f0f64332..90ae644b 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -18,23 +18,23 @@ wt_id: WT.mc_id=containers-153036-pauyu AKS Automatic is a new mode of operation for Azure Kubernetes Service (AKS) that simplifies cluster management, reduces manual tasks, and builds in enterprise-grade best practices and policy enforcement. This lab is meant to be a hands-on experience for Azure administrators and DevOps engineers looking to get started with AKS Automatic. You will learn how to automate many administrative tasks in Kubernetes and make it easier for development teams to deploy their apps while maintaining security and compliance. -Many of the features you will be working with in this workshop are in preview and may not be recommended for production workloads. However, the AKS engineering team is working hard to bring these features to general availability and will be great learning opportunities for you to understand options to support developers and streamline operations. This is not platform engineering, but it is a step in the right direction to automate many of the tasks that platform engineers do today. +Many of the features you will be working with in this workshop are in preview and may not be recommended for production workloads. However, the AKS engineering team is working hard to bring these features to general availability and will be great learning opportunities for you to understand options to support developers and streamline operations. This is not platform engineering, but it is a step in the right direction to automate many of the tasks that platform engineers do today. If you would like to learn more about platform engineering with AKS, please check out this [repo](https://github.com/azure-samples/aks-platform-engineering). 
### Objectives By the end of this lab you will be able to: -- Administer user access to the AKS cluster -- Ensure security best practices with Azure Policy and Deployment Safeguards +- Manage user access to the AKS cluster +- Understand AKS Deployment Safeguards and how to enforce custom policies - Sync configurations to the cluster with Azure App Configuration Provider for Kubernetes - Leverage AKS Service Connector for passwordless integration with Azure services -- Appropriately scale workloads across nodes with AKS Node Autoprovision -- Review workload scheduling best practices +- Scale workloads across nodes with AKS Node Autoprovision +- Understand workload scheduling best practices - Troubleshoot workload failures with monitoring tools and Microsoft Copilot for Azure ### Prerequisites -The lab environment has been pre-configured for you with the following Azure resources: +The lab environment has been pre-configured for you with the following Azure resources in the resource group named **myresourcegroup**: - [AKS Automatic](https://learn.microsoft.com/azure/aks/intro-aks-automatic) cluster with monitoring enabled - [Azure Container Registry](https://learn.microsoft.com/azure/container-registry/container-registry-intro) @@ -53,7 +53,7 @@ You will also need the following tools: - [kubelogin](https://learn.microsoft.com/azure/aks/kubelogin-authentication) - [Helm](https://helm.sh/docs/intro/install/) -> [!NOTE] +> [!ALERT] > All command-line instructions in this lab should be executed in a Bash shell. If you are using Windows, you can use the Windows Subsystem for Linux (WSL) or Azure Cloud Shell. Before you get started, open a Bash shell and log in to your Azure subscription with the following command: @@ -87,31 +87,31 @@ Security above all else! The AKS Automatic cluster is configured with Azure Role ### Granting permissions to the AKS cluster -With Azure RBAC enabled on the AKS cluster granting users access to the cluster is as simple as assigning roles to users, groups, and/or service principals. Users will need to run the normal **az aks get-credentials** command to download the kubeconfig file, but when users attempt to execute kubectl commands against the cluster, they will be instructed to log in with their Microsoft Entra ID credentials and their assigned roles will determine what they can do within the cluster. +With [Azure RBAC for Kubernetes authorization](https://learn.microsoft.com/azure/aks/manage-azure-rbac?tabs=azure-cli) enabled on the AKS cluster granting users access to the cluster is as simple as assigning roles to users, groups, and/or service principals. Users will need to run the normal **az aks get-credentials** command to download the kubeconfig file, but when users attempt to execute commands against the Kubernetes API Server, they will be instructed to log in with their Microsoft Entra ID credentials and their assigned roles will determine what they can do within the cluster. -To grant permissions to the AKS cluster, you will need to assign a role. The following built-in roles for Azure-RBAC enabled clusters are available to assign to users. +To grant permissions to the AKS cluster, you will need to assign an Azure role. The following built-in roles are available for user assignment. 
- [Azure Kubernetes Service RBAC Admin](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-admin) - [Azure Kubernetes Service RBAC Cluster Admin](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-cluster-admin) - [Azure Kubernetes Service RBAC Reader](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-reader) - [Azure Kubernetes Service RBAC Writer](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-writer) -Using Azure Cloud Shell, run the following command to get the AKS cluster credentials +In your shell, run the following command to get the AKS cluster credentials ```bash az aks get-credentials --resource-group myresourcegroup --name myakscluster ``` -Create a namespace for the developer to use. +A Kubernetes [Namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) is a way to isolate resources in a cluster and is common practice to create namespaces for different teams or environments. Run the following command to create a namespace for the dev team to use. ```bash kubectl create namespace dev ``` -Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. You will need to follow the same login process you went through to login into your Azure subscription but since you've already logged in before, you simply need to click through the prompts (no need to re-enter passwords). After you have logged in, the command to create the namespace should be successful. +Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. Follow the same login process you went through to login into your Azure subscription. After you've successfully logged in, the command to create the namespace should be successful. -> [!KNOWLEDGE] -> The kubelogin plugin is used to authenticate with Microsoft Entra ID and can be easily installed with the following command: `az aks install-cli`. So if you run into an error when trying to log in, you may need to install the plugin. +> [!HINT] +> The kubelogin plugin is used to authenticate with Microsoft Entra ID and can be easily installed with the following command: **az aks install-cli**. So if you run into an error when trying to log in, you may need to install the plugin. Run the following command to get the AKS cluster's resource ID. @@ -122,7 +122,7 @@ AKS_ID=$(az aks show --resource-group myresourcegroup --name myakscluster --quer Run the following command to get the developer's user principal ID. ```bash -DEV_USER_PRINCIPAL_ID=$(az ad user show --id @lab.CloudPortalCredential(User2).Username --query id --output tsv) +DEV_USER_PRINCIPAL_ID=$(az ad user show --id @lab.CloudPortalCredential(Dev).Username --query id --output tsv) ``` Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. Scoping the permissions to the namespace ensures that the developer can only access the resources within the namespace and not the entire cluster. @@ -145,7 +145,7 @@ Now, run the following command to get the dev namespace. This trigger a new auth kubectl get namespace dev ``` -After logging in, head back to your terminal. You should see the **dev** namespace. 
+After logging in, head back to your terminal. You should see the **dev** namespace, its status and age. This means that the developer has the necessary permissions to access the **dev** namespace. Run the following command to check to see if the current user can create a pod in the **dev** namespace. @@ -166,20 +166,20 @@ helm repo add aks-store-demo https://azure-samples.github.io/aks-store-demo Run the following command to install the AKS Store Demo application in the **dev** namespace. ```bash -helm install demo aks-store-demo/aks-store-demo-chart --namespace dev +helm install demo aks-store-demo/aks-store-demo-chart --namespace dev --set aiService.create=true ``` -The helm install command should show a status of "deployed". This means that the application has successfully deployed in the **dev** namespace. But it will take a few minutes to deploy so let's move on. +The helm install command should show a status of "deployed". This means that the application has successfully deployed in the **dev** namespace. It will take a few minutes to deploy, so let's move on. -Finally, let's check to see if the developer can create a pod in the **default** namespace. +Finally, let's check to see if the developer can create a pod outside of their assigned namespace. Run the following command to test against the **default** namespace. ```bash kubectl auth can-i create pods --namespace default ``` -You should see the output **no**. This means the developer does not have the necessary permissions to create Pods in the default namespace. +You should see the output **no - User does not have access to the resource in Azure. Update role assignment to allow access**. This is exactly what we want to see. If you need to grant the user access to another namespace, you can simply assign the role to the user with the appropriate scope. Or if you need to grand a user access to the entire cluster, you can assign the role to the user with the scope of the AKS cluster and omit the namespace altogether. -Great job! You have successfully granted permissions to the AKS cluster. +Great job! You now know how to manage user access to the AKS cluster and how to scope permissions to specific namespaces. > [!IMPORTANT] > After testing the permissions, delete the developer user's cached credentials, then move the admin user's cached credentials back to the **~/.kube/cache/kubelogin** directory by running the following commands. @@ -191,9 +191,9 @@ mv ~/.kube/cache/*.json ~/.kube/cache/kubelogin/ ### Deployment Safeguards -As you unleash your developers to deploy their applications in the AKS cluster, you want to ensure that they are following best practices and policies. [Deployment Safeguards](https://learn.microsoft.com/azure/aks/deployment-safeguards) is a feature in AKS Automatic that helps enforce best practices and policies for your AKS clusters. It is implemented via [Azure Policy](https://learn.microsoft.com/azure/governance/policy/overview) and a set of policies known as an [initiative](https://learn.microsoft.com/azure/governance/policy/concepts/initiative-definition-structure) is assigned to your AKS cluster to ensure that resources running within it are secure, compliant, and well-managed. The compliance state of the cluster resources are reported back to Azure Policy and can be viewed in the Azure Portal. +As you unleash your developers to deploy their applications in the AKS cluster, you want to ensure that they are following best practices and policies. 
[Deployment Safeguards](https://learn.microsoft.com/azure/aks/deployment-safeguards) is a feature that helps enforce best practices and policies for your AKS clusters. In AKS Automatic clusters it is enabled by default and is implemented using [Azure Policy](https://learn.microsoft.com/azure/governance/policy/overview). A group of policies known as an [initiative](https://learn.microsoft.com/azure/governance/policy/concepts/initiative-definition-structure) is assigned to your cluster to monitor resources running within it are secure, compliant, and follows best practices. The compliance state of the cluster resources are reported back to Azure Policy and can be viewed in the Azure Portal. -The set of policies that are included with Deployment Safeguards are documented [here](https://learn.microsoft.com/azure/aks/deployment-safeguards#deployment-safeguards-policies). Read carefully through each policy description, the targeted resource, and the mutation that can be applied when the feature is set to **Enforcement** mode. AKS Automatic defaults to **Warning** mode which simply displays warnings in the terminal; however, when in Enforcement mode, polices will be strongly enforced by either mutating deployments to comply with the policies or denying deployments that violate policy. Therefore, it is important to understand the impact of each policy before enabling Enforcement mode. +The group of policies that are included with Deployment Safeguards are documented [here](https://learn.microsoft.com/azure/aks/deployment-safeguards#deployment-safeguards-policies). Read carefully through each policy description, the targeted resource, and the mutation that can be applied when the assignment is set to **Enforcement** mode. AKS Automatic defaults to **Warning** mode which simply displays warnings in the terminal as a gentle reminder to implement best practices. You may have seen Deployment Safeguards at work when you deployed the demo application using Helm. When Deployment Safeguards is in Enforcement mode, polices will be strongly enforced by either mutating deployments to comply with the policies or denying deployments that violate policy. Therefore, it is important to understand the impact of each policy before enabling Enforcement mode. Run the following command to deploy a pod without any best practices in place. @@ -211,7 +211,7 @@ Warning: [azurepolicy-k8sazurev1containerrestrictedi-88f886218244b623dd93] nginx pod/nginx created ``` -These warnings are here to help remind you of the best practices that should be followed when deploying Pods in the AKS cluster. There are warnings about not having a [livenessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request), [readinessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes), [resource limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits), and [imagePullSecrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret). +These warnings are here to help remind you of the best practices that should be followed when deploying pods in the AKS cluster. 
There are warnings about not having a [livenessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-http-request), [readinessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes), [resource limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits), and [imagePullSecrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret). So let's try this again with some best practices in place. Run the following command to delete the pod that was just created. @@ -250,36 +250,41 @@ spec: port: 80 initialDelaySeconds: 3 periodSeconds: 3 - dnsPolicy: ClusterFirst - restartPolicy: Always -status: {} EOF ``` -The pod manifest is a bit more complex this time but it includes the following best practices: - -- resource limits and requests -- livenessProbe -- readinessProbe +This pod manifest is a bit more complex this time but it includes the following best practices that includes resource limits and requests as well as liveness and readiness probes. -> [!NOTE] +> [!HINT] > You should now see that we've satisfied all but one best practice, which we'll address later. -Nice work! Now you know where to expect warnings and how to address some of them. You can also view the compliance state of the cluster resources in the Azure Portal by navigating to the **Policy** blade. To get there, type `policy` in the search bar and click on **Policy** under **Services**. +You can also view the compliance state of the cluster resources in the Azure Portal. + +Head over to the Azure portal. In the search bar, type `policy` and click on **Policy** under **Services**. + +In the **Overview** section, you will see the **AKS Deployment Safeguards Policy Assignment** in the middle of the page. + +Click on the policy assignment to view the compliance state of the cluster resources. You should see some of the things that were displayed in the terminal output as not being compliant. -In the **Overview** section, you will see the **AKS Deployment Safeguards Policy Assignment**. Click on the policy assignment to view the compliance state of the cluster resources. +Nice work! Now you know where to expect warnings and how to address some of them. ### Custom policy enforcement -[Azure Policy for AKS](https://learn.microsoft.com/azure/aks/use-azure-policy) has been enabled when AKS Automatic assigned Deployment Safeguards policy initiative. This means you can also leverage other Azure Policy definitions (built-in or custom) to enforce organizational standards and compliance. When Azure Policy for AKS feature is enabled, [Open Policy Agent (OPA) Gatekeeper](https://kubernetes.io/blog/2019/08/06/opa-gatekeeper-policy-and-governance-for-kubernetes/) is deployed in the AKS cluster. OPA Gatekeeper is a policy engine for Kubernetes that allows you to enforce policies written using [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/), a high-level declarative language. +[Azure Policy add-on for AKS](https://learn.microsoft.com/azure/aks/use-azure-policy) has been enabled when AKS Automatic assigned Deployment Safeguards policy initiative. This means you can also leverage additional Azure Policy definitions (built-in or custom) to enforce organizational standards and compliance. 
When the Azure Policy for AKS feature is enabled, [Open Policy Agent (OPA) Gatekeeper](https://kubernetes.io/blog/2019/08/06/opa-gatekeeper-policy-and-governance-for-kubernetes/) is deployed in the AKS cluster. OPA Gatekeeper is a policy engine for Kubernetes that allows you to enforce policies written using [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/), a high-level declarative language. So when Azure policies are assigned to the AKS cluster, they are translated to OPA Gatekeeper [ConstraintTemplates](https://open-policy-agent.github.io/gatekeeper/website/docs/constrainttemplates/) and enforced in the cluster. -These Pods are running in the **gatekeeper-system** namespace. +The Gatekeeper pods are running in the **gatekeeper-system** namespace. ```bash kubectl get pods -n gatekeeper-system ``` -Although OPA Gatekeepr is running in the cluster, it is worth noting that this OPA Gatekeeper cannot be used outside of Azure Policy. If you want to implement a well-known or commonly used [ConstraintTemplate](https://open-policy-agent.github.io/gatekeeper/website/docs/constrainttemplates/), you'll need to translate it to an Azure Policy definition and assign it to the AKS cluster. There are **azure-policy-\*** Pods running in the cluster that are responsible for listening to Azure Policy assignments, translating them to OPA Gatekeeper ConstraintTemplates, and reporting the results back up to Azure Policy. +You can also view the ConstraintTemplates that are available in the cluster. + +```bash +kubectl get constrainttemplates +``` + +Although Gatekeepr is running in the cluster, it is worth noting that this Gatekeeper cannot be used outside of Azure Policy. That is, if you want to implement a well-known or commonly used ConstraintTemplates, you'll need to translate it to an Azure Policy definition and assign it to the AKS cluster. From there **azure-policy-\*** pods running in the **kube-system** namespace listens for Azure Policy assignments, translates them to ConstraintTemplates, deploys the custom Constraints (cluster policy), and reports the cluster policy results back up to Azure Policy. Let's illustrate this by attempting to deploy a commonly used ConstraintTemplate that limits container images to only those from approved container registries. Run the following command to attempt to deploy the ConstraintTemplate. @@ -287,69 +292,77 @@ Let's illustrate this by attempting to deploy a commonly used ConstraintTemplate kubectl apply -f https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml ``` -You will see a message "_This cluster is governed by Azure Policy. Policies must be created through Azure._" - -So we need to translate this ConstraintTemplate to an Azure Policy definition. Good news is that you can use the [Azure Policy extension for Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension) to help with this process. +In the output you should see **This cluster is governed by Azure Policy. Policies must be created through Azure.** -Open VS Code and make sure the Azure Policy extension is installed. To activate the extension, press **Ctrl+Shift+P** on your keyboard to open the command palette and type **Azure: Sign in** then use the web browser to authenticate with your admin user account. 
+So we need to translate this ConstraintTemplate to an Azure Policy definition and if you are unsure about how to translate ConstraintTemplates to Azure Policy JSON, the [Azure Policy extension for Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension) is available to help. -> [!NOTE] -> If you see multiple sign-in options, choose the one that has `azure-account.login` next to it. +Open VS Code. -Next, press **Ctrl+Shift+P** again and type **Azure: Select Subscriptions** then select the subscription that contains the AKS cluster. - -> [!NOTE] -> If you see multiple subscriptions, choose the one that has `azure-account.selectSubscriptions` next to it. +> [!ALERT] +> If you are on a Windows machine, make sure you have the [Remote - WSL](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-wsl) extension installed. -In VS Code, click the **Azure Policy** icon and you should see subscription resources being loaded. +Connect to WSL by pressing **Ctrl+Shift+P** on your keyboard and typing **WSL: Connect to WSL**. -Using VS Code terminal, run the following command download the sample ConstraintTemplate file to your local machine. +Make sure the Azure Policy extension is installed. If not, you can install it from the [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension). -```bash -curl -o constrainttemplate.yaml https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml -``` +To activate the extension, press **Ctrl+Shift+P** on your keyboard to open the command palette and type **Azure: Sign in** then use the web browser to authenticate with your admin user account. -Open the file in VS Code and press **Ctrl+Shift+P** then type **Azure Policy for Kubernetes: Create Policy Definition from a Constraint Template** and select the **Base64Encoded** option. +> [!HELP] +> If you see multiple sign-in options, choose the one that has **azure-account.login** next to it. -This will generate a new Azure Policy definition in the JSON format. You will need to fill in details everywhere you see the text `/* EDIT HERE */`. For **apiGroups** field, you can use the value `[""]` to target all API groups and for the **kind** field, you can use the value `["Pod"]` to target Pods. +Next, press **Ctrl+Shift+P** again and type **Azure: Select Subscriptions** then select the subscription that contains the AKS cluster. -> [!NOTE] -> The extension process might take a few minutes to complete. If you cannot get the extension to generate that's okay, you will use a sample JSON file to create the policy definition in the next step. +> [!HELP] +> If you see multiple subscriptions, choose the one that has **azure-account.selectSubscriptions** next to it. -With the Azure Policy definition written, you can create the policy definition in the Azure Portal. Open this [link](https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate-as-policy.json) and copy the JSON to the clipboard. +In VS Code, click the **Azure Policy** icon and you should see the subscription resources and policies panes being loaded. -Navigate to the Azure Portal and type `policy` in the search bar. +Open the VS Code terminal and run the following command download the sample ConstraintTemplate file to your local machine. -Click on **Policy** under **Services**, then click on **Definitions** under the **Authoring** section. 
+```bash +curl -o constrainttemplate.yaml https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml +``` -Click on **+ Policy definition** then enter the following details: +Open the constrainttemplate.yaml file in VS Code. -- **Definition location**: Click the button next to the textbox, then select your subscription -- **Name**: `[AKS] Approved registries only` -- **Description**: `This policy requires that all containers in an AKS cluster are sourced from approved container registries.` -- **Category**: Click **Use existing** then select **Kubernetes** from the dropdown -- **Policy rule**: Paste the JSON you copied from the link above +The constraint template includes Rego code that enforces that all containers in the AKS cluster are sourced from approved container registries. The approved container registries are defined in the **registry** parameter and this is where you can specify the container registry URL. -Click **Save** then click on **Assign policy** button. +To convert this template to Azure Policy JSON, press **Ctrl+Shift+P** then type **Azure Policy for Kubernetes: Create Policy Definition from a Constraint Template** and select the **Base64Encoded** option. -In the **Basics** tab, enter the following details: +This will generate a new Azure Policy definition in the JSON format. You will need to fill in details everywhere you see the text **/_ EDIT HERE _/**. For **apiGroups** field, you can use the value **[""]** to target all API groups and for the **kind** field, you can use the value **["Pod"]** to target pods. -- **Scope**: Click the button next to the textbox, select the resource group that contains the AKS cluster, and don't forget to click **Select** -- Leave the rest of the fields as default +> [!ALERT] +> The extension activation process might take a few minutes to complete. If you cannot get the extension to generate JSON from the ConstraintTemplate, that's okay, you will use a sample JSON file to create the policy definition in the next step. -Click **Next** +The Azure Policy definition will need to be deployed using the Azure Portal. Run the following command download the sample ConstraintTemplate file to your local machine. 
-In the **Parameters** enter the following details: +```bash +curl -o constrainttemplate-as-policy.json https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate-as-policy.json +``` -- Uncheck the **Only show parameters that need input or review** checkbox -- **Effect**: `deny` -- **Namespace exclusions**: `["kube-system","gatekeeper-system","app-routing-system","azappconfig-system"]` -- **Image registry**: Enter your container registry URL, for example `mycontainerregistry.azureci.io/` +To create the policy definition and assign it to the AKS cluster, follow these steps: -Click **Review + create** then click **Create** +- Open **constrainttemplate-as-policy.json** file and copy the JSON to the clipboard +- Navigate back to the Azure Policy blade in the Azure Portal +- Click on **Definitions** under the **Authoring** section +- Click on **+ Policy definition** then enter the following details: + - **Definition location**: Click the button next to the textbox, then select your subscription in the dropdown and click **Select** + - **Name**: `[AKS] Approved registries only` + - **Description**: `This policy requires that all containers in an AKS cluster are sourced from approved container registries.` + - **Category**: Click the **Use existing** radio button then select **Kubernetes** from the dropdown + - **Policy rule**: Clear the existing content and paste the JSON you copied from the **constrainttemplate-as-policy.json** file +- Click **Save** then click on **Assign policy** button +- In the **Basics** tab, enter the following details: + - **Scope**: Click the button next to the textbox, select the **myresourcegroup** which contains the AKS cluster and click **Select** +- Click **Next** +- In the **Parameters** tab, enter the following details: + - Uncheck the **Only show parameters that need input or review** checkbox + - **Effect**: Select **deny** from the dropdown + - **Namespace exclusions**: Replace the existing content with `["kube-system","kube-node-lease","kube-public", "gatekeeper-system","app-routing-system","azappconfig-system","sc-system"]` + - **Image registry**: Enter your container registry URL, for example `@lab.CloudResourceTemplate(AKSAutomatic).Outputs[containerRegistryLoginServer]/` +- Click **Review + create** then click **Create** -> [!TIP] -> This policy assignment can take up to 20 minutes to take effect. You can try to speed up the policy scan with the following command: `az policy state trigger-scan --resource-group myresourcegroup --no-wait` +Awesome! You have successfully enforced custom policies in the AKS cluster. Once the policy assignment has taken effect, you can try deploying a pod with an image from an unapproved container registry to see the policy in action. However, this policy assignment can take up to 15 minutes to take effect, so let's move on to the next section. For more information on how to create a policy definition from a ConstraintTemplate or MutationTemplate, refer to the following documentation links: @@ -357,21 +370,19 @@ For more information on how to create a policy definition from a ConstraintTempl - [Understand Azure Policy for Kubernetes clusters](https://learn.microsoft.com/azure/governance/policy/concepts/policy-for-kubernetes) - [OPA Gatekeeper Library](https://github.com/open-policy-agent/gatekeeper-library/) -Great job! You have successfully enforced custom policies in the AKS cluster. 
Once the policy assignment has taken effect, you can try deploying a pod with an image from an unapproved container registry to see the policy in action. - === ## Secrets and config management -Developers need a way to integrate their workloads with Azure services and make the configs available to their workloads in the cluster. They also need to ensure password-less authentication with Microsoft Entra ID is leveraged as much as possible. This section aims to get AKS operators comfortable with setting up a centralized configuration store, syncing configs to the cluster as Kubernetes ConfigMaps, and setting up connectors to integrate with other Azure services. +Developers need a way to integrate their workloads with Azure services and make the configs available to their workloads in the cluster. They also need to ensure password-less authentication with Microsoft Entra ID is leveraged as much as possible. This section aims to get you comfortable with setting up a centralized configuration store, syncing configs to the cluster as Kubernetes ConfigMaps, and setting up connectors to integrate with other Azure services. -### Syncing configurations to the cluster +### Sync app configs to the cluster -Azure Key Vault is a cloud service for securely storing and accessing secrets. A secret is anything that you want to tightly control access to, such as API keys, passwords, or certificates. Azure App Configuration is a managed service that helps developers centralize their application configurations. It provides a service to store, manage, and retrieve application settings and feature flags. +[Azure Key Vault](https://learn.microsoft.com/azure/key-vault/general/overview) is a cloud service for securely storing and accessing secrets. A secret is anything that you want to tightly control access to, such as API keys, passwords, or certificates. [Azure App Configuration](https://learn.microsoft.com/azure/azure-app-configuration/overview) is a managed service that helps developers centralize their application configurations. It provides a service to store, manage, and retrieve application settings and feature flags. You can also reference secrets stored in Azure Key Vault from Azure App Configuration. -We can leverage these two services to store our application configurations and secrets and make them available to our workloads running in the AKS cluster. +We can leverage these two services to store our application configurations and secrets and make them available to our workloads running in the AKS cluster using native Kubernetes resources; [ConfigMaps](https://kubernetes.io/docs/concepts/configuration/configmap/) and [Secrets](https://kubernetes.io/docs/concepts/configuration/secret/). -#### Azure Key Vault +#### Provision Azure resources Run the following command to create an Azure Key Vault. @@ -391,30 +402,25 @@ Run the following command to create a secret in the Azure Key Vault. az keyvault secret set --vault-name $KV_NAME --name MySecret1 --value MySecretValue1 ``` -#### Azure App Configuration - Run the following command to create an Azure App Configuration store. ```bash -AC_NAME=$(az appconfig create --name myappconfig$RANDOM --resource-group myresourcegroup --assign-identity --query name -o tsv) +AC_NAME=$(az appconfig create --name myappconfig$RANDOM --resource-group myresourcegroup --query name -o tsv) ``` -It's best practice to create a User-Assigned Managed Identity to access the Azure App Configuration store. 
This way, you can control the access to the store and ensure that only the workloads that need access to the configurations can access them. +It's best practice to create a User-Assigned Managed Identity to access Azure resources. This identity will be used to access only data in the Azure App Configuration store and the Azure Key Vault and nothing else. ```bash AC_ID=$(az identity create --name $AC_NAME-id --resource-group myresourcegroup --query id -o tsv) ``` -> [!KNOWLEDGE] -> You might be wondering why we are creating a User-Assigned Managed Identity for the Azure App Configuration store even though a system-assigned managed identity was created for it. The reason is that the system-assigned managed identity cannot be used for AKS Workload Identity as it does not support federated credentials. - -Create some sample key-value pairs in the Azure App Configuration store. +Create simple key-value pair in the Azure App Configuration store. ```bash az appconfig kv set --name $AC_NAME --key MyKey1 --value MyValue1 --yes ``` -Now add a key vault reference to the Azure App Configuration store. +Now add a key vault reference to the Azure App Configuration store. This will point to the secret that was created in the Azure Key Vault in the previous step. ```bash az appconfig kv set-keyvault --name $AC_NAME --key MySecret1 --secret-identifier https://$KV_NAME.vault.azure.net/secrets/MySecret1 --yes @@ -426,43 +432,49 @@ The Azure App Configuration store will have a reference to the secret in the Azu az role assignment create --role "Key Vault Secrets User" --assignee $(az identity show --id $AC_ID --query principalId -o tsv) --scope $(az keyvault show --name $KV_NAME --query id -o tsv) ``` +> [!TIP] +> You might be wondering "what about the role assignment for the Azure App Configuration store?" We'll get to that in the next section. + #### Azure App Configuration Provider for Kubernetes -AKS offers an extension called the Azure App Configuration Provider for Kubernetes that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually. +AKS offers an extension called the [Azure App Configuration Provider for Kubernetes](https://learn.microsoft.com/azure/aks/azure-app-configuration?tabs=cli) that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually. + +Run the following command to install the Azure App Configuration Provider for Kubernetes extension. ```bash -az k8s-extension create \ - --cluster-type managedClusters \ - --cluster-name myakscluster \ - --resource-group myresourcegroup \ - --name appconfigurationkubernetesprovider \ - --extension-type Microsoft.AppConfiguration \ - --auto-upgrade false \ - --version 2.0.0 +az k8s-extension create --cluster-type managedClusters --cluster-name myakscluster --resource-group myresourcegroup --name appconfigurationkubernetesprovider --extension-type Microsoft.AppConfiguration --auto-upgrade false --version 2.0.0 ``` +> [!ALERT] > This can take up to 5 minutes to complete. -After the extension has been created, you can verify that the Pods are running. +After the extension has been created, you can verify that the pods are running. 
```bash kubectl get pods -n azappconfig-system ``` -#### AKS Service Connector +#### Passwordless authentication to Azure services + +We also want to establish a passwordless connection between the AKS cluster and the Azure App Configuration store. We can do this by leveraging the [AKS Service Connector](https://learn.microsoft.com/azure/service-connector/how-to-use-service-connector-in-aks). The AKS Service Connector will take care of manual tasks like setting up the necessary Azure RBAC roles and federated credentials for authentication, creating the necessary Kubernetes Service Account, and creating any firewall rules needed to allow the AKS cluster to communicate with the Azure service. It makes it really simple to get your application pods connected to Azure services using [AKS Workload Identity](https://learn.microsoft.com/azure/aks/workload-identity-deploy-cluster). -We also want to establish a passwordless connection between the AKS cluster and the Azure App Configuration store. We can do this by leveraging the AKS Service Connector. The AKS Service Connector is a managed service that allows you to connect your AKS cluster to other Azure services. It will take care of manual tasks like setting up the necessary Azure RBAC roles and federated credentials for authentication, creating the necessary Kubernetes Service Account, and creating any firewall rules needed to allow the AKS cluster to communicate with the Azure service. +Run the following command to create an AKS Service Connector to connect the AKS cluster to the Azure App Configuration store. ```bash az aks connection create appconfig --kube-namespace dev --name myakscluster --resource-group myresourcegroup --target-resource-group myresourcegroup --app-config $AC_NAME --workload-identity $AC_ID --client-type none ``` -> [!KNOWLEDGE] -> The AKS Service Connector is used here for the Azure App Configuration Provider for Kubernetes pods to use to authenticate with the Azure App Configuration store. The authentication is handled in a passwordless manner using AKS Workload Identity. The AKS Service Connector can also be used to connect your application pods to other Azure services like Azure Key Vault, Azure Storage, and Azure SQL Database, etc. For more information, refer to the [service connector documentation](https://learn.microsoft.com/azure/service-connector/quickstart-portal-aks-connection?tabs=UMI). +> [!ALERT] +> This can take up to 5 minutes to complete. -#### Syncing configurations +This command will create a service connector to allow pods in the **dev** namespace to connect to the Azure App Configuration store using the User-Assigned Managed Identity that was created earlier. The service connector will grant the User-Assigned Managed Identity the necessary permissions to access the Azure App Configuration store and configure a [federated credential](https://learn.microsoft.com/graph/api/resources/federatedidentitycredentials-overview?view=graph-rest-1.0) on the managed identity that will allow the Kubernetes [ServiceAccount](https://kubernetes.io/docs/concepts/security/service-accounts/) to authenticate via workload identity. This is a powerful feature that allows you to connect your application pods to Azure services without having to manage any credentials. -The Azure App Configuration Provider for Kubernetes extension also installed new Custom Resource Definitions (CRDs) which you can use to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. 
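If you are curious what such a sync resource looks like, here is a minimal sketch of an AzureAppConfigurationProvider. This is a sketch only: the resource and ConfigMap names are placeholders, and the field names shown reflect the provider's v2.0.0 schema, so check the provider documentation if they have changed.

```yaml
apiVersion: azconfig.io/v1
kind: AzureAppConfigurationProvider
metadata:
  name: appconfig-sync            # placeholder name
  namespace: dev
spec:
  endpoint: https://<your-store>.azconfig.io   # your App Configuration store endpoint
  target:
    configMapName: myconfigmap                 # ConfigMap the provider will create and keep in sync
  auth:
    workloadIdentity:
      serviceAccountName: <service-account>    # ServiceAccount federated to the managed identity
```

The steps that follow build the real thing using your store's endpoint and the ServiceAccount created by the AKS Service Connector.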
+> [!TIP] +> The AKS Service Connector can also be used to connect your application pods to many other Azure services that support Microsoft Entra ID authentication. For more information, refer to the [service connector documentation](https://learn.microsoft.com/azure/service-connector/overview#what-services-are-supported-by-service-connector). + +#### Config sync to Kubernetes + +The Azure App Configuration Provider for Kubernetes extension also installed new Kubernetes [Custom Resource Definitions (CRDs)](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/) which you can use to sync configurations from the Azure App Configuration store to Kubernetes ConfigMaps and optionally Kubernetes Secrets. We can now deploy a sync configuration manifest to sync the configurations from Azure App Configuration to Kubernetes ConfigMaps. But first we will need some values for the manifest. @@ -472,7 +484,7 @@ Run the following command to get the Azure App Configuration store's endpoint. AC_ENDPOINT=$(az appconfig show -n $AC_NAME --query endpoint --output tsv) ``` -To connect to the Azure App Configuration store, it is best to use Workload Identity. The AKS Automatic cluster is already configured with Workload Identity, and you created the Azure App Configuration connection using the User-Assigned Managed Identity that you created earlier. The Service Connector created a Kubernetes Service Account that you can use to sync the configurations. +As mentioned above, we will use Workload Identity to connect to the Azure App Configuration store in a passwordless manner. The AKS Automatic cluster is already configured with Workload Identity, and the AKS Service Connector created a Kubernetes ServiceAccount that you can use to authenticate to the Azure App Configuration store and ultimately the Azure Key Vault. Run the following command to get the Kubernetes ServiceAccount name. @@ -548,22 +560,22 @@ Great job! You have successfully synced configurations from Azure App Configurat ## Scaling -One key benefit of Kubernetes is its ability to scale workloads across a pool of nodes. One key differentiator of **Kubernetes in the cloud** is its ability to scale the node pool to handle more workloads to meet user demand. This section aims to get you comfortable with all the scaling capabilities of AKS Automatic and understand workload scheduling best practices. +One key benefit of Kubernetes is its ability to scale workloads across a pool of nodes. One key differentiator of **Kubernetes in the cloud** is its ability to scale the node pool to handle more workloads to meet user demand. This section aims to get you comfortable with scaling capabilities of AKS Automatic and understand workload scheduling best practices. -### AKS Node Autoprovision +### Cluster autoscaling -With AKS Automatic, the [Node Autoprovision](https://learn.microsoft.com/azure/aks/node-autoprovision?tabs=azure-cli) feature is enabled by default. AKS Node Autoprovision is the Azure implementation of the [Karpenter project](https://karpenter.sh) which was developed by friends at AWS and has been [donated to the Cloud Native Computing Foundation (CNCF)](https://aws.amazon.com/blogs/containers/karpenter-graduates-to-beta/). In short, Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster. 
+With AKS Automatic, the [Node Autoprovision (NAP)](https://learn.microsoft.com/azure/aks/node-autoprovision?tabs=azure-cli) feature is enabled by default and will serve as the default cluster autoscaler. AKS Node Autoprovision is the Azure implementation of the [Karpenter project](https://karpenter.sh) which was developed by friends at AWS and [donated to the Cloud Native Computing Foundation (CNCF)](https://aws.amazon.com/blogs/containers/karpenter-graduates-to-beta/). In short, Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster. -> [!NOTE] -> The term **Node Autoprovision** may be used interchangeably with **Karpenter** in this lab. +> [!HINT] +> The term **Node Autoprovision (NAP)** may be used interchangeably with **Karpenter** in this lab. -The AKS Automatic cluster deploys a system node pool that will run all the system components; things that AKS Automatic will manage. As workloads are deployed to the cluster, Node Autoprovision will automatically scale up a new node on demand. As soon as you deploy an AKS Cluster, there are no user nodes running; just the system node pool. As you deploy workloads, the Node Autoprovision feature will automatically provision a new node to run the workload. Conversely, as you delete workloads, the Node Autoprovision feature will automatically scale down the number of nodes to save costs. But this means that pods will remain in pending state until the newly provisioned node is ready or the workloads will be disrupted as they are moved to other nodes during consolidation events. So you need to account for this when planning for high availability for your workloads. +The AKS Automatic cluster deploys a system node pool that will run all the system components; things that AKS Automatic will manage. As workloads are deployed to the cluster, NAP will automatically scale up a new node on demand. As soon as you deploy an AKS Cluster, there are no user nodes running; just the system node pool. As you deploy workloads, the NAP feature will automatically provision a new node to run the workload. Conversely, as you delete workloads, the NAP feature will automatically scale down the number of nodes to save costs. But this means that pods will remain in pending state until the newly provisioned node is ready or the workloads will be disrupted as they are moved to other nodes during consolidation events. So you need to account for this when planning for high availability for your workloads. -There are a few key Karpenter concepts to understand when working with Node Autoprovision. Let's start by understanding the following concepts: +There are a few key Karpenter concepts to understand when working with NAP. Let's start by understanding the following concepts: -- **NodeClasses**: A [NodeClass](https://karpenter.sh/docs/concepts/nodeclasses/) is a set of constraints that define the type of node that should be provisioned. For example, you can define a NodeClass that specifies the type of VM, the region, the availability zone, and the maximum number of nodes that can be provisioned. In AKS, a default AKSNodeClass is created for you which specifies the OS image (currently [AzureLinux](https://github.com/microsoft/azurelinux)), and OS disk size of 128GB. -- **NodePool**: A [NodePool](https://karpenter.sh/docs/concepts/nodepools/) is a set of nodes that are provisioned based on a NodeClass. You can have multiple NodePools in a cluster, each with its own set of constraints. 
In AKS Automatic, the default NodePool is created for you. You can create additional NodePools with specific constraints if you have workloads that require specific VM attributes. For examples of various NodePool constraints, see the [examples](https://github.com/Azure/karpenter-provider-azure/tree/main/examples/v1beta1) in the Karpenter Azure provider repository.
- [NodeClaim](https://karpenter.sh/docs/concepts/nodeclaims/) - request for a node that matches a set of constraints. When a NodeClaim is created, Karpenter will provision a node that matches the constraints specified in the NodeClaim, and thus, a VM is born!

As mentioned above, the default NodeClass and default NodePool are created for you. So you can start deploying workloads right away. The default NodeClass is fairly generic and should be able to handle most workloads.

You can view the default NodePool by running the following command.

```bash
kubectl get nodepools default -o yaml
```

However, you may want to create additional NodePools with specific constraints if you have teams that need to deploy workloads that require specific VM attributes.

Let's create a new NodePool with specific constraints. Run the following command to create a new NodePool.

```bash
kubectl apply -f - <<EOF
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: devpool
spec:
  disruption:
    consolidationPolicy: WhenUnderutilized
    expireAfter: Never
  template:
    metadata:
      labels:
        team: dev
    spec:
      nodeClassRef:
        name: default
      taints:
        - key: team
          value: dev
          effect: NoSchedule
      requirements:
        - key: kubernetes.io/os
          operator: In
          values: ["linux"]
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["on-demand"]
        - key: karpenter.azure.com/sku-family
          operator: In
          values: ["D"]
EOF
```

The **devpool** NodePool includes a taint of `team=dev:NoSchedule`, so only pods that tolerate that taint will be scheduled on its nodes. Next, let's deploy a workload to the new NodePool.

> [!HINT]
> Remember we created a new policy that only allows images from specified container registries.

Run the following command to get the name of the Azure Container Registry.

```bash
ACR_NAME=$(az acr list --resource-group myresourcegroup --query "[0].name" -o tsv)
```

Run the following command to import the product-service container image into the Azure Container Registry.

```bash
az acr import --name $ACR_NAME --source ghcr.io/azure-samples/aks-store-demo/product-service:1.5.2 --image product-service:1.5.2
```

Run the following command to replace the existing product-service deployment with pods that tolerate the taint. This will ensure that the pods are scheduled on the dev NodePool.
+Run the following command to replace the existing product-service pod with ones that tolerates the taint. This will ensure that the pod is scheduled on the dev NodePool. ```bash -kubectl apply -f - < [!IMPORTANT] -> The Vertical Pod Autoscaler will evict pods only if the number of replicas is greater than 1. Otherwise, it will not evict the pod. +> The VPA will evict pods only if the number of replicas is greater than 1. Otherwise, it will not evict the pod. Once you see the pods being restarted, press **Ctrl+C** to exit the watch then run the following command to confirm the resource requests and limits have been set. ```bash -kubectl describe po -n dev $(kubectl get pod -n dev -l app=product-service -o jsonpath='{.items[0].metadata.name}') | grep -i requests -A2 +kubectl describe po -n dev $(kubectl get pod -n dev -l app=product-service --sort-by=.metadata.creationTimestamp -o jsonpath='{.items[0].metadata.name}') | grep -i requests -A2 ``` -With requests in place, the scheduler can make better decisions about where to place the pod. The Vertical Pod Autoscaler will also adjust the resource requests based on the pod's usage. +With requests in place, the scheduler can make better decisions about where to place the pod. The VPA will also adjust the resource requests based on the pod's usage. This is also especially important when using pod autocaling features like the Kubernetes [HorizontalPodAutoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) or [KEDA](https://keda.sh). + +> [!KNOWLEDGE] +> KEDA is the Kubernetes-based Event Driven Autoscaler. With KEDA, you can scale your workloads based on the number of events in a queue, the length of a stream, or any other custom metric. It won't be covered in this lab, but the KEDA add-on for AKS is enabled by default in AKS Automatic clusters and you can learn more about it [here](https://learn.microsoft.com/azure/aks/keda-about). #### Dealing with disruptions When deploying workloads to Kubernetes, it is important to ensure that your workloads are highly available and resilient to voluntary and involuntary disruptions. This is especially important when running workloads with Karpenter because nodes can be provisioned and deprovisioned automatically. There are a few best practices to follow to ensure that your workloads are highly available and resilient to disruptions. -The first thing you can do is to set [PodDisruptionBudgets](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets) for your workloads. PodDisruptionBudgets are used to ensure that a certain number of Pods are available during maintenance or disruptions. By setting PodDisruptionBudgets, you can ensure that your workloads are not abruptly terminated during maintenance or node scale down events. +The first thing you can do is to set [PodDisruptionBudgets](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets) for your workloads. PodDisruptionBudgets are used to ensure that a certain number of pods are available during maintenance or disruptions. By setting PodDisruptionBudgets, you can ensure that your workloads are not abruptly terminated during maintenance or node scale down events. -The YAML spec for a PodDisruptionBudget is relatively easy to write and understand. But if you are not sure of how to write one, you can use Microsoft Copilot for Azure to generate the YAML for you. +The YAML spec for a PodDisruptionBudget is relatively easy to write and understand. 
But if you are not sure of how to write one, you can use [Microsoft Copilot for Azure](https://learn.microsoft.com/azure/copilot/overview) to generate the YAML for you.

Follow these steps to create a PodDisruptionBudget for the product-service running in the dev namespace.

- Navigate to your AKS cluster in the Azure Portal.
- Under the **Kubernetes resources** section, click on **Workloads**, then click on the **+ Create** button to expand the dropdown.
- Click on the **Apply a YAML** button. Here you will be presented with a blank YAML editor.
- Put your cursor in the editor and press **Alt+I** to open the prompt dialog.
- In the textbox type the following text and click the **send** button.
  ```text
  create a pod disruption budget for the product-service running in the dev namespace to run at least 1 replica at all times
  ```
- You will see the YAML generated for you
- Click **Accept**
- Click the **Dry-run** button to ensure the YAML is valid
- Click the **Add** button

Great! So now that you have a PodDisruptionBudget in place, you can be sure that at least one replica of the product-service will be available at all times. This is especially important when running workloads with Karpenter because it will try to consolidate as much as possible.

#### More Karpenter features

There are other ways to deal with Karpenter's desire to consolidate nodes and still maintain a healthy app. Karpenter also supports [Consolidation Policies](https://karpenter.sh/docs/concepts/disruption/#consolidation) which allow you to customize the consolidation behavior. You can also set [node disruption budgets](https://karpenter.sh/docs/concepts/disruption/#nodepool-disruption-budgets) in the NodePool manifest to specify the percentage of nodes that can be consolidated at a time. Lastly, if you want to simply prevent a pod or node from being disrupted, you can use the **karpenter.sh/do-not-disrupt: true** annotation at the [pod level](https://karpenter.sh/docs/concepts/disruption/#pod-level-controls) or at the [node level](https://karpenter.sh/docs/concepts/disruption/#node-level-controls).

#### More on affinity and anti-affinity

[Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) in Kubernetes is a way for you to influence the scheduling of pods in a Kubernetes cluster.
We saw an example of this earlier when we deployed a pod with node affinity and tolerations to ensure that the pod was scheduled on a node that matched the criteria. Pod anti-affinity is used to ensure that pods are not scheduled on the same node. If you noticed, the product-service deployment included three replicas but they were all scheduled on the same node. You can confirm this by running the following command: @@ -781,12 +818,12 @@ You can confirm this by running the following command: kubectl get po -n dev -l app=product-service -o wide ``` -If the node goes away for whatever reason, all 3 replicas will be disrupted. What's worst is if the node encountered a hardware failure, all 3 replicas will be disrupted at the same time even with the PodDisruptionBudget in place. To ensure that the replicas are scheduled on different nodes, you can set Pod anti-affinity rules. +If a node hardware failure occurs, all three replicas will be disrupted at the same time even with the PodDisruptionBudget in place. To ensure that the replicas are spread across different nodes, you can set pods anti-affinity rules. -Run the following command to update the product-service deployment with Pod anti-affinity rules. +Run the following command to replace the product-service deployment with pod anti-affinity rules in place. ```bash -kubectl apply -f - < [!KNOWLEDGE] -> Pod anti-affinity rules will spread the pods across different nodes, but there's no guarantee that the nodes will be in different availability zones. Pod topology spread constraints will ensure that the pods are spread across different availability zones as the topology key is set to `topology.kubernetes.io/zone`. The `maxSkew` field specifies the maximum difference between the number of pods in any two zones. The `whenUnsatisfiable` field specifies what action to take if the constraint cannot be satisfied. In this case, we set it to `DoNotSchedule` which means that the pod will not be scheduled if the constraint cannot be satisfied. More information on spreading pods across different zones can be found [here](https://learn.microsoft.com/azure/aks/aks-zone-resiliency#ensure-pods-are-spread-across-azs). +Here, the pod topology spread constraints will ensure that the pods are spread across different availability zones as the topology key is set to **topology.kubernetes.io/zone**. The **maxSkew** field specifies the maximum difference between the number of pods in any two zones. The **whenUnsatisfiable** field specifies what action to take if the constraint cannot be satisfied. In this case, we set it to **DoNotSchedule** which means that the pod will not be scheduled if the constraint cannot be satisfied. More information on spreading pods across different zones can be found [here](https://learn.microsoft.com/azure/aks/aks-zone-resiliency#ensure-pods-are-spread-across-azs). Run the following command and you should start to see the nodes coming up in different availability zones. ```bash -kubectl get nodes -o custom-columns=NAME:'{.metadata.name}',OS:'{.status.nodeInfo.osImage}',SKU:'{.metadata.labels.karpenter\.azure\.com/sku-name}',ZONE:'{.metadata.labels.topology\.kubernetes\.io/zone}' +kubectl get nodes -l karpenter.sh/nodepool=devpool -o custom-columns=NAME:'{.metadata.name}',OS:'{.status.nodeInfo.osImage}',SKU:'{.metadata.labels.karpenter\.azure\.com/sku-name}',ZONE:'{.metadata.labels.topology\.kubernetes\.io/zone}' -w +``` + +Once you see the nodes coming up in different availability zones, press **Ctrl+C** to exit the watch. 
+ +Now, run the following command to watch the pods and confirm that the replicas are scheduled across different availability zones. + +```bash +kubectl get po -n dev -l app=product-service -o wide ``` Excellent! You have now know about some of the best practices for workload scheduling in Kubernetes to ensure that your workloads are scheduled efficiently and effectively. From a1dfc5d483dea0630c5a981a72b03bb4c4962a76 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Fri, 25 Oct 2024 10:02:48 -0700 Subject: [PATCH 09/27] renaming sections and adding troubleshooting --- workshops/operating-aks-automatic/workshop.md | 233 +++++++++++++++--- 1 file changed, 194 insertions(+), 39 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index 90ae644b..291fb355 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -81,7 +81,7 @@ You are now ready to get started with the lab. === -## Security +## Security and governance Security above all else! The AKS Automatic cluster is configured with Azure Role-Based Access Control (RBAC) authentication and authorization, Azure Policy, and Deployment Safeguards enabled out of the box. This section aims to get AKS operators comfortable with administering user access to the AKS cluster, ensuring security best practices with Azure Policy and Deployment Safeguards. @@ -99,7 +99,9 @@ To grant permissions to the AKS cluster, you will need to assign an Azure role. In your shell, run the following command to get the AKS cluster credentials ```bash -az aks get-credentials --resource-group myresourcegroup --name myakscluster +az aks get-credentials \ +--resource-group myresourcegroup \ +--name myakscluster ``` A Kubernetes [Namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) is a way to isolate resources in a cluster and is common practice to create namespaces for different teams or environments. Run the following command to create a namespace for the dev team to use. @@ -116,19 +118,29 @@ Since this is the first time you are running a kubectl command, you will be prom Run the following command to get the AKS cluster's resource ID. ```bash -AKS_ID=$(az aks show --resource-group myresourcegroup --name myakscluster --query id --output tsv) +AKS_ID=$(az aks show \ +--resource-group myresourcegroup \ +--name myakscluster \ +--query id \ +--output tsv) ``` Run the following command to get the developer's user principal ID. ```bash -DEV_USER_PRINCIPAL_ID=$(az ad user show --id @lab.CloudPortalCredential(Dev).Username --query id --output tsv) +DEV_USER_PRINCIPAL_ID=$(az ad user show \ +--id @lab.CloudPortalCredential(Dev).Username \ +--query id \ +--output tsv) ``` Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. Scoping the permissions to the namespace ensures that the developer can only access the resources within the namespace and not the entire cluster. ```bash -az role assignment create --role "Azure Kubernetes Service RBAC Writer" --assignee $DEV_USER_PRINCIPAL_ID --scope $AKS_ID/namespaces/dev +az role assignment create \ +--role "Azure Kubernetes Service RBAC Writer" \ +--assignee $DEV_USER_PRINCIPAL_ID \ +--scope $AKS_ID/namespaces/dev ``` When you logged in to access the Kubernetes API via the kubectl command, you were prompted to log in with your Microsoft Entra ID. 
The kubelogin plugin stored the OIDC token in the **~/.kube/cache/kubelogin** directory. In order to quickly test the permissions of a different user, we can simply move the JSON file to a different directory. @@ -166,7 +178,9 @@ helm repo add aks-store-demo https://azure-samples.github.io/aks-store-demo Run the following command to install the AKS Store Demo application in the **dev** namespace. ```bash -helm install demo aks-store-demo/aks-store-demo-chart --namespace dev --set aiService.create=true +helm install demo aks-store-demo/aks-store-demo-chart \ +--namespace dev \ +--set aiService.create=true ``` The helm install command should show a status of "deployed". This means that the application has successfully deployed in the **dev** namespace. It will take a few minutes to deploy, so let's move on. @@ -376,73 +390,107 @@ For more information on how to create a policy definition from a ConstraintTempl Developers need a way to integrate their workloads with Azure services and make the configs available to their workloads in the cluster. They also need to ensure password-less authentication with Microsoft Entra ID is leveraged as much as possible. This section aims to get you comfortable with setting up a centralized configuration store, syncing configs to the cluster as Kubernetes ConfigMaps, and setting up connectors to integrate with other Azure services. -### Sync app configs to the cluster - [Azure Key Vault](https://learn.microsoft.com/azure/key-vault/general/overview) is a cloud service for securely storing and accessing secrets. A secret is anything that you want to tightly control access to, such as API keys, passwords, or certificates. [Azure App Configuration](https://learn.microsoft.com/azure/azure-app-configuration/overview) is a managed service that helps developers centralize their application configurations. It provides a service to store, manage, and retrieve application settings and feature flags. You can also reference secrets stored in Azure Key Vault from Azure App Configuration. We can leverage these two services to store our application configurations and secrets and make them available to our workloads running in the AKS cluster using native Kubernetes resources; [ConfigMaps](https://kubernetes.io/docs/concepts/configuration/configmap/) and [Secrets](https://kubernetes.io/docs/concepts/configuration/secret/). -#### Provision Azure resources +### Provision Azure resources Run the following command to create an Azure Key Vault. ```bash -KV_NAME=$(az keyvault create --name mykeyvault$RANDOM --resource-group myresourcegroup --query name -o tsv) +KV_NAME=$(az keyvault create \ +--name mykeyvault$RANDOM \ +--resource-group myresourcegroup \ +--query name \ +--output tsv) ``` Assign yourself the **Key Vault Secrets Administrator** role to the Azure Key Vault. ```bash -az role assignment create --role "Key Vault Administrator" --assignee $(az ad signed-in-user show --query id -o tsv) --scope $(az keyvault show --name $KV_NAME --query id -o tsv) +az role assignment create \ +--role "Key Vault Administrator" \ +--assignee $(az ad signed-in-user show --query id -o tsv) \ +--scope $(az keyvault show --name $KV_NAME --query id -o tsv) ``` Run the following command to create a secret in the Azure Key Vault. ```bash -az keyvault secret set --vault-name $KV_NAME --name MySecret1 --value MySecretValue1 +az keyvault secret set \ +--vault-name $KV_NAME \ +--name MySecret1 \ +--value MySecretValue1 ``` Run the following command to create an Azure App Configuration store. 
```bash -AC_NAME=$(az appconfig create --name myappconfig$RANDOM --resource-group myresourcegroup --query name -o tsv) +AC_NAME=$(az appconfig create \ +--name myappconfig$RANDOM \ +--resource-group myresourcegroup \ +--query name \ +--output tsv) ``` It's best practice to create a User-Assigned Managed Identity to access Azure resources. This identity will be used to access only data in the Azure App Configuration store and the Azure Key Vault and nothing else. ```bash -AC_ID=$(az identity create --name $AC_NAME-id --resource-group myresourcegroup --query id -o tsv) +AC_ID=$(az identity create \ +--name $AC_NAME-id \ +--resource-group myresourcegroup \ +--query id \ +--output tsv) ``` Create simple key-value pair in the Azure App Configuration store. ```bash -az appconfig kv set --name $AC_NAME --key MyKey1 --value MyValue1 --yes +az appconfig kv set \ +--name $AC_NAME \ +--key MyKey1 \ +--value MyValue1 \ +--yes ``` Now add a key vault reference to the Azure App Configuration store. This will point to the secret that was created in the Azure Key Vault in the previous step. ```bash -az appconfig kv set-keyvault --name $AC_NAME --key MySecret1 --secret-identifier https://$KV_NAME.vault.azure.net/secrets/MySecret1 --yes +az appconfig kv set-keyvault \ +--name $AC_NAME \ +--key MySecret1 \ +--secret-identifier https://$KV_NAME.vault.azure.net/secrets/MySecret1 \ +--yes ``` The Azure App Configuration store will have a reference to the secret in the Azure Key Vault and the intent is to use the user-assigned managed identity to access the secret in the key vault. However, this identity needs to be granted access to the key vault. Run the following command to allow the configuration store's managed identity to read secrets from the key vault. ```bash -az role assignment create --role "Key Vault Secrets User" --assignee $(az identity show --id $AC_ID --query principalId -o tsv) --scope $(az keyvault show --name $KV_NAME --query id -o tsv) +az role assignment create \ +--role "Key Vault Secrets User" \ +--assignee $(az identity show --id $AC_ID --query principalId --output tsv) \ +--scope $(az keyvault show --name $KV_NAME --query id --output tsv) ``` > [!TIP] > You might be wondering "what about the role assignment for the Azure App Configuration store?" We'll get to that in the next section. -#### Azure App Configuration Provider for Kubernetes +### Azure App Configuration Provider for Kubernetes AKS offers an extension called the [Azure App Configuration Provider for Kubernetes](https://learn.microsoft.com/azure/aks/azure-app-configuration?tabs=cli) that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually. Run the following command to install the Azure App Configuration Provider for Kubernetes extension. ```bash -az k8s-extension create --cluster-type managedClusters --cluster-name myakscluster --resource-group myresourcegroup --name appconfigurationkubernetesprovider --extension-type Microsoft.AppConfiguration --auto-upgrade false --version 2.0.0 +az k8s-extension create \ +--cluster-type managedClusters \ +--cluster-name myakscluster \ +--resource-group myresourcegroup \ +--name appconfigurationkubernetesprovider \ +--extension-type Microsoft.AppConfiguration \ +--auto-upgrade false \ +--version 2.0.0 ``` > [!ALERT] @@ -454,14 +502,21 @@ After the extension has been created, you can verify that the pods are running. 
kubectl get pods -n azappconfig-system ``` -#### Passwordless authentication to Azure services +### Passwordless authentication to Azure services We also want to establish a passwordless connection between the AKS cluster and the Azure App Configuration store. We can do this by leveraging the [AKS Service Connector](https://learn.microsoft.com/azure/service-connector/how-to-use-service-connector-in-aks). The AKS Service Connector will take care of manual tasks like setting up the necessary Azure RBAC roles and federated credentials for authentication, creating the necessary Kubernetes Service Account, and creating any firewall rules needed to allow the AKS cluster to communicate with the Azure service. It makes it really simple to get your application pods connected to Azure services using [AKS Workload Identity](https://learn.microsoft.com/azure/aks/workload-identity-deploy-cluster). Run the following command to create an AKS Service Connector to connect the AKS cluster to the Azure App Configuration store. ```bash -az aks connection create appconfig --kube-namespace dev --name myakscluster --resource-group myresourcegroup --target-resource-group myresourcegroup --app-config $AC_NAME --workload-identity $AC_ID --client-type none +az aks connection create appconfig \ +--kube-namespace dev \ +--name myakscluster \ +--resource-group myresourcegroup \ +--target-resource-group myresourcegroup \ +--app-config $AC_NAME \ +--workload-identity $AC_ID \ +--client-type none ``` > [!ALERT] @@ -472,7 +527,7 @@ This command will create a service connector to allow pods in the **dev** namesp > [!TIP] > The AKS Service Connector can also be used to connect your application pods to many other Azure services that support Microsoft Entra ID authentication. For more information, refer to the [service connector documentation](https://learn.microsoft.com/azure/service-connector/overview#what-services-are-supported-by-service-connector). -#### Config sync to Kubernetes +### Config sync to Kubernetes The Azure App Configuration Provider for Kubernetes extension also installed new Kubernetes [Custom Resource Definitions (CRDs)](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/) which you can use to sync configurations from the Azure App Configuration store to Kubernetes ConfigMaps and optionally Kubernetes Secrets. @@ -481,7 +536,10 @@ We can now deploy a sync configuration manifest to sync the configurations from Run the following command to get the Azure App Configuration store's endpoint. ```bash -AC_ENDPOINT=$(az appconfig show -n $AC_NAME --query endpoint --output tsv) +AC_ENDPOINT=$(az appconfig show \ +--name $AC_NAME \ +--query endpoint \ +--output tsv) ``` As mentioned above, we will use Workload Identity to connect to the Azure App Configuration store in a passwordless manner. The AKS Automatic cluster is already configured with Workload Identity, and the AKS Service Connector created a Kubernetes ServiceAccount that you can use to authenticate to the Azure App Configuration store and ultimately the Azure Key Vault. @@ -545,7 +603,11 @@ The app config sync is set to refresh every 10 seconds and you can choose which Run the following command to update the value for Key1 in the Azure App Configuration store. 
```bash -az appconfig kv set --name $AC_NAME --key MyKey1 --value MyNewValue1 --yes +az appconfig kv set \ +--name $AC_NAME \ +--key MyKey1 \ +--value MyNewValue1 \ +--yes ``` After a minute or so, you can check to see if the configurations have been updated in the Kubernetes ConfigMap. @@ -558,7 +620,7 @@ Great job! You have successfully synced configurations from Azure App Configurat === -## Scaling +## Scaling and workload scheduling One key benefit of Kubernetes is its ability to scale workloads across a pool of nodes. One key differentiator of **Kubernetes in the cloud** is its ability to scale the node pool to handle more workloads to meet user demand. This section aims to get you comfortable with scaling capabilities of AKS Automatic and understand workload scheduling best practices. @@ -650,13 +712,19 @@ Now that the dev team has their own NodePool, you can try scheduling a pod that Run the following command to get the name of the Azure Container Registry. ```bash -ACR_NAME=$(az acr list --resource-group myresourcegroup --query "[0].name" -o tsv) +ACR_NAME=$(az acr list \ +--resource-group myresourcegroup \ +--query "[0].name" \ +--output tsv) ``` Run the following command to import the product-service container image into the Azure Container Registry. ```bash -az acr import --name $ACR_NAME --source ghcr.io/azure-samples/aks-store-demo/product-service:1.5.2 --image product-service:1.5.2 +az acr import \ +--name $ACR_NAME \ +--source ghcr.io/azure-samples/aks-store-demo/product-service:1.5.2 \ +--image product-service:1.5.2 ``` Run the following command to replace the existing product-service pod with ones that tolerates the taint. This will ensure that the pod is scheduled on the dev NodePool. @@ -720,17 +788,13 @@ Once you see the new node being provisioned, press **Ctrl+C** to exit the watch kubectl get po -n dev -l app=product-service -o wide ``` -Congrats! You have successfully created a new NodePool for your dev team and have the proper constraints in place to ensure that the right workloads are scheduled on the right nodes. - -### Workload scheduling best practices - With NAP, you can ensure that your dev teams have the right resources to run their workloads without having to worry about the underlying infrastructure. As demonstrated, you can create NodePools with specific constraints to handle different types of workloads. But it is important to remember that the workload manifests include the necessary scheduling attributes such as **nodeAffinity** and **tolerations** to ensure that the workloads are scheduled on the right nodes. Otherwise, they may be scheduled on the default NodePool which is fairly generic and welcomes all workloads. When deploying workloads to Kubernetes, it is important to follow best practices to ensure that your workloads are scheduled efficiently and effectively. This includes setting resource requests and limits, using PodDisruptionBudgets, and setting pod anti-affinity rules. Let's explore a few best practices for workload scheduling. -#### Resource requests +### Resource requests When deploying workloads to Kubernetes, it is important to set resource requests and limits. Resource requests are used by the scheduler to find the best node to place the pod on. Resource limits are used to prevent a pod from consuming more resources than it should. By setting resource requests and limits, you can ensure that your workloads are scheduled efficiently and effectively. 
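As a quick reference, requests and limits are declared per container in the pod spec, as in this sketch (the values here are arbitrary placeholders, not tuned recommendations):

```yaml
containers:
  - name: product-service
    image: product-service:1.5.2
    resources:
      requests:
        cpu: 100m      # used by the scheduler to pick a node
        memory: 128Mi
      limits:
        cpu: 250m      # CPU is throttled above this value
        memory: 256Mi  # exceeding this gets the container OOM-killed
```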
We saw an example of this earlier when we deployed a pod with resource requests and limits after seeing warnings about not having them set. @@ -779,7 +843,7 @@ With requests in place, the scheduler can make better decisions about where to p > [!KNOWLEDGE] > KEDA is the Kubernetes-based Event Driven Autoscaler. With KEDA, you can scale your workloads based on the number of events in a queue, the length of a stream, or any other custom metric. It won't be covered in this lab, but the KEDA add-on for AKS is enabled by default in AKS Automatic clusters and you can learn more about it [here](https://learn.microsoft.com/azure/aks/keda-about). -#### Dealing with disruptions +### Dealing with disruptions When deploying workloads to Kubernetes, it is important to ensure that your workloads are highly available and resilient to voluntary and involuntary disruptions. This is especially important when running workloads with Karpenter because nodes can be provisioned and deprovisioned automatically. There are a few best practices to follow to ensure that your workloads are highly available and resilient to disruptions. @@ -804,11 +868,11 @@ Follow these steps to create a PodDisruptionBudget for the product-service runni Great! So now that you have a PodDisruptionBudget in place, you can be sure that at least one replica of the product-service will be available at all times. This is especially important when running workloads with Karpenter because it will try to consolidate as much as possible. -#### More Karpenter features +### More Karpenter features There are other ways to deal with Karpenter's desire to consolidate nodes and still maintain a healthy app. Karpenter also supports [Consolidation Policies](https://karpenter.sh/docs/concepts/disruption/#consolidation) which allows you to customize the consolidation behavior. You can also set [node disruption budgets](https://karpenter.sh/docs/concepts/disruption/#nodepool-disruption-budgets) in the NodePool manifest to specify the percentage of nodes that can be consolidated at a time. Lastly, if you want to simply prevent a pod or node from being disrupted, you can use the **karpenter.sh/do-not-disrupt: true** annotation at the [pod level](https://karpenter.sh/docs/concepts/disruption/#pod-level-controls) or at the [node level](https://karpenter.sh/docs/concepts/disruption/#node-level-controls). -#### More on affinity and anti-affinity +### More on affinity and anti-affinity [Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) in Kubernetes is a way for you to influence the scheduling of pods in a Kubernetes cluster. We saw an example of this earlier when we deployed a pod with node affinity and tolerations to ensure that the pod was scheduled on a node that matched the criteria. Pod anti-affinity is used to ensure that pods are not scheduled on the same node. If you noticed, the product-service deployment included three replicas but they were all scheduled on the same node. @@ -878,7 +942,7 @@ kubectl get po -n dev -l app=product-service -o wide -w Once you see the pods being scheduled on different nodes, press **Ctrl+C** to exit the watch. -#### Pod topology spread constraints +### Pod topology spread constraints To take the concept of spreading pods across nodes even further, you can use [Pod topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/) on top of pod anti-affinity rules. 
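Concretely, the stanza added to the deployment spec looks like this (taken from the product-service manifest used in this workshop; the fields are explained in the next paragraph):

```yaml
topologySpreadConstraints:
  - maxSkew: 1
    topologyKey: "topology.kubernetes.io/zone"
    whenUnsatisfiable: DoNotSchedule
    labelSelector:
      matchLabels:
        app: product-service
```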
Pod topology spread constraints are used to ensure that pods are spread across different fault domains such as [Azure availability zones](https://learn.microsoft.com/azure/reliability/availability-zones-overview?tabs=azure-cli). With AKS Automatic, one of its requirements is to ensure the region it is being deployed to supports availability zones. Therefore, you can be sure that the nodes provisioned by Karpenter can be spread across different availability zones.

@@ -977,16 +1041,107 @@ kubectl get po -n dev -l app=product-service -o wide

 Excellent! You now know some of the best practices for workload scheduling in Kubernetes to ensure that your workloads are scheduled efficiently and effectively.

-## Troubleshooting
+===
+
+## Troubleshooting workloads
+
+Let's face it. Applications will fail. Being able to quickly identify and mitigate issues is crucial, and in this section, you will become familiar with common Kubernetes troubleshooting techniques and lean heavily on Microsoft Copilot for Azure to help uncover and solve problems.
+
+### Troubleshooting with Azure Copilot
+
+[Microsoft Copilot for Azure](https://learn.microsoft.com/azure/copilot/overview) is a tool built into the Azure portal that enables you to diagnose and troubleshoot issues. It is not limited to AKS; you can use it to help troubleshoot issues with any of your Azure resources. The Azure Copilot provides a guided experience to lead you through the troubleshooting process and helps you understand concepts by offering explanations, suggestions, and resource URLs to learn more.
+
+Use the Azure Copilot to help you find and fix issues with your workloads.
+
+In the Azure Portal, navigate to your AKS cluster and click on the Copilot button found at the top of the page. A panel will open on the right side of the screen and you will be presented with some suggested prompts.
+
+Ask the Copilot `how's the health of my pods?`
+
+You should be presented with a kubectl command that you can run to get the status of your pods. Click the **Yes** button to execute the command from the Run command page.
+
+In the Run command page, the kubectl command will be pre-populated in the textbox at the bottom of the page. Click on **Run** to execute the command. You will see the output of the kubectl command in the main panel.
+
+Scroll through the output and see if you can spot the issue.
+
+There is a problem with the ai-service pod.
+
+Ask the Copilot `I see the ai-service pod in the dev namespace with crashloopbackoff status. What does that mean?` The Copilot should provide you with an explanation of what the crashloopbackoff status means and how to troubleshoot it.
+
+You were not specific with the pod name, so the Copilot gave you a general command to run. Re-prompt the Copilot to restate the commands by giving it the exact pod name: `The exact pod name is ai-service-xxxxx. What commands should I run again?`
+
+Some of the commands may include a **Run** button that can enable the Azure Cloud Shell; don't use this, as you'd need to re-authenticate from within the Cloud Shell. Instead, copy the **kubectl describe** pod command and run it in the Run command window to get more information about the pod. The **kubectl describe** command will provide you with more information about the pod, including the events that led to the crashloopbackoff status. You might get a little more information about the issue if you look through the pod logs.
The Copilot should have also provided you with a **kubectl logs** command to get the logs of the pod. Run that command to get the logs.
+
+You should see that the ai-service pod is failing because it is missing environment variables that are required to connect to Azure OpenAI. Do you have an Azure OpenAI service running? If you are not sure, you can ask the Copilot `Do I have an Azure OpenAI service running?`
+
+The Copilot will provide you with an [Azure Resource Graph](https://learn.microsoft.com/azure/governance/resource-graph/overview) command and run it behind the scenes to determine if you have an Azure OpenAI service running.
+
+It should have determined there is no Azure OpenAI service running.
+
+You go back to your dev team and they tell you that they will need an Azure OpenAI service with the GPT-3.5 Turbo model to run the ai-service pod.
+
+Ask the Copilot `How do I create an Azure OpenAI service with the GPT-3.5 Turbo model?`

-Let's face it. Applications will fail. Being able to quickly identify and mitigate issues is crucial and in this section, you will become familiar with troubleshooting tools and techniques and lean heavily on Azure Copilot to help uncover and solve problems.

+The instructions should be very close to what you need. You can either follow the instructions and/or reference the docs it replies with, or you can run the following commands to quickly create an Azure OpenAI service.

-Be sure to check out the following resources for more information on troubleshooting AKS:

+```bash
+AI_NAME=$(az cognitiveservices account create \
+--name myaiservice$RANDOM \
+--resource-group myresourcegroup \
+--kind OpenAI \
+--sku S0 \
+--query name \
+--output tsv)
+```
+
+Next, run the following command to deploy a GPT-3.5 Turbo model.
+
+```bash
+az cognitiveservices account deployment create \
+--name $AI_NAME \
+--resource-group myresourcegroup \
+--deployment-name gpt-35-turbo \
+--model-name gpt-35-turbo \
+--model-version "0301" \
+--model-format OpenAI \
+--sku-capacity 1 \
+--sku-name "Standard"
+```
+
+> [!IMPORTANT]
+> The model version above may not be available in your region. You can check the model availability [here](https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=python-secure#standard-deployment-model-availability).
+
+The dev team also tells you that the ai-service pod requires a ConfigMap named **ai-service-configs** with the following environment variables to connect to the Azure OpenAI service.
+
+- **AZURE_OPENAI_DEPLOYMENT_NAME** set to "gpt-35-turbo"
+- **AZURE_OPENAI_ENDPOINT** set to the endpoint of the Azure OpenAI service
+- **USE_AZURE_OPENAI** set to "True"
+
+Additionally, the ai-service pod requires a Secret named **ai-service-secrets** with the following variable to authenticate to the Azure OpenAI service.
+
+- **OPENAI_API_KEY** set to the API key of the Azure OpenAI service
+
+Can you complete the rest of the steps to get the ai-service pod running?
+
+> [!HINT]
+> You can put the environment variables in the Azure App Configuration store and sync them to the Kubernetes ConfigMap. You can then update the ai-service deployment to use the ConfigMap for the environment variables.
+
+How can you go about updating this to use passwordless authentication with AKS Workload Identity instead?
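However you choose to wire it up, you can sanity-check the target shape of the two resources with a direct kubectl sketch like this (the endpoint and key values are placeholders you would pull from your own Azure OpenAI resource; the lab's hint above suggests syncing these from Azure App Configuration instead):

```bash
# hypothetical placeholder values shown; substitute your own endpoint and key
kubectl create configmap ai-service-configs -n dev \
  --from-literal=AZURE_OPENAI_DEPLOYMENT_NAME=gpt-35-turbo \
  --from-literal=AZURE_OPENAI_ENDPOINT=<your-azure-openai-endpoint> \
  --from-literal=USE_AZURE_OPENAI=True

kubectl create secret generic ai-service-secrets -n dev \
  --from-literal=OPENAI_API_KEY=<your-api-key>
```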
+ +> [!HINT] +> A complete walkthrough of the solution can be found [here](https://learn.microsoft.com/azure/aks/open-ai-secure-access-quickstart) + +### Troubleshooting with kubectl + +The Azure Copilot gave you some pretty good suggestions to start troubleshooting with kubectl. The **kubectl describe** command is a great way to get more information about a pod. You can also use the **kubectl logs** command to get the logs of a pod. One thing to note about using the **kubectl logs** command is that it only works for pods that are running. If the pod is in a crashloopbackoff status, you may not be able to get the logs of the pod that failed. In this case you can use the **--previous** flag to get the logs of the previous container that failed. + +Finally, be sure to checkout the [Troubleshooting Applications](https://kubernetes.io/docs/tasks/debug/debug-application/) guide found on the Kubernetes documentation site and the following resources for more information on troubleshooting AKS: - [Work with AKS clusters efficiently using Microsoft Copilot in Azure](https://learn.microsoft.com/azure/copilot/work-aks-clusters) - [Azure Kubernetes Service (AKS) troubleshooting documentation](https://learn.microsoft.com/troubleshoot/azure/azure-kubernetes/welcome-azure-kubernetes) - [Set up Advanced Network Observability for Azure Kubernetes Service (AKS)](https://learn.microsoft.com/azure/aks/advanced-network-observability-cli?tabs=cilium) +=== + ## Conclusion Congratulations! You have completed the workshop on operating AKS Automatic. You have learned how to create an AKS Automatic cluster, enforce custom policies, sync configurations to the cluster, scale workloads, and apply best practices for workload scheduling. You have also learned how to troubleshoot issues in AKS. You are now well-equipped to operate AKS Automatic clusters and ensure that your workloads are running efficiently and effectively. From fd997791ec62b76fbba602879e70d47c61aecff9 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 12:39:28 -0700 Subject: [PATCH 10/27] mapping markdown tags --- packages/website/src/app/workshop/workshop.ts | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/packages/website/src/app/workshop/workshop.ts b/packages/website/src/app/workshop/workshop.ts index cc2db699..c08be9b6 100644 --- a/packages/website/src/app/workshop/workshop.ts +++ b/packages/website/src/app/workshop/workshop.ts @@ -33,6 +33,60 @@ export async function loadWorkshop(repoPath: string, options?: LoaderOptions): P markdown = `

${fileContents.meta.title}

\n\n${markdown}`; } + // Replace the specific content for [!NOTE] + markdown = markdown.replace( + /> \[!NOTE]\n(> .*\n)*/g, + (match) => { + const contentWithoutNoteTag = match.replace(/> \[!NOTE]\n/, ''); + return `
<div class="task" data-title="note">\n\n${contentWithoutNoteTag}\n</div>
\n`; + } + ); + + // Replace the specific content for [!HELP] + markdown = markdown.replace( + /> \[!HELP]\n(> .*\n)*/g, + (match) => { + const contentWithoutHelpTag = match.replace(/> \[!HELP]\n/, ''); + return `
<div class="warning" data-title="help">\n\n${contentWithoutHelpTag}\n</div>
\n`; + } + ); + + // Replace the specific content for [!ALERT] + markdown = markdown.replace( + /> \[!ALERT]\n(> .*\n)*/g, + (match) => { + const contentWithoutAlertTag = match.replace(/> \[!ALERT]\n/, ''); + return `
<div class="important" data-title="alert">\n\n${contentWithoutAlertTag}\n</div>
\n`; + } + ); + + // Replace the specific content for [!HINT] + markdown = markdown.replace( + /> \[!HINT]\n(> .*\n)*/g, + (match) => { + const contentWithoutHintTag = match.replace(/> \[!HINT]\n/, ''); + return `
<div class="tip" data-title="hint">\n\n${contentWithoutHintTag}\n</div>
\n`; + } + ); + + // Replace the specific content for [!TIP] + markdown = markdown.replace( + /> \[!TIP]\n(> .*\n)*/g, + (match) => { + const contentWithoutTipTag = match.replace(/> \[!TIP]\n/, ''); + return `
<div class="tip" data-title="tip">\n\n${contentWithoutTipTag}\n</div>
\n`; + } + ); + + // Replace the specific content for [!KNOWLEDGE] + markdown = markdown.replace( + /> \[!KNOWLEDGE]\n(> .*\n)*/g, + (match) => { + const contentWithoutKnowledgeTag = match.replace(/> \[!KNOWLEDGE]\n/, ''); + return `
<div class="tip" data-title="knowledge">\n\n${contentWithoutKnowledgeTag}\n</div>
\n`; + } + ); + return { title, headings, markdown }; }); return { From 2c1dcb87df206980ceea3857f6d88fc03eb8953f Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 12:39:39 -0700 Subject: [PATCH 11/27] refactor markdown tag mapping --- packages/website/src/app/workshop/workshop.ts | 64 ++++--------------- 1 file changed, 12 insertions(+), 52 deletions(-) diff --git a/packages/website/src/app/workshop/workshop.ts b/packages/website/src/app/workshop/workshop.ts index c08be9b6..9e24729e 100644 --- a/packages/website/src/app/workshop/workshop.ts +++ b/packages/website/src/app/workshop/workshop.ts @@ -33,59 +33,19 @@ export async function loadWorkshop(repoPath: string, options?: LoaderOptions): P markdown = `

${fileContents.meta.title}

\n\n${markdown}`; } - // Replace the specific content for [!NOTE] - markdown = markdown.replace( - /> \[!NOTE]\n(> .*\n)*/g, - (match) => { - const contentWithoutNoteTag = match.replace(/> \[!NOTE]\n/, ''); - return `
<div class="task" data-title="note">\n\n${contentWithoutNoteTag}\n</div>
\n`; - } - ); - - // Replace the specific content for [!HELP] - markdown = markdown.replace( - /> \[!HELP]\n(> .*\n)*/g, - (match) => { - const contentWithoutHelpTag = match.replace(/> \[!HELP]\n/, ''); - return `
<div class="warning" data-title="help">\n\n${contentWithoutHelpTag}\n</div>
\n`; - } - ); - - // Replace the specific content for [!ALERT] - markdown = markdown.replace( - /> \[!ALERT]\n(> .*\n)*/g, - (match) => { - const contentWithoutAlertTag = match.replace(/> \[!ALERT]\n/, ''); - return `
<div class="important" data-title="alert">\n\n${contentWithoutAlertTag}\n</div>
\n`; - } - ); - - // Replace the specific content for [!HINT] - markdown = markdown.replace( - /> \[!HINT]\n(> .*\n)*/g, - (match) => { - const contentWithoutHintTag = match.replace(/> \[!HINT]\n/, ''); - return `
<div class="tip" data-title="hint">\n\n${contentWithoutHintTag}\n</div>
\n`; - } - ); - - // Replace the specific content for [!TIP] - markdown = markdown.replace( - /> \[!TIP]\n(> .*\n)*/g, - (match) => { - const contentWithoutTipTag = match.replace(/> \[!TIP]\n/, ''); - return `
<div class="tip" data-title="tip">\n\n${contentWithoutTipTag}\n</div>
\n`; - } - ); + function replaceMarkdownTag(markdown: string, tag: string, className: string, dataTitle: string): string { + const regex = new RegExp(`> \\[!${tag}]\\n(> .*\\n)*`, 'g'); + return markdown.replace(regex, (match) => { + const contentWithoutTag = match.replace(new RegExp(`> \\[!${tag}]\\n`), ''); + return `
<div class="${className}" data-title="${dataTitle}">\n\n${contentWithoutTag}\n</div>
\n`; + }); + } - // Replace the specific content for [!KNOWLEDGE] - markdown = markdown.replace( - /> \[!KNOWLEDGE]\n(> .*\n)*/g, - (match) => { - const contentWithoutKnowledgeTag = match.replace(/> \[!KNOWLEDGE]\n/, ''); - return `
<div class="tip" data-title="knowledge">\n\n${contentWithoutKnowledgeTag}\n</div>
\n`; - } - ); + markdown = replaceMarkdownTag(markdown, 'KNOWLEDGE', 'tip', 'knowledge'); + markdown = replaceMarkdownTag(markdown, 'ALERT', 'important', 'alert'); + markdown = replaceMarkdownTag(markdown, 'NOTE', 'task', 'note'); + markdown = replaceMarkdownTag(markdown, 'HELP', 'warning', 'help'); + markdown = replaceMarkdownTag(markdown, 'HINT', 'tip', 'hint'); return { title, headings, markdown }; }); From a4632925fdf81d38719642c3b5c20b500ba51771 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 12:43:05 -0700 Subject: [PATCH 12/27] make markdown swap case insensitive --- packages/website/src/app/workshop/workshop.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/website/src/app/workshop/workshop.ts b/packages/website/src/app/workshop/workshop.ts index 9e24729e..a28f97e3 100644 --- a/packages/website/src/app/workshop/workshop.ts +++ b/packages/website/src/app/workshop/workshop.ts @@ -34,9 +34,9 @@ export async function loadWorkshop(repoPath: string, options?: LoaderOptions): P } function replaceMarkdownTag(markdown: string, tag: string, className: string, dataTitle: string): string { - const regex = new RegExp(`> \\[!${tag}]\\n(> .*\\n)*`, 'g'); + const regex = new RegExp(`> \\[!${tag}]\\n(> .*\\n)*`, 'gi'); return markdown.replace(regex, (match) => { - const contentWithoutTag = match.replace(new RegExp(`> \\[!${tag}]\\n`), ''); + const contentWithoutTag = match.replace(new RegExp(`> \\[!${tag}]\\n`, 'i'), ''); return `
<div class="${className}" data-title="${dataTitle}">\n\n${contentWithoutTag}\n</div>
\n`; }); } From a84eaea7251f032aba98196c57f3b350488824ac Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 13:16:13 -0700 Subject: [PATCH 13/27] security edits --- workshops/operating-aks-automatic/workshop.md | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index 291fb355..9b7568df 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -42,7 +42,7 @@ The lab environment has been pre-configured for you with the following Azure res - [Azure Managed Prometheus](https://learn.microsoft.com/azure/azure-monitor/essentials/prometheus-metrics-overview) - [Azure Managed Grafana](https://learn.microsoft.com/azure/managed-grafana/overview) -> [!HELP] +> [!NOTE] > The Bicep template used to deploy the lab environment can be found [here](https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/aks.bicep) You will also need the following tools: @@ -64,7 +64,7 @@ az login --use-device-code You will be prompted to open a browser and log in with your Azure credentials. Copy the code that is displayed and paste it in the browser to authenticate. -You will also need to install the **aks-preview** and **k8s-extension** extensions to leverage preview features in AKS and install AKS extensions. +You will also need to install the **aks-preview** and **k8s-extension** extensions to leverage preview features in AKS and install AKS extensions. Run the following commands to install the extensions. ```bash az extension add --name aks-preview @@ -77,17 +77,17 @@ Finally set the default location for resources that you will create in this lab az configure --defaults location=$(az group show -n myresourcegroup --query location -o tsv) ``` -You are now ready to get started with the lab. +You are now ready to get started with the lab! === ## Security and governance -Security above all else! The AKS Automatic cluster is configured with Azure Role-Based Access Control (RBAC) authentication and authorization, Azure Policy, and Deployment Safeguards enabled out of the box. This section aims to get AKS operators comfortable with administering user access to the AKS cluster, ensuring security best practices with Azure Policy and Deployment Safeguards. +Being able to manage user access to the AKS cluster and enforce policies is critical to maintaining a secure and compliant environment. In this section, you will learn how to grant permissions to the AKS cluster, enforce policies with AKS Deployment Safeguards, and enforce custom policies with Azure Policy. ### Granting permissions to the AKS cluster -With [Azure RBAC for Kubernetes authorization](https://learn.microsoft.com/azure/aks/manage-azure-rbac?tabs=azure-cli) enabled on the AKS cluster granting users access to the cluster is as simple as assigning roles to users, groups, and/or service principals. Users will need to run the normal **az aks get-credentials** command to download the kubeconfig file, but when users attempt to execute commands against the Kubernetes API Server, they will be instructed to log in with their Microsoft Entra ID credentials and their assigned roles will determine what they can do within the cluster. 
+With [Azure RBAC for Kubernetes authorization](https://learn.microsoft.com/azure/aks/manage-azure-rbac?tabs=azure-cli) enabled on the AKS Automatic cluster, granting users access to the cluster is as simple as assigning roles to users, groups, and/or service principals. Users will run the normal **az aks get-credentials** command to download the [kubeconfig file](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/), but when users attempt to execute commands against the Kubernetes API Server, they will be instructed to log in with their Microsoft Entra ID credentials and their assigned roles will determine what they can do within the cluster. To grant permissions to the AKS cluster, you will need to assign an Azure role. The following built-in roles are available for user assignment. @@ -96,7 +96,7 @@ To grant permissions to the AKS cluster, you will need to assign an Azure role. - [Azure Kubernetes Service RBAC Reader](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-reader) - [Azure Kubernetes Service RBAC Writer](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-writer) -In your shell, run the following command to get the AKS cluster credentials +In your shell, run the following command to get the AKS cluster credentials. ```bash az aks get-credentials \ @@ -104,16 +104,18 @@ az aks get-credentials \ --name myakscluster ``` -A Kubernetes [Namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) is a way to isolate resources in a cluster and is common practice to create namespaces for different teams or environments. Run the following command to create a namespace for the dev team to use. +A Kubernetes [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) is often used to isolate resources in a cluster and is common practice to create namespaces for different teams or environments. Run the following command to create a namespace for the dev team to use. ```bash kubectl create namespace dev ``` -Since this is the first time you are running a kubectl command, you will be prompted to log in against Microsoft Entra ID. Follow the same login process you went through to login into your Azure subscription. After you've successfully logged in, the command to create the namespace should be successful. +Since this is the first time you are running a [kubectl](https://kubernetes.io/docs/reference/kubectl/) command, you will be prompted to log in against Microsoft Entra ID. -> [!HINT] -> The kubelogin plugin is used to authenticate with Microsoft Entra ID and can be easily installed with the following command: **az aks install-cli**. So if you run into an error when trying to log in, you may need to install the plugin. +Follow the same login process you went through to login into your Azure subscription. After you've successfully logged in, the command to create the namespace should be successful. + +> [!HELP] +> If you run into an error when trying to log in, you may need to install the [kubelogin](https://github.com/Azure/kubelogin) plugin which is used to authenticate with Microsoft Entra ID. It can be easily installed with the following command: **az aks install-cli**. Run the following command to get the AKS cluster's resource ID. 
@@ -143,7 +145,7 @@ az role assignment create \ --scope $AKS_ID/namespaces/dev ``` -When you logged in to access the Kubernetes API via the kubectl command, you were prompted to log in with your Microsoft Entra ID. The kubelogin plugin stored the OIDC token in the **~/.kube/cache/kubelogin** directory. In order to quickly test the permissions of a different user, we can simply move the JSON file to a different directory. +When you logged in to access the Kubernetes API via the kubectl command, you were prompted to log in with your Microsoft Entra ID. The kubelogin plugin stores the OIDC token in the **~/.kube/cache/kubelogin** directory. In order to quickly test the permissions of a different user, we can simply move the JSON file to a different directory. Run the following command to move the cached credentials to its parent directory. @@ -151,21 +153,23 @@ Run the following command to move the cached credentials to its parent directory mv ~/.kube/cache/kubelogin/*.json ~/.kube/cache/ ``` -Now, run the following command to get the dev namespace. This trigger a new authentication prompt. Proceed to log in with the developer's user account. +Now, run the following command to get the dev namespace. ```bash kubectl get namespace dev ``` -After logging in, head back to your terminal. You should see the **dev** namespace, its status and age. This means that the developer has the necessary permissions to access the **dev** namespace. +Since there is no cached token in the kubelogin directory, this will trigger a new authentication prompt. Proceed to log in with the developer's user account. + +After logging in, head back to your terminal. You should see details of the **dev** namespace. This means that the dev user has the necessary permissions to access the **dev** namespace. -Run the following command to check to see if the current user can create a pod in the **dev** namespace. +Run the following command to check to see if the dev user can create a pod in the **dev** namespace. ```bash kubectl auth can-i create pods --namespace dev ``` -You should see the output **yes**. This means the developer has the necessary permissions to create pods in the **dev** namespace. +You should see the output **yes**. This means the dev user has the necessary permissions to create pods in the **dev** namespace. Let's put this to the test and deploy a sample application in the assigned namespace using Helm. @@ -175,7 +179,7 @@ Run the following command to add the Helm repository for the AKS Store Demo appl helm repo add aks-store-demo https://azure-samples.github.io/aks-store-demo ``` -Run the following command to install the AKS Store Demo application in the **dev** namespace. +Run the following command to install the [AKS Store Demo](https://github.com/azure-samples/aks-store-demo) application in the **dev** namespace. ```bash helm install demo aks-store-demo/aks-store-demo-chart \ @@ -185,17 +189,19 @@ helm install demo aks-store-demo/aks-store-demo-chart \ The helm install command should show a status of "deployed". This means that the application has successfully deployed in the **dev** namespace. It will take a few minutes to deploy, so let's move on. -Finally, let's check to see if the developer can create a pod outside of their assigned namespace. Run the following command to test against the **default** namespace. +Finally, check to see if the developer can create a pod outside of the dev namespace. Run the following command to test against the **default** namespace. 
```bash kubectl auth can-i create pods --namespace default ``` -You should see the output **no - User does not have access to the resource in Azure. Update role assignment to allow access**. This is exactly what we want to see. If you need to grant the user access to another namespace, you can simply assign the role to the user with the appropriate scope. Or if you need to grand a user access to the entire cluster, you can assign the role to the user with the scope of the AKS cluster and omit the namespace altogether. +You should see the output **no - User does not have access to the resource in Azure. Update role assignment to allow access**. + +This is exactly what we want to see. If you need to grant the user access to another namespace, you can simply assign the role to the user with the appropriate scope. Or if you need to grand a user access to the entire cluster, you can assign the role to the user with the scope of the AKS cluster and omit the namespace altogether. Great job! You now know how to manage user access to the AKS cluster and how to scope permissions to specific namespaces. -> [!IMPORTANT] +> [!ALERT] > After testing the permissions, delete the developer user's cached credentials, then move the admin user's cached credentials back to the **~/.kube/cache/kubelogin** directory by running the following commands. ```bash @@ -473,7 +479,7 @@ az role assignment create \ --scope $(az keyvault show --name $KV_NAME --query id --output tsv) ``` -> [!TIP] +> [!HINT] > You might be wondering "what about the role assignment for the Azure App Configuration store?" We'll get to that in the next section. ### Azure App Configuration Provider for Kubernetes From a54e69fe033ceb5ad10005ccb200b347d626bc92 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 14:56:19 -0700 Subject: [PATCH 14/27] replacing lab cloud portal credentials --- packages/website/src/app/workshop/workshop.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/website/src/app/workshop/workshop.ts b/packages/website/src/app/workshop/workshop.ts index a28f97e3..c23c9fb8 100644 --- a/packages/website/src/app/workshop/workshop.ts +++ b/packages/website/src/app/workshop/workshop.ts @@ -47,6 +47,9 @@ export async function loadWorkshop(repoPath: string, options?: LoaderOptions): P markdown = replaceMarkdownTag(markdown, 'HELP', 'warning', 'help'); markdown = replaceMarkdownTag(markdown, 'HINT', 'tip', 'hint'); + // Replace all occurrences of the @lab.CloudPortalCredential(*).Username markdown tag with a placeholder + markdown = markdown.replace(/@lab\.CloudPortalCredential\([^)]+\)\.Username/gi, ''); + return { title, headings, markdown }; }); return { From e830d01eb5a9f2d39176f48169107b2569c5d274 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 15:06:53 -0700 Subject: [PATCH 15/27] adding workshop setup files --- .../assets/files/appconfigprovider.yaml | 24 + .../files/constrainttemplate-as-policy.json | 138 ++++++ .../assets/files/constrainttemplate.yaml | 26 ++ .../assets/files/nginx.yaml | 27 ++ .../assets/files/nodepool.yaml | 40 ++ .../files/productservice-topologyspread.yaml | 48 ++ .../assets/files/productservice.yaml | 32 ++ .../assets/files/verticalpodautoscaler.yaml | 10 + .../assets/setup/bicep/aks.bicep | 413 ++++++++++++++++++ .../assets/setup/bicep/aks.bicepparam | 3 + .../assets/setup/scripts/postbuild.ps1 | 11 + .../assets/setup/scripts/prebuild.ps1 | 22 + 12 files changed, 794 insertions(+) create mode 100644 
workshops/operating-aks-automatic/assets/files/appconfigprovider.yaml create mode 100644 workshops/operating-aks-automatic/assets/files/constrainttemplate-as-policy.json create mode 100644 workshops/operating-aks-automatic/assets/files/constrainttemplate.yaml create mode 100644 workshops/operating-aks-automatic/assets/files/nginx.yaml create mode 100644 workshops/operating-aks-automatic/assets/files/nodepool.yaml create mode 100644 workshops/operating-aks-automatic/assets/files/productservice-topologyspread.yaml create mode 100644 workshops/operating-aks-automatic/assets/files/productservice.yaml create mode 100644 workshops/operating-aks-automatic/assets/files/verticalpodautoscaler.yaml create mode 100644 workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep create mode 100644 workshops/operating-aks-automatic/assets/setup/bicep/aks.bicepparam create mode 100644 workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 create mode 100644 workshops/operating-aks-automatic/assets/setup/scripts/prebuild.ps1 diff --git a/workshops/operating-aks-automatic/assets/files/appconfigprovider.yaml b/workshops/operating-aks-automatic/assets/files/appconfigprovider.yaml new file mode 100644 index 00000000..d56a9e25 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/appconfigprovider.yaml @@ -0,0 +1,24 @@ +apiVersion: azconfig.io/v1 +kind: AzureAppConfigurationProvider +metadata: + name: devconfigs +spec: + endpoint: ${AC_ENDPOINT} + configuration: + refresh: + enabled: true + interval: 10s + monitoring: + keyValues: + - key: MyKey1 + target: + configMapName: myconfigmap + auth: + workloadIdentity: + serviceAccountName: ${SA_NAME} + secret: + auth: + workloadIdentity: + serviceAccountName: ${SA_NAME} + target: + secretName: mysecret diff --git a/workshops/operating-aks-automatic/assets/files/constrainttemplate-as-policy.json b/workshops/operating-aks-automatic/assets/files/constrainttemplate-as-policy.json new file mode 100644 index 00000000..1aaf0385 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/constrainttemplate-as-policy.json @@ -0,0 +1,138 @@ +{ + "properties": { + "policyType": "Custom", + "mode": "Microsoft.Kubernetes.Data", + "displayName": "Approved registries only", + "description": "This policy requires that all containers in a Kubernetes cluster are sourced from approved container registries.", + "policyRule": { + "if": { + "field": "type", + "in": [ + "Microsoft.ContainerService/managedClusters" + ] + }, + "then": { + "effect": "[parameters('effect')]", + "details": { + "templateInfo": { + "sourceType": "Base64Encoded", + "content": 
"YXBpVmVyc2lvbjogdGVtcGxhdGVzLmdhdGVrZWVwZXIuc2gvdjFiZXRhMQpraW5kOiBDb25zdHJhaW50VGVtcGxhdGUKbWV0YWRhdGE6CiAgbmFtZTogazhzcmVxdWlyZWRyZWdpc3RyeQpzcGVjOgogIGNyZDoKICAgIHNwZWM6CiAgICAgIG5hbWVzOgogICAgICAgIGtpbmQ6IEs4c1JlcXVpcmVkUmVnaXN0cnkKICAgICAgdmFsaWRhdGlvbjoKICAgICAgICBvcGVuQVBJVjNTY2hlbWE6CiAgICAgICAgICBwcm9wZXJ0aWVzOgogICAgICAgICAgICByZWdpc3RyeToKICAgICAgICAgICAgICB0eXBlOiBzdHJpbmcKICB0YXJnZXRzOgogICAgLSB0YXJnZXQ6IGFkbWlzc2lvbi5rOHMuZ2F0ZWtlZXBlci5zaAogICAgICByZWdvOiB8CiAgICAgICAgcGFja2FnZSBrOHNyZXF1aXJlZHJlZ2lzdHJ5CiAgICAgICAgdmlvbGF0aW9uW3sibXNnIjogbXNnLCAiZGV0YWlscyI6IHsiUmVnaXN0cnkgbXVzdCBiZSI6IHJlcXVpcmVkfX1dIHsKICAgICAgICAgIGlucHV0LnJldmlldy5vYmplY3Qua2luZCA9PSAiUG9kIgogICAgICAgICAgc29tZSBpCiAgICAgICAgICBpbWFnZSA6PSBpbnB1dC5yZXZpZXcub2JqZWN0LnNwZWMuY29udGFpbmVyc1tpXS5pbWFnZQogICAgICAgICAgcmVxdWlyZWQgOj0gaW5wdXQucGFyYW1ldGVycy5yZWdpc3RyeQogICAgICAgICAgbm90IHN0YXJ0c3dpdGgoaW1hZ2UscmVxdWlyZWQpCiAgICAgICAgICBtc2cgOj0gc3ByaW50ZigiRm9yYmlkZGVuIHJlZ2lzdHJ5OiAldiIsIFtpbWFnZV0pCiAgICAgICAgfQo=" + }, + "apiGroups": [ + "" + ], + "kinds": [ + "Pod" + ], + "namespaces": "[parameters('namespaces')]", + "excludedNamespaces": "[parameters('excludedNamespaces')]", + "labelSelector": "[parameters('labelSelector')]", + "values": { + "registry": "[parameters('registry')]" + } + } + } + }, + "parameters": { + "effect": { + "type": "String", + "metadata": { + "displayName": "Effect", + "description": "'audit' allows a non-compliant resource to be created or updated, but flags it as non-compliant. 'deny' blocks the non-compliant resource creation or update. 'disabled' turns off the policy." + }, + "allowedValues": [ + "audit", + "deny", + "disabled" + ], + "defaultValue": "audit" + }, + "excludedNamespaces": { + "type": "Array", + "metadata": { + "displayName": "Namespace exclusions", + "description": "List of Kubernetes namespaces to exclude from policy evaluation." + }, + "defaultValue": [ + "kube-system", + "gatekeeper-system", + "azure-arc" + ] + }, + "namespaces": { + "type": "Array", + "metadata": { + "displayName": "Namespace inclusions", + "description": "List of Kubernetes namespaces to only include in policy evaluation. An empty list means the policy is applied to all resources in all namespaces." + }, + "defaultValue": [] + }, + "labelSelector": { + "type": "Object", + "metadata": { + "displayName": "Kubernetes label selector", + "description": "Label query to select Kubernetes resources for policy evaluation. An empty label selector matches all Kubernetes resources." + }, + "defaultValue": {}, + "schema": { + "description": "A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all resources.", + "type": "object", + "properties": { + "matchLabels": { + "description": "matchLabels is a map of {key,value} pairs.", + "type": "object", + "additionalProperties": { + "type": "string" + }, + "minProperties": 1 + }, + "matchExpressions": { + "description": "matchExpressions is a list of values, a key, and an operator.", + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "description": "key is the label key that the selector applies to.", + "type": "string" + }, + "operator": { + "description": "operator represents a key's relationship to a set of values.", + "type": "string", + "enum": [ + "In", + "NotIn", + "Exists", + "DoesNotExist" + ] + }, + "values": { + "description": "values is an array of string values. 
If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "key", + "operator" + ], + "additionalProperties": false + }, + "minItems": 1 + } + }, + "additionalProperties": false + } + }, + "registry": { + "type": "String", + "metadata": { + "displayName": "Image registry", + "description": "The container image registry to allow." + } + } + } + } +} \ No newline at end of file diff --git a/workshops/operating-aks-automatic/assets/files/constrainttemplate.yaml b/workshops/operating-aks-automatic/assets/files/constrainttemplate.yaml new file mode 100644 index 00000000..c506eb9c --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/constrainttemplate.yaml @@ -0,0 +1,26 @@ +apiVersion: templates.gatekeeper.sh/v1beta1 +kind: ConstraintTemplate +metadata: + name: k8srequiredregistry +spec: + crd: + spec: + names: + kind: K8sRequiredRegistry + validation: + openAPIV3Schema: + properties: + registry: + type: string + targets: + - target: admission.k8s.gatekeeper.sh + rego: | + package k8srequiredregistry + violation[{"msg": msg, "details": {"Registry must be": required}}] { + input.review.object.kind == "Pod" + some i + image := input.review.object.spec.containers[i].image + required := input.parameters.registry + not startswith(image,required) + msg := sprintf("Forbidden registry: %v", [image]) + } diff --git a/workshops/operating-aks-automatic/assets/files/nginx.yaml b/workshops/operating-aks-automatic/assets/files/nginx.yaml new file mode 100644 index 00000000..d4627d0a --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/nginx.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + run: mynginx + name: mynginx +spec: + containers: + - image: nginx:latest + name: mynginx + resources: + limits: + cpu: 5m + memory: 4Mi + requests: + cpu: 3m + memory: 2Mi + livenessProbe: + tcpSocket: + port: 80 + initialDelaySeconds: 3 + periodSeconds: 3 + readinessProbe: + tcpSocket: + port: 80 + initialDelaySeconds: 3 + periodSeconds: 3 diff --git a/workshops/operating-aks-automatic/assets/files/nodepool.yaml b/workshops/operating-aks-automatic/assets/files/nodepool.yaml new file mode 100644 index 00000000..ab487a89 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/nodepool.yaml @@ -0,0 +1,40 @@ +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + annotations: + kubernetes.io/description: General purpose NodePool for dev workloads + name: devpool +spec: + disruption: + budgets: + - nodes: 100% + consolidationPolicy: WhenUnderutilized + expireAfter: Never + template: + metadata: + labels: + team: dev + spec: + nodeClassRef: + name: default + taints: + - key: team + value: dev + effect: NoSchedule + requirements: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + - key: kubernetes.io/os + operator: In + values: + - linux + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - key: karpenter.azure.com/sku-family + operator: In + values: + - D diff --git a/workshops/operating-aks-automatic/assets/files/productservice-topologyspread.yaml b/workshops/operating-aks-automatic/assets/files/productservice-topologyspread.yaml new file mode 100644 index 00000000..65194dfe --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/productservice-topologyspread.yaml @@ -0,0 +1,48 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: 
product-service + namespace: dev +spec: + replicas: 3 + selector: + matchLabels: + app: product-service + template: + metadata: + labels: + app: product-service + spec: + containers: + - name: product-service + image: ${ACR_NAME}.azurecr.io/product-service:1.5.2 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: team + operator: In + values: + - dev + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - product-service + topologyKey: "kubernetes.io/hostname" + tolerations: + - key: "team" + operator: "Equal" + value: "dev" + effect: "NoSchedule" + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: "topology.kubernetes.io/zone" + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app: product-service diff --git a/workshops/operating-aks-automatic/assets/files/productservice.yaml b/workshops/operating-aks-automatic/assets/files/productservice.yaml new file mode 100644 index 00000000..9a1593b2 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/productservice.yaml @@ -0,0 +1,32 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: product-service + namespace: dev +spec: + replicas: 3 + selector: + matchLabels: + app: product-service + template: + metadata: + labels: + app: product-service + spec: + containers: + - name: product-service + image: ${ACR_NAME}.azurecr.io/product-service:1.5.2 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: team + operator: In + values: + - dev + tolerations: + - key: "team" + operator: "Equal" + value: "dev" + effect: "NoSchedule" diff --git a/workshops/operating-aks-automatic/assets/files/verticalpodautoscaler.yaml b/workshops/operating-aks-automatic/assets/files/verticalpodautoscaler.yaml new file mode 100644 index 00000000..86d4b272 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/files/verticalpodautoscaler.yaml @@ -0,0 +1,10 @@ +apiVersion: "autoscaling.k8s.io/v1" +kind: VerticalPodAutoscaler +metadata: + name: product-service-vpa + namespace: dev +spec: + targetRef: + apiVersion: "apps/v1" + kind: Deployment + name: product-service diff --git a/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep b/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep new file mode 100644 index 00000000..babdd1b1 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep @@ -0,0 +1,413 @@ +@description('The basename of the resource.') +param nameSuffix string +// @description('The user object id for the cluster admin.') +// @secure() +// param userObjectId string + +resource logWorkspace 'Microsoft.OperationalInsights/workspaces@2022-10-01' = { + name: 'mylogs${take(uniqueString(nameSuffix), 4)}' + location: resourceGroup().location + identity: { + type: 'SystemAssigned' + } + properties: { + sku: { + name: 'PerGB2018' + } + } +} + +resource metricsWorkspace 'Microsoft.Monitor/accounts@2023-04-03' = { + name: 'myprometheus${take(uniqueString(nameSuffix), 4)}' + location: resourceGroup().location +} + +resource grafanaDashboard 'Microsoft.Dashboard/grafana@2023-09-01' = { + name: 'mygrafana${take(uniqueString(nameSuffix), 4)}' + location: resourceGroup().location + sku: { + name: 'Standard' + } + identity: { + type: 'SystemAssigned' + } + properties: { + grafanaIntegrations: { + azureMonitorWorkspaceIntegrations: [ + { + 
azureMonitorWorkspaceResourceId: metricsWorkspace.id + } + ] + } + } +} + +// resource grafanaAdminRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { +// name: guid(subscription().id, resourceGroup().id, userObjectId, 'Grafana Admin') +// scope: grafanaDashboard +// properties: { +// principalId: userObjectId +// principalType: 'User' +// roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', '22926164-76b3-42b3-bc55-97df8dab3e41') +// } +// } + +resource aksCluster 'Microsoft.ContainerService/managedClusters@2024-03-02-preview' = { + name: 'myakscluster' + location: resourceGroup().location + sku: { + name: 'Automatic' + tier: 'Standard' + } + properties: { + agentPoolProfiles: [ + { + name: 'systempool' + count: 3 + osType: 'Linux' + mode: 'System' + } + ] + addonProfiles: { + omsagent: { + enabled: true + config: { + logAnalyticsWorkspaceResourceID: logWorkspace.id + useAADAuth: 'true' + } + } + } + azureMonitorProfile: { + metrics: { + enabled: true + kubeStateMetrics: { + metricLabelsAllowlist: '*' + metricAnnotationsAllowList: '*' + } + } + containerInsights: { + enabled: true + logAnalyticsWorkspaceResourceId: logWorkspace.id + } + } + } + identity: { + type: 'SystemAssigned' + } +} + +resource containerRegistry 'Microsoft.ContainerRegistry/registries@2023-11-01-preview' = { + name: 'mycontainerregistry${take(uniqueString(nameSuffix), 4)}' + location: resourceGroup().location + sku: { + name: 'Standard' + } + identity: { + type: 'SystemAssigned' + } +} + +resource containerRegistryRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(subscription().id, resourceGroup().id, containerRegistry.name, 'AcrPullRole') + scope: containerRegistry + properties: { + principalId: aksCluster.properties.identityProfile.kubeletIdentity.objectId + principalType: 'ServicePrincipal' + roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d') + } +} + +// resource clusterAdminRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { +// name: guid(subscription().id, resourceGroup().id, userObjectId, 'Azure Kubernetes Service RBAC Cluster Admin') +// scope: aksCluster +// properties: { +// principalId: userObjectId +// principalType: 'User' +// roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', 'b1ff04bb-8a4e-4dc4-8eb5-8693973ce19b') +// } +// } + +resource dataCollectionEndpoint 'Microsoft.Insights/dataCollectionEndpoints@2022-06-01' = { + name: 'MSProm-${resourceGroup().location}-${aksCluster.name}' + location: resourceGroup().location + kind: 'Linux' + properties: { + description: 'Data Collection Endpoint for Prometheus' + } +} + +resource dataCollectionRuleAssociationEndpoint 'Microsoft.Insights/dataCollectionRuleAssociations@2022-06-01' = { + name: 'configurationAccessEndpoint' + scope: aksCluster + properties: { + dataCollectionEndpointId: dataCollectionEndpoint.id + } +} + +resource dataCollectionRuleMSCI 'Microsoft.Insights/dataCollectionRules@2022-06-01' = { + name: 'MSCI-${resourceGroup().location}-${aksCluster.name}' + location: resourceGroup().location + kind: 'Linux' + properties: { + dataSources: { + syslog: [] + extensions: [ + { + streams: [ + 'Microsoft-ContainerInsights-Group-Default' + ] + extensionName: 'ContainerInsights' + extensionSettings: { + dataCollectionSettings: { + interval: '1m' + namespaceFilteringMode: 'Off' + enableContainerLogV2: true + } + } + name: 'ContainerInsightsExtension' + } + ] + } + destinations: { + 
logAnalytics: [ + { + workspaceResourceId: logWorkspace.id + name: 'ciworkspace' + } + ] + } + dataFlows: [ + { + streams: [ + 'Microsoft-ContainerInsights-Group-Default' + ] + destinations: [ + 'ciworkspace' + ] + } + ] + } +} + +resource dataCollectionRuleAssociationMSCI 'Microsoft.Insights/dataCollectionRuleAssociations@2022-06-01' = { + name: 'MSCI-${resourceGroup().location}-${aksCluster.name}' + scope: aksCluster + properties: { + dataCollectionRuleId: dataCollectionRuleMSCI.id + } +} + +resource dataCollectionRuleMSProm 'Microsoft.Insights/dataCollectionRules@2022-06-01' = { + name: 'MSProm-${resourceGroup().location}-${aksCluster.name}' + location: resourceGroup().location + kind: 'Linux' + properties: { + dataCollectionEndpointId: dataCollectionEndpoint.id + dataSources: { + prometheusForwarder: [ + { + streams: [ + 'Microsoft-PrometheusMetrics' + ] + name: 'PrometheusDataSource' + } + ] + } + destinations: { + monitoringAccounts: [ + { + accountResourceId: metricsWorkspace.id + name: 'MonitoringAccount1' + } + ] + } + dataFlows: [ + { + streams: [ + 'Microsoft-PrometheusMetrics' + ] + destinations: [ + 'MonitoringAccount1' + ] + } + ] + } +} + +resource dataCollectionRuleAssociationMSProm 'Microsoft.Insights/dataCollectionRuleAssociations@2022-06-01' = { + name: 'MSProm-${resourceGroup().location}-${aksCluster.name}' + scope: aksCluster + properties: { + dataCollectionRuleId: dataCollectionRuleMSProm.id + } +} + +resource prometheusK8sRules 'Microsoft.AlertsManagement/prometheusRuleGroups@2023-09-01-preview' = { + name: 'KubernetesRecordingRulesRuleGroup - ${aksCluster.name}' + location: resourceGroup().location + properties: { + enabled: true + description: 'Kubernetes Recording Rules RuleGroup' + clusterName: aksCluster.name + scopes: [ + metricsWorkspace.id + aksCluster.id + ] + interval: 'PT1M' + rules: [ + { + record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate' + expression: 'sum by (cluster, namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="cadvisor", image!=""}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}))' + } + { + record: 'node_namespace_pod_container:container_memory_working_set_bytes' + expression: 'container_memory_working_set_bytes{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))' + } + { + record: 'node_namespace_pod_container:container_memory_rss' + expression: 'container_memory_rss{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))' + } + { + record: 'node_namespace_pod_container:container_memory_cache' + expression: 'container_memory_cache{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))' + } + { + record: 'node_namespace_pod_container:container_memory_swap' + expression: 'container_memory_swap{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))' + } + { + record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_requests' + expression: 'kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) 
((kube_pod_status_phase{phase=~"Pending|Running"} == 1))' + } + { + record: 'namespace_memory:kube_pod_container_resource_requests:sum' + expression: 'sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)))' + } + { + record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests' + expression: 'kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~"Pending|Running"} == 1))' + } + { + record: 'namespace_cpu:kube_pod_container_resource_requests:sum' + expression: 'sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)))' + } + { + record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_limits' + expression: 'kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~"Pending|Running"} == 1))' + } + { + record: 'namespace_memory:kube_pod_container_resource_limits:sum' + expression: 'sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)))' + } + { + record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits' + expression: 'kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) )' + } + { + record: 'namespace_cpu:kube_pod_container_resource_limits:sum' + expression: 'sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~"Pending|Running"} == 1)))' + } + { + record: 'namespace_workload_pod:kube_pod_owner:relabel' + expression: 'max by (cluster, namespace, workload, pod) ((label_replace(label_replace(kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, "replicaset", "$1", "owner_name", "(.*)") * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (1, max by (replicaset, namespace, owner_name) (kube_replicaset_owner{job="kube-state-metrics"})), "workload", "$1", "owner_name", "(.*)" )))' + labels: { + workload_type: 'deployment' + } + } + { + record: 'namespace_workload_pod:kube_pod_owner:relabel' + expression: 'max by (cluster, namespace, workload, pod) ((label_replace(kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, "workload", "$1", "owner_name", "(.*)")))' + labels: { + workload_type: 'daemonset' + } + } + { + record: 'namespace_workload_pod:kube_pod_owner:relabel' + 
expression: 'max by (cluster, namespace, workload, pod) ((label_replace(kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, "workload", "$1", "owner_name", "(.*)")))' + labels: { + workload_type: 'statefulset' + } + } + { + record: 'namespace_workload_pod:kube_pod_owner:relabel' + expression: 'max by (cluster, namespace, workload, pod) ((label_replace(kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, "workload", "$1", "owner_name", "(.*)")))' + labels: { + workload_type: 'job' + } + } + { + record: ':node_memory_MemAvailable_bytes:sum' + expression: 'sum(node_memory_MemAvailable_bytes{job="node"} or (node_memory_Buffers_bytes{job="node"} + node_memory_Cached_bytes{job="node"} + node_memory_MemFree_bytes{job="node"} + node_memory_Slab_bytes{job="node"})) by (cluster)' + } + { + record: 'cluster:node_cpu:ratio_rate5m' + expression: 'sum(rate(node_cpu_seconds_total{job="node",mode!="idle",mode!="iowait",mode!="steal"}[5m])) by (cluster) /count(sum(node_cpu_seconds_total{job="node"}) by (cluster, instance, cpu)) by (cluster)' + } + ] + } +} + +resource prometheusNodeRules 'Microsoft.AlertsManagement/prometheusRuleGroups@2023-09-01-preview' = { + name: 'NodeRecordingRulesRuleGroup - ${aksCluster.name}' + location: resourceGroup().location + properties: { + enabled: true + description: 'Node Recording Rules RuleGroup' + clusterName: aksCluster.name + scopes: [ + metricsWorkspace.id + aksCluster.id + ] + interval: 'PT1M' + rules: [ + { + record: 'instance:node_num_cpu:sum' + expression: 'count without (cpu, mode) (node_cpu_seconds_total{job="node",mode="idle"})' + } + { + record: 'instance:node_cpu_utilisation:rate5m' + expression: '1 - avg without (cpu) (sum without (mode) (rate(node_cpu_seconds_total{job="node", mode=~"idle|iowait|steal"}[5m])))' + } + { + record: 'instance:node_load1_per_cpu:ratio' + expression: '(node_load1{job="node"}/ instance:node_num_cpu:sum{job="node"})' + } + { + record: 'instance:node_memory_utilisation:ratio' + expression: '1 - ((node_memory_MemAvailable_bytes{job="node"} or (node_memory_Buffers_bytes{job="node"} + node_memory_Cached_bytes{job="node"} + node_memory_MemFree_bytes{job="node"} + node_memory_Slab_bytes{job="node"})) / node_memory_MemTotal_bytes{job="node"})' + } + { + record: 'instance:node_vmstat_pgmajfault:rate5m' + expression: 'rate(node_vmstat_pgmajfault{job="node"}[5m])' + } + { + record: 'instance_device:node_disk_io_time_seconds:rate5m' + expression: 'rate(node_disk_io_time_seconds_total{job="node", device!=""}[5m])' + } + { + record: 'instance_device:node_disk_io_time_weighted_seconds:rate5m' + expression: 'rate(node_disk_io_time_weighted_seconds_total{job="node", device!=""}[5m])' + } + { + record: 'instance:node_network_receive_bytes_excluding_lo:rate5m' + expression: 'sum without (device) (rate(node_network_receive_bytes_total{job="node", device!="lo"}[5m]))' + } + { + record: 'instance:node_network_transmit_bytes_excluding_lo:rate5m' + expression: 'sum without (device) (rate(node_network_transmit_bytes_total{job="node", device!="lo"}[5m]))' + } + { + record: 'instance:node_network_receive_drop_excluding_lo:rate5m' + expression: 'sum without (device) (rate(node_network_receive_drop_total{job="node", device!="lo"}[5m]))' + } + { + record: 'instance:node_network_transmit_drop_excluding_lo:rate5m' + expression: 'sum without (device) (rate(node_network_transmit_drop_total{job="node", device!="lo"}[5m]))' + } + ] + } +} + +output grafanaId string = grafanaDashboard.id +output aksId string = aksCluster.id +output 
containerRegistryLoginServer string = '${containerRegistry.name}.azurecr.io' diff --git a/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicepparam b/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicepparam new file mode 100644 index 00000000..84a9566c --- /dev/null +++ b/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicepparam @@ -0,0 +1,3 @@ +using 'aks.bicep' +param nameSuffix = 'something' +// param userObjectId = '0000000-00000000-0000-00000000000' diff --git a/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 b/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 new file mode 100644 index 00000000..c2dffa3e --- /dev/null +++ b/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 @@ -0,0 +1,11 @@ +New-AzRoleAssignment -SignInName '@lab.CloudPortalCredential(Admin).Username' -RoleDefinitionName 'Azure Kubernetes Service RBAC Cluster Admin' -Scope '@lab.CloudResourceTemplate(AKSAutomatic).Outputs[aksId]' +New-AzRoleAssignment -SignInName '@lab.CloudPortalCredential(Admin).Username' -RoleDefinitionName 'Grafana Admin' -Scope '@lab.CloudResourceTemplate(AKSAutomatic).Outputs[grafanaId]' + +Register-AzResourceProvider -ProviderNamespace "Microsoft.KeyVault" +Register-AzResourceProvider -ProviderNamespace "Microsoft.AppConfiguration" +Register-AzResourceProvider -ProviderNamespace "Microsoft.ServiceLinker" +Register-AzResourceProvider -ProviderNamespace "Microsoft.ContainerRegistry" +Register-AzResourceProvider -ProviderNamespace "Microsoft.KubernetesConfiguration" +Register-AzResourceProvider -ProviderNamespace "Microsoft.CognitiveServices" +Register-AzProviderFeature -FeatureName "EnableImageIntegrityPreview" -ProviderNamespace "Microsoft.ContainerService" +Register-AzProviderFeature -FeatureName "AKS-AzurePolicyExternalData" -ProviderNamespace "Microsoft.ContainerService" \ No newline at end of file diff --git a/workshops/operating-aks-automatic/assets/setup/scripts/prebuild.ps1 b/workshops/operating-aks-automatic/assets/setup/scripts/prebuild.ps1 new file mode 100644 index 00000000..8b2c2754 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/setup/scripts/prebuild.ps1 @@ -0,0 +1,22 @@ +Register-AzResourceProvider -ProviderNamespace "Microsoft.Compute" +Register-AzProviderFeature -FeatureName "AutomaticSKUPreview" -ProviderNamespace "Microsoft.ContainerService" + +while ($true) { + $status = Get-AzResourceProvider -ProviderNamespace "Microsoft.Compute" + Write-Output "$($status[0].ProviderNamespace) is still $($status[0].RegistrationState) in $($status[0].Locations)" + if ($status[0].RegistrationState -eq "Registered") { + break + } + Start-Sleep -Seconds 5 +} + +while ($true) { + $status = Get-AzProviderFeature -FeatureName "AutomaticSKUPreview" -ProviderNamespace "Microsoft.ContainerService" + Write-Output "$($status.FeatureName) is still $($status.RegistrationState)" + if ($status.RegistrationState -eq "Registered") { + break + } + Start-Sleep -Seconds 5 +} + +Register-AzResourceProvider -ProviderNamespace "Microsoft.PolicyInsights" \ No newline at end of file From b2e983f0bc6454f3e25bd60d919c5491cc5edaac Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 15:14:58 -0700 Subject: [PATCH 16/27] azure policy updates --- workshops/operating-aks-automatic/workshop.md | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index 9b7568df..6440897f 100644 --- 
a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md
@@ -43,7 +43,7 @@ The lab environment has been pre-configured for you with the following Azure res
- [Azure Managed Grafana](https://learn.microsoft.com/azure/managed-grafana/overview)

> [!NOTE]
-> The Bicep template used to deploy the lab environment can be found [here](https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/aks.bicep)
+> The Bicep template used to deploy the lab environment can be found [here](https://raw.githubusercontent.com/azure-samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep)

You will also need the following tools:

@@ -211,7 +211,7 @@ mv ~/.kube/cache/*.json ~/.kube/cache/kubelogin/

### Deployment Safeguards

-As you unleash your developers to deploy their applications in the AKS cluster, you want to ensure that they are following best practices and policies. [Deployment Safeguards](https://learn.microsoft.com/azure/aks/deployment-safeguards) is a feature that helps enforce best practices and policies for your AKS clusters. In AKS Automatic clusters it is enabled by default and is implemented using [Azure Policy](https://learn.microsoft.com/azure/governance/policy/overview). A group of policies known as an [initiative](https://learn.microsoft.com/azure/governance/policy/concepts/initiative-definition-structure) is assigned to your cluster to monitor resources running within it are secure, compliant, and follows best practices. The compliance state of the cluster resources are reported back to Azure Policy and can be viewed in the Azure Portal.
+Before you unleash developers to deploy applications in the AKS cluster, you likely want to ensure that they are following best practices. [Deployment Safeguards](https://learn.microsoft.com/azure/aks/deployment-safeguards) is a feature that helps enforce best practices and policies for your AKS clusters. It is implemented as an AKS add-on using [Azure Policy](https://learn.microsoft.com/azure/governance/policy/overview) and enabled by default on AKS Automatic clusters. Deployment Safeguards is essentially a group of policies known as an [initiative](https://learn.microsoft.com/azure/governance/policy/concepts/initiative-definition-structure) which is assigned to your cluster to ensure that the resources running within it are secure, compliant, and follow best practices. The compliance state of the cluster resources is reported back to Azure Policy and can be viewed in the Azure Portal.

The group of policies that are included with Deployment Safeguards are documented [here](https://learn.microsoft.com/azure/aks/deployment-safeguards#deployment-safeguards-policies). Read carefully through each policy description, the targeted resource, and the mutation that can be applied when the assignment is set to **Enforcement** mode. AKS Automatic defaults to **Warning** mode which simply displays warnings in the terminal as a gentle reminder to implement best practices. You may have seen Deployment Safeguards at work when you deployed the demo application using Helm. When Deployment Safeguards is in Enforcement mode, policies will be strongly enforced by either mutating deployments to comply with the policies or denying deployments that violate policy. Therefore, it is important to understand the impact of each policy before enabling Enforcement mode.
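If you want to confirm the initiative assignment from the command line before heading to the Azure Portal, a quick check along these lines should work (a sketch that assumes this lab's resource names, **myresourcegroup** and **myakscluster**; adjust them for your environment):

```bash
# Look up the AKS cluster resource ID, then list the policy assignments
# scoped to it -- the Deployment Safeguards initiative should be listed
AKS_ID=$(az aks show \
  --resource-group myresourcegroup \
  --name myakscluster \
  --query id \
  --output tsv)

az policy assignment list --scope $AKS_ID --query "[].displayName" --output table
```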
@@ -236,7 +236,7 @@ These warnings are here to help remind you of the best practices that should be

So let's try this again with some best practices in place. Run the following command to delete the pod that was just created.

```bash
-kubectl delete pod mynginx
+kubectl delete pod mynginx --wait=false
```

Run the following command to redeploy the pod with some best practices in place.

@@ -284,32 +284,34 @@ Head over to the Azure portal. In the search bar, type `policy` and click on **P

In the **Overview** section, you will see the **AKS Deployment Safeguards Policy Assignment** in the middle of the page.

-Click on the policy assignment to view the compliance state of the cluster resources. You should see some of the things that were displayed in the terminal output as not being compliant.
+Click on the policy assignment to view the compliance state of the cluster resources. You should see some of the policy warnings that were displayed in the terminal output when you deployed the pod without best practices in place.

-Nice work! Now you know where to expect warnings and how to address some of them.
+Awesome! Now you know where to expect to see warnings both in the terminal and in the Azure Portal and how to address some of these warnings by following best practices.

### Custom policy enforcement

-[Azure Policy add-on for AKS](https://learn.microsoft.com/azure/aks/use-azure-policy) has been enabled when AKS Automatic assigned Deployment Safeguards policy initiative. This means you can also leverage additional Azure Policy definitions (built-in or custom) to enforce organizational standards and compliance. When the Azure Policy for AKS feature is enabled, [Open Policy Agent (OPA) Gatekeeper](https://kubernetes.io/blog/2019/08/06/opa-gatekeeper-policy-and-governance-for-kubernetes/) is deployed in the AKS cluster. OPA Gatekeeper is a policy engine for Kubernetes that allows you to enforce policies written using [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/), a high-level declarative language. So when Azure policies are assigned to the AKS cluster, they are translated to OPA Gatekeeper [ConstraintTemplates](https://open-policy-agent.github.io/gatekeeper/website/docs/constrainttemplates/) and enforced in the cluster.
+As mentioned in the previous section, the [Azure Policy add-on for AKS](https://learn.microsoft.com/azure/aks/use-azure-policy) is enabled when AKS Automatic is provisioned. This means you have everything you need to leverage additional Azure Policy definitions (built-in or custom) to enforce organizational standards. When the Azure Policy for AKS feature is enabled, [Open Policy Agent (OPA) Gatekeeper](https://kubernetes.io/blog/2019/08/06/opa-gatekeeper-policy-and-governance-for-kubernetes/) is deployed in the AKS cluster. [Gatekeeper](https://open-policy-agent.github.io/gatekeeper) is a policy engine for Kubernetes that allows you to enforce policies written using [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/), a high-level declarative language. As Azure policies are assigned to the AKS cluster, they are translated to Gatekeeper [ConstraintTemplates](https://open-policy-agent.github.io/gatekeeper/website/docs/constrainttemplates/) and enforced in the cluster.

-The Gatekeeper pods are running in the **gatekeeper-system** namespace.
+The Gatekeeper pods are running in the **gatekeeper-system** namespace. Run the following command to view the pods.
```bash
kubectl get pods -n gatekeeper-system
```

-You can also view the ConstraintTemplates that are available in the cluster.
+You can also view the ConstraintTemplates that are available in the cluster. Run the following command to view the ConstraintTemplates which have been deployed via the Azure Policy add-on for AKS.

```bash
kubectl get constrainttemplates
```

-Although Gatekeepr is running in the cluster, it is worth noting that this Gatekeeper cannot be used outside of Azure Policy. That is, if you want to implement a well-known or commonly used ConstraintTemplates, you'll need to translate it to an Azure Policy definition and assign it to the AKS cluster. From there **azure-policy-\*** pods running in the **kube-system** namespace listens for Azure Policy assignments, translates them to ConstraintTemplates, deploys the custom Constraints (cluster policy), and reports the cluster policy results back up to Azure Policy.
+Although Gatekeeper is running in the cluster, it is worth noting that this Gatekeeper cannot be used outside of Azure Policy. That is, if you want to implement a well-known or commonly used ConstraintTemplate, you'll need to translate it to an Azure Policy definition and assign it to the AKS cluster.
+
+From there, the **azure-policy-\*** pods running in the **kube-system** namespace listen for Azure Policy assignments, translate them to ConstraintTemplates, deploy the custom Constraints (cluster policy), and report the cluster policy results back up to Azure Policy.

Let's illustrate this by attempting to deploy a commonly used ConstraintTemplate that limits container images to only those from approved container registries. Run the following command to attempt to deploy the ConstraintTemplate.

```bash
-kubectl apply -f https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml
+kubectl apply -f https://raw.githubusercontent.com/azure-samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/files/constrainttemplate.yaml
```

In the output you should see **This cluster is governed by Azure Policy. Policies must be created through Azure.**

@@ -340,7 +342,7 @@ In VS Code, click the **Azure Policy** icon and you should see the subscription

Open the VS Code terminal and run the following command to download the sample ConstraintTemplate file to your local machine.

```bash
-curl -o constrainttemplate.yaml https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate.yaml
+curl -o constrainttemplate.yaml https://raw.githubusercontent.com/azure-samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/files/constrainttemplate.yaml
```

Open the constrainttemplate.yaml file in VS Code.

@@ -357,7 +359,7 @@ This will generate a new Azure Policy definition in the JSON format. You will ne

The Azure Policy definition will need to be deployed using the Azure Portal. Run the following command to download the sample ConstraintTemplate file to your local machine.
```bash
-curl -o constrainttemplate-as-policy.json https://raw.githubusercontent.com/pauldotyu/ignite/refs/heads/main/constrainttemplate-as-policy.json
+curl -o constrainttemplate-as-policy.json https://raw.githubusercontent.com/Azure-Samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/files/constrainttemplate-as-policy.json
```

To create the policy definition and assign it to the AKS cluster, follow these steps:

From e53fdaab246e79d62c63074cd3c6d020e91de249 Mon Sep 17 00:00:00 2001
From: Paul Yu
Date: Tue, 29 Oct 2024 15:58:42 -0700
Subject: [PATCH 17/27] custom policy edits
---
 packages/website/src/app/workshop/workshop.ts |  3 +
 workshops/operating-aks-automatic/workshop.md | 69 +++++++++++--------
 2 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/packages/website/src/app/workshop/workshop.ts b/packages/website/src/app/workshop/workshop.ts
index c23c9fb8..a9423da8 100644
--- a/packages/website/src/app/workshop/workshop.ts
+++ b/packages/website/src/app/workshop/workshop.ts
@@ -50,6 +50,9 @@ export async function loadWorkshop(repoPath: string, options?: LoaderOptions): P
 // Replace all occurrences of the @lab.CloudPortalCredential(*).Username markdown tag with a placeholder
 markdown = markdown.replace(/@lab\.CloudPortalCredential\([^)]+\)\.Username/gi, '');

+ // Replace all occurrences of the @lab.CloudResourceTemplate(*).Outputs[*] markdown tag
+ markdown = markdown.replace(/@lab\.CloudResourceTemplate\([^)]+\)\.Outputs\[[^\]]+\]/gi, '');
+
 return { title, headings, markdown };
 });
 return {
diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md
index 6440897f..8edb854e 100644
--- a/workshops/operating-aks-automatic/workshop.md
+++ b/workshops/operating-aks-automatic/workshop.md
@@ -286,7 +286,7 @@ In the **Overview** section, you will see the **AKS Deployment Safeguards Policy

Click on the policy assignment to view the compliance state of the cluster resources. You should see some of the policy warnings that were displayed in the terminal output when you deployed the pod without best practices in place.

-Awesome! Now you know where to expect to see warnings both in the terminal and in the Azure Portal and how to address some of these warnings by following best practices.
+Nice! Now you know where to expect to see warnings both in the terminal and in the Azure Portal and how to address some of these warnings by following best practices.

### Custom policy enforcement

@@ -318,71 +318,86 @@

So we need to translate this ConstraintTemplate to an Azure Policy definition and if you are unsure about how to translate ConstraintTemplates to Azure Policy JSON, the [Azure Policy extension for Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension) is available to help.

-Open VS Code.
+#### Create a custom policy definition from a ConstraintTemplate

-> [!ALERT]
-> If you are on a Windows machine, make sure you have the [Remote - WSL](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-wsl) extension installed.
-
-Connect to WSL by pressing **Ctrl+Shift+P** on your keyboard and typing **WSL: Connect to WSL**.
+Using the Azure Policy extension for Visual Studio Code, you can easily create a custom policy definition from a ConstraintTemplate.

-Make sure the Azure Policy extension is installed.
If not, you can install it from the [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension).
-
-To activate the extension, press **Ctrl+Shift+P** on your keyboard to open the command palette and type **Azure: Sign in** then use the web browser to authenticate with your admin user account.
+- Open VS Code and make sure the Azure Policy extension is installed. If not, you can install it from the [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension)
+- To activate the extension, press **Ctrl+Shift+P** on your keyboard to open the command palette and type **Azure: Sign in** then use the web browser to authenticate with your admin user account

> [!HELP]
> If you see multiple sign-in options, choose the one that has **azure-account.login** next to it.

-Next, press **Ctrl+Shift+P** again and type **Azure: Select Subscriptions** then select the subscription that contains the AKS cluster.
+- Press **Ctrl+Shift+P** again and type **Azure: Select Subscriptions** then select the subscription that contains the AKS cluster

> [!HELP]
> If you see multiple subscriptions, choose the one that has **azure-account.selectSubscriptions** next to it.

-In VS Code, click the **Azure Policy** icon and you should see the subscription resources and policies panes being loaded.
-
-Open the VS Code terminal and run the following command to download the sample ConstraintTemplate file to your local machine.
+- In VS Code sidebar, click the **Azure Policy** icon and you should see the subscription resources and policies panes being loaded
+- Open the VS Code terminal and run the following command to download the sample ConstraintTemplate file to your local machine

```bash
curl -o constrainttemplate.yaml https://raw.githubusercontent.com/azure-samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/files/constrainttemplate.yaml
```

-Open the constrainttemplate.yaml file in VS Code.
+- Open the constrainttemplate.yaml file in VS Code and take a look at the contents

-The constraint template includes Rego code that enforces that all containers in the AKS cluster are sourced from approved container registries. The approved container registries are defined in the **registry** parameter and this is where you can specify the container registry URL.
+> [!KNOWLEDGE]
+> The constraint template includes Rego code on line 17 that enforces that all containers in the AKS cluster are sourced from approved container registries. The approved container registries are defined in the **registry** parameter and this is where you can specify the container registry URL when implementing the ConstraintTemplate.
+
+- To convert this template to Azure Policy JSON, press **Ctrl+Shift+P** then type **Azure Policy for Kubernetes: Create Policy Definition from a Constraint Template**

-To convert this template to Azure Policy JSON, press **Ctrl+Shift+P** then type **Azure Policy for Kubernetes: Create Policy Definition from a Constraint Template** and select the **Base64Encoded** option.
+> [!HELP]
+> The extension activation process can take a few minutes to complete. If you cannot get the extension to generate JSON from the ConstraintTemplate, that's okay, you will use a sample JSON file to create the policy definition.

-This will generate a new Azure Policy definition in the JSON format. You will need to fill in details everywhere you see the text **/_ EDIT HERE _/**.
For **apiGroups** field, you can use the value **[""]** to target all API groups and for the **kind** field, you can use the value **["Pod"]** to target pods.

> [!ALERT]
-> The extension activation process might take a few minutes to complete. If you cannot get the extension to generate JSON from the ConstraintTemplate, that's okay, you will use a sample JSON file to create the policy definition in the next step.
+- Select the **Base64Encoded** option
+- This will generate a new Azure Policy definition in the JSON format and encode the ConstraintTemplate in Base64 format

> [!NOTE]
> The template info can also refer to a URL where the ConstraintTemplate is hosted. This is useful when you want to reference a ConstraintTemplate that is hosted in a public repository.
+
+- Fill in details where you see the text **/_ EDIT HERE _/**
  - For **displayName** field use the value `Approved registries only`
  - For **description** field use the value `This policy requires that all containers in an AKS cluster are sourced from approved container registries.`
  - For **apiGroups** field use the value `[""]` to target all API groups
  - For the **kind** field use the value `["Pod"]` to target pods

#### Deploy a custom policy definition

With the custom policy rule written, you can now deploy it to Azure.

-The Azure Policy definition will need to be deployed using the Azure Portal. Run the following command to download the sample ConstraintTemplate file to your local machine.
+- Open a terminal and run the following command to download the sample Azure Policy JSON file to your local machine

```bash
curl -o constrainttemplate-as-policy.json https://raw.githubusercontent.com/Azure-Samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/files/constrainttemplate-as-policy.json
```

-To create the policy definition and assign it to the AKS cluster, follow these steps:
-
- Open **constrainttemplate-as-policy.json** file and copy the JSON to the clipboard
-- Navigate back to the Azure Policy blade in the Azure Portal
+- Navigate back to the [Azure Policy blade](https://portal.azure.com/#view/Microsoft_Azure_Policy/PolicyMenuBlade/~/Overview) in the Azure Portal
- Click on **Definitions** under the **Authoring** section
- Click on **+ Policy definition** then enter the following details:
-  - **Definition location**: Click the button next to the textbox, then select your subscription in the dropdown and click **Select**
+  - **Definition location**: Click the button next to the textbox, then select your subscription in the dropdown and click the **Select** button at the bottom
  - **Name**: `[AKS] Approved registries only`
  - **Description**: `This policy requires that all containers in an AKS cluster are sourced from approved container registries.`
  - **Category**: Click the **Use existing** radio button then select **Kubernetes** from the dropdown
  - **Policy rule**: Clear the existing content and paste the JSON you copied from the **constrainttemplate-as-policy.json** file
-- Click **Save** then click on **Assign policy** button
+- Click **Save** at the bottom of the page
+
+#### Assign the custom policy definition
+
+With the custom policy definition created, you can now assign it to the AKS cluster.
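Before you walk through the assignment steps below, it can help to picture the end state: once the assignment takes effect, the Azure Policy add-on materializes the ConstraintTemplate on the cluster and instantiates it as a Constraint. A rough sketch of what that generated Constraint looks like follows (the name, namespaces, and registry value here are illustrative, and on an Azure Policy-managed cluster you would not apply this yourself):

```yaml
# Illustrative Constraint instantiating the K8sRequiredRegistry
# ConstraintTemplate used in this workshop (managed by Azure Policy)
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sRequiredRegistry
metadata:
  name: approved-registries-only
spec:
  match:
    kinds:
      - apiGroups: [""]
        kinds: ["Pod"]
    excludedNamespaces:
      - kube-system
      - gatekeeper-system
  parameters:
    registry: mycontainerregistry.azurecr.io/
```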
+ +- Click the **Assign policy** button - In the **Basics** tab, enter the following details: - - **Scope**: Click the button next to the textbox, select the **myresourcegroup** which contains the AKS cluster and click **Select** + - **Scope**: Click the button next to the textbox, select the **myresourcegroup** which contains the AKS cluster and click **Select** at the bottom - Click **Next** - In the **Parameters** tab, enter the following details: - Uncheck the **Only show parameters that need input or review** checkbox - **Effect**: Select **deny** from the dropdown - **Namespace exclusions**: Replace the existing content with `["kube-system","kube-node-lease","kube-public", "gatekeeper-system","app-routing-system","azappconfig-system","sc-system"]` - **Image registry**: Enter your container registry URL, for example `@lab.CloudResourceTemplate(AKSAutomatic).Outputs[containerRegistryLoginServer]/` -- Click **Review + create** then click **Create** +- Click **Review + create** to review the policy assignment +- Click **Create** to assign the policy definition to the AKS cluster Awesome! You have successfully enforced custom policies in the AKS cluster. Once the policy assignment has taken effect, you can try deploying a pod with an image from an unapproved container registry to see the policy in action. However, this policy assignment can take up to 15 minutes to take effect, so let's move on to the next section. From f6f9a2928367c8d7c64c40a70b9d3d771e0d9682 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 16:52:34 -0700 Subject: [PATCH 18/27] config management --- .../assets/setup/scripts/postbuild.ps1 | 12 +++++++++++- workshops/operating-aks-automatic/workshop.md | 8 +++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 b/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 index c2dffa3e..20d5aed1 100644 --- a/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 +++ b/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 @@ -8,4 +8,14 @@ Register-AzResourceProvider -ProviderNamespace "Microsoft.ContainerRegistry" Register-AzResourceProvider -ProviderNamespace "Microsoft.KubernetesConfiguration" Register-AzResourceProvider -ProviderNamespace "Microsoft.CognitiveServices" Register-AzProviderFeature -FeatureName "EnableImageIntegrityPreview" -ProviderNamespace "Microsoft.ContainerService" -Register-AzProviderFeature -FeatureName "AKS-AzurePolicyExternalData" -ProviderNamespace "Microsoft.ContainerService" \ No newline at end of file +Register-AzProviderFeature -FeatureName "AKS-AzurePolicyExternalData" -ProviderNamespace "Microsoft.ContainerService" + +# deploy app configuration extension +az k8s-extension create ` + --cluster-type managedClusters ` + --cluster-name myakscluster ` + --resource-group myresourcegroup ` + --name appconfigurationkubernetesprovider ` + --extension-type Microsoft.AppConfiguration ` + --auto-upgrade false ` + --version 2.0.0 diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index 8edb854e..4a513c2e 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -411,7 +411,7 @@ For more information on how to create a policy definition from a ConstraintTempl ## Secrets and config management -Developers need a way to integrate their workloads with Azure services and make the configs available to their workloads 
in the cluster. They also need to ensure password-less authentication with Microsoft Entra ID is leveraged as much as possible. This section aims to get you comfortable with setting up a centralized configuration store, syncing configs to the cluster as Kubernetes ConfigMaps, and setting up connectors to integrate with other Azure services.
+Developers need their workloads to integrate with Azure services, with configurations available to them in the cluster and password-less Microsoft Entra ID authentication leveraged wherever possible. This section guides you through centralizing configuration storage, syncing configs as Kubernetes ConfigMaps, and setting up connectors for workload integration with Azure services.

[Azure Key Vault](https://learn.microsoft.com/azure/key-vault/general/overview) is a cloud service for securely storing and accessing secrets. A secret is anything that you want to tightly control access to, such as API keys, passwords, or certificates. [Azure App Configuration](https://learn.microsoft.com/azure/azure-app-configuration/overview) is a managed service that helps developers centralize their application configurations. It provides a service to store, manage, and retrieve application settings and feature flags. You can also reference secrets stored in Azure Key Vault from Azure App Configuration.

@@ -457,7 +457,7 @@ AC_NAME=$(az appconfig create \
 --output tsv)
```

-It's best practice to create a User-Assigned Managed Identity to access Azure resources. This identity will be used to access only data in the Azure App Configuration store and the Azure Key Vault and nothing else.
+It's best practice to create a User-Assigned Managed Identity to access Azure resources. This identity will be used to access only the data in the Azure App Configuration store and the Azure Key Vault and nothing else.

```bash
AC_ID=$(az identity create \
@@ -487,7 +487,9 @@ az appconfig kv set-keyvault \
 --yes
```

-The Azure App Configuration store will have a reference to the secret in the Azure Key Vault and the intent is to use the user-assigned managed identity to access the secret in the key vault. However, this identity needs to be granted access to the key vault. Run the following command to allow the configuration store's managed identity to read secrets from the key vault.
+The Azure App Configuration store will have a reference to the secret in the Azure Key Vault and the intent is to use the user-assigned managed identity to access the secret in the key vault. However, this identity does not have the necessary permissions yet.
+
+Run the following command to allow the configuration store's managed identity to read secrets from the key vault.

```bash
az role assignment create \

From 1f55aef0c9d850772da65c14463721cba844b07e Mon Sep 17 00:00:00 2001
From: Paul Yu
Date: Tue, 29 Oct 2024 17:08:56 -0700
Subject: [PATCH 19/27] scaling
---
 workshops/operating-aks-automatic/workshop.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md
index 4a513c2e..e040e0b8 100644
--- a/workshops/operating-aks-automatic/workshop.md
+++ b/workshops/operating-aks-automatic/workshop.md
@@ -647,16 +647,16 @@

## Scaling and workload scheduling

-One key benefit of Kubernetes is its ability to scale workloads across a pool of nodes.
One key differentiator of **Kubernetes in the cloud** is its ability to scale the node pool to handle more workloads to meet user demand. This section aims to get you comfortable with scaling capabilities of AKS Automatic and understand workload scheduling best practices.
+One key benefit of Kubernetes is its ability to scale workloads across a pool of nodes. One key differentiator of running Kubernetes in the cloud is its power to dynamically scale the entire node pool to accommodate more workloads and meet user demand. This guide aims to get you comfortable with the scaling capabilities of AKS Automatic and understand best practices for workload scheduling.

### Cluster autoscaling

With AKS Automatic, the [Node Autoprovision (NAP)](https://learn.microsoft.com/azure/aks/node-autoprovision?tabs=azure-cli) feature is enabled by default and will serve as the default cluster autoscaler. AKS Node Autoprovision is the Azure implementation of the [Karpenter project](https://karpenter.sh) which was developed by friends at AWS and [donated to the Cloud Native Computing Foundation (CNCF)](https://aws.amazon.com/blogs/containers/karpenter-graduates-to-beta/). In short, Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster.

-> [!HINT]
+> [!NOTE]
> The term **Node Autoprovision (NAP)** may be used interchangeably with **Karpenter** in this lab.

-The AKS Automatic cluster deploys a system node pool that will run all the system components; things that AKS Automatic will manage. As workloads are deployed to the cluster, NAP will automatically scale up a new node on demand. As soon as you deploy an AKS Cluster, there are no user nodes running; just the system node pool. As you deploy workloads, the NAP feature will automatically provision a new node to run the workload. Conversely, as you delete workloads, the NAP feature will automatically scale down the number of nodes to save costs. But this means that pods will remain in pending state until the newly provisioned node is ready or the workloads will be disrupted as they are moved to other nodes during consolidation events. So you need to account for this when planning for high availability for your workloads.
+When using an AKS Automatic cluster, a system node pool is automatically deployed to run essential components managed by AKS Automatic. As workloads are added or removed from the cluster, the Node Autoprovision (NAP) feature dynamically scales up or down the number of nodes to meet demand. Initially, only system nodes run in the cluster, but as workloads are deployed, NAP provisions new nodes to support them. Conversely, when workloads are deleted, NAP reduces the number of nodes to minimize costs. However, this process can leave pods in a pending state until a new node is available or cause disruptions during consolidation events. As a result, it’s essential to consider these factors when planning high availability for your workloads.

There are a few key Karpenter concepts to understand when working with NAP. Let's start by understanding the following concepts:

@@ -872,7 +872,7 @@ With requests in place, the scheduler can make better decisions about where to p

When deploying workloads to Kubernetes, it is important to ensure that your workloads are highly available and resilient to voluntary and involuntary disruptions. This is especially important when running workloads with Karpenter because nodes can be provisioned and deprovisioned automatically.
There are a few best practices to follow to ensure that your workloads are highly available and resilient to disruptions. -The first thing you can do is to set [PodDisruptionBudgets](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets) for your workloads. PodDisruptionBudgets are used to ensure that a certain number of pods are available during maintenance or disruptions. By setting PodDisruptionBudgets, you can ensure that your workloads are not abruptly terminated during maintenance or node scale down events. +One thing you can do is to set [PodDisruptionBudgets](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets) for your workloads. PodDisruptionBudgets are used to ensure that a certain number of pods are available during maintenance or disruptions. By setting PodDisruptionBudgets, you can ensure that your workloads are not abruptly terminated during maintenance or node scale down events. The YAML spec for a PodDisruptionBudget is relatively easy to write and understand. But if you are not sure of how to write one, you can use [Microsoft Copilot for Azure](https://learn.microsoft.com/azure/copilot/overview) to generate the YAML for you. From d67d97a4d9881d557944df4faef0a8ede1c2066c Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 17:54:47 -0700 Subject: [PATCH 20/27] troubleshooting updates --- packages/website/src/app/workshop/workshop.ts | 10 ++--- workshops/operating-aks-automatic/workshop.md | 43 ++++++++++++++----- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/packages/website/src/app/workshop/workshop.ts b/packages/website/src/app/workshop/workshop.ts index a9423da8..566b0858 100644 --- a/packages/website/src/app/workshop/workshop.ts +++ b/packages/website/src/app/workshop/workshop.ts @@ -41,11 +41,11 @@ export async function loadWorkshop(repoPath: string, options?: LoaderOptions): P }); } - markdown = replaceMarkdownTag(markdown, 'KNOWLEDGE', 'tip', 'knowledge'); - markdown = replaceMarkdownTag(markdown, 'ALERT', 'important', 'alert'); - markdown = replaceMarkdownTag(markdown, 'NOTE', 'task', 'note'); - markdown = replaceMarkdownTag(markdown, 'HELP', 'warning', 'help'); - markdown = replaceMarkdownTag(markdown, 'HINT', 'tip', 'hint'); + markdown = replaceMarkdownTag(markdown, 'KNOWLEDGE', 'info', 'info'); + markdown = replaceMarkdownTag(markdown, 'ALERT', 'important', 'important'); + markdown = replaceMarkdownTag(markdown, 'NOTE', 'info', 'note'); + markdown = replaceMarkdownTag(markdown, 'HELP', 'warning', 'warning'); + markdown = replaceMarkdownTag(markdown, 'HINT', 'tip', 'tip'); // Replace all occurrences of the @lab.CloudPortalCredential(*).Username markdown tag with a placeholder markdown = markdown.replace(/@lab\.CloudPortalCredential\([^)]+\)\.Username/gi, ''); diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index e040e0b8..048473f8 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -325,12 +325,12 @@ Using the Azure Policy extension for Visual Studio Code, you can easily create a - Open VS Code and make sure the Azure Policy extension is installed. 
If not, you can install it from the [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=AzurePolicy.azurepolicyextension)
- To activate the extension, press **Ctrl+Shift+P** on your keyboard to open the command palette and type **Azure: Sign in** then use the web browser to authenticate with your admin user account

-> [!HELP]
+> [!TIP]
> If you see multiple sign-in options, choose the one that has **azure-account.login** next to it.

- Press **Ctrl+Shift+P** again and type **Azure: Select Subscriptions** then select the subscription that contains the AKS cluster

-> [!HELP]
+> [!TIP]
> If you see multiple subscriptions, choose the one that has **azure-account.selectSubscriptions** next to it.

- In VS Code sidebar, click the **Azure Policy** icon and you should see the subscription resources and policies panes being loaded

@@ -353,7 +353,7 @@ curl -o constrainttemplate.yaml https://raw.githubusercontent.com/azure-samples/

- Select the **Base64Encoded** option
- This will generate a new Azure Policy definition in the JSON format and encode the ConstraintTemplate in Base64 format

-> [!NOTE]
+> [!HINT]
> The template info can also refer to a URL where the ConstraintTemplate is hosted. This is useful when you want to reference a ConstraintTemplate that is hosted in a public repository.

- Fill in details where you see the text **/_ EDIT HERE _/**

@@ -1076,11 +1076,15 @@ Let's face it. Applications will fail. Being able to quickly identify and mitiga

[Microsoft Copilot for Azure](https://learn.microsoft.com/azure/copilot/overview) is a tool built into the Azure portal that enables you to diagnose and troubleshoot issues. It is not limited to AKS; you can use it to help troubleshoot issues with any of your Azure resources. The Azure Copilot provides a guided experience to lead you through the troubleshooting process and helps you understand concepts by offering explanations, suggestions, and resource URLs to learn more.

-Use the Azure Copilot to help you find and fix issues with your workloads.
+#### Find the issue

In the Azure Portal, navigate to your AKS cluster and click on the Copilot button found at the top of the page. A panel will open on the right side of the screen and you will be presented with some suggested prompts.

-Ask the Copilot `how's the health of my pods?`
+Ask the Copilot:
+
+```text
+How is the health of my pods?
+```

You should be presented with a kubectl command that you can run to get the status of your pods. Click the **Yes** button to execute the command from the Run command page.

Scroll through the output and see if you can spot the issue.

There is a problem with the ai-service pod.

-Ask the Copilot `I see the the ai-service pod in the dev namespace with crashloopbackoff status. What does that mean?` The Copilot should provide you with an explanation of what the crashloopbackoff status means and how to troubleshoot it.
+#### Find the solution
+
+Ask the Copilot:
+
+```text
+I see the the ai-service pod in the dev namespace with crashloopbackoff status. What does that mean?
+```
+
+The Copilot should provide you with an explanation of what the crashloopbackoff status means and how to troubleshoot it.

You were not specific with the pod name so the Copilot gave you a general command to run, so re-prompt the Copilot to restate the commands by giving it the exact pod name `The exact pod name is ai-service-xxxxx.
What commands should I run again?`

Some of the commands may include a **Run** button that can enable the Azure Cloud Shell, don't use this as you'd need to re-authenticate from within the Cloud Shell. Instead, copy the **kubectl describe** pod command and run it in the Run command window to get more information about the pod. The **kubectl describe** command will provide you with more information about the pod including the events that led to the crashloopbackoff status. You might get a little more information about the issue if you look through the pod logs. The Copilot should have also provided you with a **kubectl logs** command to get the logs of the pod. Run that command to get the logs.

You should see that the ai-service pod is failing because it is missing environment variables that are required to connect to Azure OpenAI. Do you have an Azure OpenAI service running? If you are not sure, you can ask the Copilot `Do I have an Azure OpenAI service running?`

It should have determined there is no Azure OpenAI service running.

You go back to your dev team and they tell you that they will need an Azure OpenAI service with the GPT-3.5 Turbo model to run the ai-service pod.

-Ask the Copilot `How do I create an Azure OpenAI service with the GPT-3.5 Turbo model?`
+#### Implement the solution
+
+Ask the Copilot:
+
+```text
+How do I create an Azure OpenAI service with the GPT-3.5 Turbo model?
+```
+
+> [!ALERT]
+> The Azure Copilot won't always provide you with the exact commands to run but it will provide you with the necessary information to get you started.

The instructions should be very close to what you need. You can either follow the instructions and/or reference the docs it replies with or you can run the following commands to quickly create an Azure OpenAI service account.

az cognitiveservices account deployment create \
--sku-name "Standard"
```

-> [!IMPORTANT]
+> [!ALERT]
> The model version above may not be available in your region. You can check the model availability [here](https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=python-secure#standard-deployment-model-availability)

The dev team also tells you that the ai-service pod requires a ConfigMap named **ai-service-configs** with the following environment variables to connect to the Azure OpenAI service.

- **AZURE_OPENAI_DEPLOYMENT_NAME** set to "gpt-35-turbo"
- **AZURE_OPENAI_ENDPOINT** set to the endpoint of the Azure OpenAI service
- **USE_AZURE_OPENAI** set to "True"

Additionally the ai-service pod requires a Secret named **ai-service-secrets** with the following variable to authenticate to the Azure OpenAI service.

- **OPENAI_API_KEY** set to the API key of the Azure OpenAI service

-Can you complete the rest of the steps to get the ai-service pod running?
+#### Challenge
+
+Based on what you have learned so far in this lab, can you complete the rest of the steps to get the ai-service pod running?

> [!HINT]
> You can put the environment variables in the Azure App Configuration store and sync them to the Kubernetes ConfigMap. You can then update the ai-service deployment to use the ConfigMap for the environment variables.

How can you go about updating this to use passwordless authentication with AKS Workload Identity instead?

> [!HINT]
> A complete walkthrough of the solution can be found [here](https://learn.microsoft.com/azure/aks/open-ai-secure-access-quickstart)

### Troubleshooting with kubectl

The Azure Copilot gave you some pretty good suggestions to start troubleshooting with kubectl. The **kubectl describe** command is a great way to get more information about a pod. You can also use the **kubectl logs** command to get the logs of a pod. One thing to note about using the **kubectl logs** command is that it only works for pods that are running. If the pod is in a crashloopbackoff status, you may not be able to get the logs of the pod that failed. In this case you can use the **--previous** flag to get the logs of the previous container that failed.

Finally, be sure to check out the [Troubleshooting Applications](https://kubernetes.io/docs/tasks/debug/debug-application/) guide found on the Kubernetes documentation site and the following resources for more information on troubleshooting AKS:

@@ -1173,4 +1196,4 @@ Congratulations! You have completed the workshop on operating AKS Automatic. You

This lab is also available at https://aka.ms/aks/labs along with others, so feel free to check them out.

-If you have any feedback or questions, please feel free to reach out to us at https://aka.ms/aks/feedback.
+If you have any feedback or questions on AKS in general, please feel free to reach out to us at https://aka.ms/aks/feedback.
From e065220aca8298d6152f164c6d3d26c29123123d Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Tue, 29 Oct 2024 19:09:20 -0700 Subject: [PATCH 21/27] adding app config provider extension to cluster --- .../assets/setup/bicep/aks.bicep | 13 + .../assets/setup/bicep/aks.json | 492 ++++++++++++++++++ .../assets/setup/scripts/postbuild.ps1 | 10 - workshops/operating-aks-automatic/workshop.md | 22 +- 4 files changed, 508 insertions(+), 29 deletions(-) create mode 100644 workshops/operating-aks-automatic/assets/setup/bicep/aks.json diff --git a/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep b/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep index babdd1b1..6d3a2762 100644 --- a/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep +++ b/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep @@ -96,6 +96,19 @@ resource aksCluster 'Microsoft.ContainerService/managedClusters@2024-03-02-previ } } +resource appConfigProvider 'Microsoft.KubernetesConfiguration/extensions@2022-11-01' = { + scope: aksCluster + name: 'appconfigurationkubernetesprovider' + identity: { + type: 'SystemAssigned' + } + properties: { + autoUpgradeMinorVersion: false + extensionType: 'Microsoft.AppConfiguration' + version: '2.0.0' + } +} + resource containerRegistry 'Microsoft.ContainerRegistry/registries@2023-11-01-preview' = { name: 'mycontainerregistry${take(uniqueString(nameSuffix), 4)}' location: resourceGroup().location diff --git a/workshops/operating-aks-automatic/assets/setup/bicep/aks.json b/workshops/operating-aks-automatic/assets/setup/bicep/aks.json new file mode 100644 index 00000000..b7c29620 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/setup/bicep/aks.json @@ -0,0 +1,492 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "metadata": { + "_generator": { + "name": "bicep", + "version": "0.30.23.60470", + "templateHash": "15495512437515460279" + } + }, + "parameters": { + "nameSuffix": { + "type": "string", + "metadata": { + "description": "The basename of the resource." 
+ } + } + }, + "resources": [ + { + "type": "Microsoft.OperationalInsights/workspaces", + "apiVersion": "2022-10-01", + "name": "[format('mylogs{0}', take(uniqueString(parameters('nameSuffix')), 4))]", + "location": "[resourceGroup().location]", + "identity": { + "type": "SystemAssigned" + }, + "properties": { + "sku": { + "name": "PerGB2018" + } + } + }, + { + "type": "Microsoft.Monitor/accounts", + "apiVersion": "2023-04-03", + "name": "[format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4))]", + "location": "[resourceGroup().location]" + }, + { + "type": "Microsoft.Dashboard/grafana", + "apiVersion": "2023-09-01", + "name": "[format('mygrafana{0}', take(uniqueString(parameters('nameSuffix')), 4))]", + "location": "[resourceGroup().location]", + "sku": { + "name": "Standard" + }, + "identity": { + "type": "SystemAssigned" + }, + "properties": { + "grafanaIntegrations": { + "azureMonitorWorkspaceIntegrations": [ + { + "azureMonitorWorkspaceResourceId": "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + } + ] + } + }, + "dependsOn": [ + "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + ] + }, + { + "type": "Microsoft.ContainerService/managedClusters", + "apiVersion": "2024-03-02-preview", + "name": "myakscluster", + "location": "[resourceGroup().location]", + "sku": { + "name": "Automatic", + "tier": "Standard" + }, + "properties": { + "agentPoolProfiles": [ + { + "name": "systempool", + "count": 3, + "osType": "Linux", + "mode": "System" + } + ], + "addonProfiles": { + "omsagent": { + "enabled": true, + "config": { + "logAnalyticsWorkspaceResourceID": "[resourceId('Microsoft.OperationalInsights/workspaces', format('mylogs{0}', take(uniqueString(parameters('nameSuffix')), 4)))]", + "useAADAuth": "true" + } + } + }, + "azureMonitorProfile": { + "metrics": { + "enabled": true, + "kubeStateMetrics": { + "metricLabelsAllowlist": "*", + "metricAnnotationsAllowList": "*" + } + }, + "containerInsights": { + "enabled": true, + "logAnalyticsWorkspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', format('mylogs{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + } + } + }, + "identity": { + "type": "SystemAssigned" + }, + "dependsOn": [ + "[resourceId('Microsoft.OperationalInsights/workspaces', format('mylogs{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + ] + }, + { + "type": "Microsoft.KubernetesConfiguration/extensions", + "apiVersion": "2022-11-01", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', 'myakscluster')]", + "name": "appconfigurationkubernetesprovider", + "identity": { + "type": "SystemAssigned" + }, + "properties": { + "autoUpgradeMinorVersion": false, + "extensionType": "Microsoft.AppConfiguration", + "version": "2.0.0" + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]" + ] + }, + { + "type": "Microsoft.ContainerRegistry/registries", + "apiVersion": "2023-11-01-preview", + "name": "[format('mycontainerregistry{0}', take(uniqueString(parameters('nameSuffix')), 4))]", + "location": "[resourceGroup().location]", + "sku": { + "name": "Standard" + }, + "identity": { + "type": "SystemAssigned" + } + }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2022-04-01", + "scope": "[format('Microsoft.ContainerRegistry/registries/{0}', format('mycontainerregistry{0}', 
take(uniqueString(parameters('nameSuffix')), 4)))]", + "name": "[guid(subscription().id, resourceGroup().id, format('mycontainerregistry{0}', take(uniqueString(parameters('nameSuffix')), 4)), 'AcrPullRole')]", + "properties": { + "principalId": "[reference(resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster'), '2024-03-02-preview').identityProfile.kubeletIdentity.objectId]", + "principalType": "ServicePrincipal", + "roleDefinitionId": "[resourceId('Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d')]" + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.ContainerRegistry/registries', format('mycontainerregistry{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + ] + }, + { + "type": "Microsoft.Insights/dataCollectionEndpoints", + "apiVersion": "2022-06-01", + "name": "[format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster')]", + "location": "[resourceGroup().location]", + "kind": "Linux", + "properties": { + "description": "Data Collection Endpoint for Prometheus" + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]" + ] + }, + { + "type": "Microsoft.Insights/dataCollectionRuleAssociations", + "apiVersion": "2022-06-01", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', 'myakscluster')]", + "name": "configurationAccessEndpoint", + "properties": { + "dataCollectionEndpointId": "[resourceId('Microsoft.Insights/dataCollectionEndpoints', format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster'))]" + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.Insights/dataCollectionEndpoints', format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster'))]" + ] + }, + { + "type": "Microsoft.Insights/dataCollectionRules", + "apiVersion": "2022-06-01", + "name": "[format('MSCI-{0}-{1}', resourceGroup().location, 'myakscluster')]", + "location": "[resourceGroup().location]", + "kind": "Linux", + "properties": { + "dataSources": { + "syslog": [], + "extensions": [ + { + "streams": [ + "Microsoft-ContainerInsights-Group-Default" + ], + "extensionName": "ContainerInsights", + "extensionSettings": { + "dataCollectionSettings": { + "interval": "1m", + "namespaceFilteringMode": "Off", + "enableContainerLogV2": true + } + }, + "name": "ContainerInsightsExtension" + } + ] + }, + "destinations": { + "logAnalytics": [ + { + "workspaceResourceId": "[resourceId('Microsoft.OperationalInsights/workspaces', format('mylogs{0}', take(uniqueString(parameters('nameSuffix')), 4)))]", + "name": "ciworkspace" + } + ] + }, + "dataFlows": [ + { + "streams": [ + "Microsoft-ContainerInsights-Group-Default" + ], + "destinations": [ + "ciworkspace" + ] + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.OperationalInsights/workspaces', format('mylogs{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + ] + }, + { + "type": "Microsoft.Insights/dataCollectionRuleAssociations", + "apiVersion": "2022-06-01", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', 'myakscluster')]", + "name": "[format('MSCI-{0}-{1}', resourceGroup().location, 'myakscluster')]", + "properties": { + "dataCollectionRuleId": "[resourceId('Microsoft.Insights/dataCollectionRules', format('MSCI-{0}-{1}', resourceGroup().location, 'myakscluster'))]" + }, + "dependsOn": 
[ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.Insights/dataCollectionRules', format('MSCI-{0}-{1}', resourceGroup().location, 'myakscluster'))]" + ] + }, + { + "type": "Microsoft.Insights/dataCollectionRules", + "apiVersion": "2022-06-01", + "name": "[format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster')]", + "location": "[resourceGroup().location]", + "kind": "Linux", + "properties": { + "dataCollectionEndpointId": "[resourceId('Microsoft.Insights/dataCollectionEndpoints', format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster'))]", + "dataSources": { + "prometheusForwarder": [ + { + "streams": [ + "Microsoft-PrometheusMetrics" + ], + "name": "PrometheusDataSource" + } + ] + }, + "destinations": { + "monitoringAccounts": [ + { + "accountResourceId": "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]", + "name": "MonitoringAccount1" + } + ] + }, + "dataFlows": [ + { + "streams": [ + "Microsoft-PrometheusMetrics" + ], + "destinations": [ + "MonitoringAccount1" + ] + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.Insights/dataCollectionEndpoints', format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster'))]", + "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + ] + }, + { + "type": "Microsoft.Insights/dataCollectionRuleAssociations", + "apiVersion": "2022-06-01", + "scope": "[format('Microsoft.ContainerService/managedClusters/{0}', 'myakscluster')]", + "name": "[format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster')]", + "properties": { + "dataCollectionRuleId": "[resourceId('Microsoft.Insights/dataCollectionRules', format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster'))]" + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.Insights/dataCollectionRules', format('MSProm-{0}-{1}', resourceGroup().location, 'myakscluster'))]" + ] + }, + { + "type": "Microsoft.AlertsManagement/prometheusRuleGroups", + "apiVersion": "2023-09-01-preview", + "name": "[format('KubernetesRecordingRulesRuleGroup - {0}', 'myakscluster')]", + "location": "[resourceGroup().location]", + "properties": { + "enabled": true, + "description": "Kubernetes Recording Rules RuleGroup", + "clusterName": "myakscluster", + "scopes": [ + "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]", + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]" + ], + "interval": "PT1M", + "rules": [ + { + "record": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate", + "expression": "sum by (cluster, namespace, pod, container) (irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))" + }, + { + "record": "node_namespace_pod_container:container_memory_working_set_bytes", + "expression": "container_memory_working_set_bytes{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))" + }, + { + "record": "node_namespace_pod_container:container_memory_rss", + 
"expression": "container_memory_rss{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))" + }, + { + "record": "node_namespace_pod_container:container_memory_cache", + "expression": "container_memory_cache{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))" + }, + { + "record": "node_namespace_pod_container:container_memory_swap", + "expression": "container_memory_swap{job=\"cadvisor\", image!=\"\"}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=\"\"}))" + }, + { + "record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests", + "expression": "kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))" + }, + { + "record": "namespace_memory:kube_pod_container_resource_requests:sum", + "expression": "sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_requests{resource=\"memory\",job=\"kube-state-metrics\"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))" + }, + { + "record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests", + "expression": "kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))" + }, + { + "record": "namespace_cpu:kube_pod_container_resource_requests:sum", + "expression": "sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_requests{resource=\"cpu\",job=\"kube-state-metrics\"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))" + }, + { + "record": "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits", + "expression": "kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))" + }, + { + "record": "namespace_memory:kube_pod_container_resource_limits:sum", + "expression": "sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) (kube_pod_container_resource_limits{resource=\"memory\",job=\"kube-state-metrics\"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))" + }, + { + "record": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits", + "expression": "kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1) )" + }, + { + "record": "namespace_cpu:kube_pod_container_resource_limits:sum", + "expression": "sum by (namespace, cluster) (sum by (namespace, pod, cluster) (max by (namespace, pod, container, cluster) 
(kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"}) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)))" + }, + { + "record": "namespace_workload_pod:kube_pod_owner:relabel", + "expression": "max by (cluster, namespace, workload, pod) ((label_replace(label_replace(kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"}, \"replicaset\", \"$1\", \"owner_name\", \"(.*)\") * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (1, max by (replicaset, namespace, owner_name) (kube_replicaset_owner{job=\"kube-state-metrics\"})), \"workload\", \"$1\", \"owner_name\", \"(.*)\" )))", + "labels": { + "workload_type": "deployment" + } + }, + { + "record": "namespace_workload_pod:kube_pod_owner:relabel", + "expression": "max by (cluster, namespace, workload, pod) ((label_replace(kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\")))", + "labels": { + "workload_type": "daemonset" + } + }, + { + "record": "namespace_workload_pod:kube_pod_owner:relabel", + "expression": "max by (cluster, namespace, workload, pod) ((label_replace(kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\")))", + "labels": { + "workload_type": "statefulset" + } + }, + { + "record": "namespace_workload_pod:kube_pod_owner:relabel", + "expression": "max by (cluster, namespace, workload, pod) ((label_replace(kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Job\"}, \"workload\", \"$1\", \"owner_name\", \"(.*)\")))", + "labels": { + "workload_type": "job" + } + }, + { + "record": ":node_memory_MemAvailable_bytes:sum", + "expression": "sum(node_memory_MemAvailable_bytes{job=\"node\"} or (node_memory_Buffers_bytes{job=\"node\"} + node_memory_Cached_bytes{job=\"node\"} + node_memory_MemFree_bytes{job=\"node\"} + node_memory_Slab_bytes{job=\"node\"})) by (cluster)" + }, + { + "record": "cluster:node_cpu:ratio_rate5m", + "expression": "sum(rate(node_cpu_seconds_total{job=\"node\",mode!=\"idle\",mode!=\"iowait\",mode!=\"steal\"}[5m])) by (cluster) /count(sum(node_cpu_seconds_total{job=\"node\"}) by (cluster, instance, cpu)) by (cluster)" + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + ] + }, + { + "type": "Microsoft.AlertsManagement/prometheusRuleGroups", + "apiVersion": "2023-09-01-preview", + "name": "[format('NodeRecordingRulesRuleGroup - {0}', 'myakscluster')]", + "location": "[resourceGroup().location]", + "properties": { + "enabled": true, + "description": "Node Recording Rules RuleGroup", + "clusterName": "myakscluster", + "scopes": [ + "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]", + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]" + ], + "interval": "PT1M", + "rules": [ + { + "record": "instance:node_num_cpu:sum", + "expression": "count without (cpu, mode) (node_cpu_seconds_total{job=\"node\",mode=\"idle\"})" + }, + { + "record": "instance:node_cpu_utilisation:rate5m", + "expression": "1 - avg without (cpu) (sum without (mode) (rate(node_cpu_seconds_total{job=\"node\", mode=~\"idle|iowait|steal\"}[5m])))" + }, + { + "record": "instance:node_load1_per_cpu:ratio", 
+ "expression": "(node_load1{job=\"node\"}/ instance:node_num_cpu:sum{job=\"node\"})" + }, + { + "record": "instance:node_memory_utilisation:ratio", + "expression": "1 - ((node_memory_MemAvailable_bytes{job=\"node\"} or (node_memory_Buffers_bytes{job=\"node\"} + node_memory_Cached_bytes{job=\"node\"} + node_memory_MemFree_bytes{job=\"node\"} + node_memory_Slab_bytes{job=\"node\"})) / node_memory_MemTotal_bytes{job=\"node\"})" + }, + { + "record": "instance:node_vmstat_pgmajfault:rate5m", + "expression": "rate(node_vmstat_pgmajfault{job=\"node\"}[5m])" + }, + { + "record": "instance_device:node_disk_io_time_seconds:rate5m", + "expression": "rate(node_disk_io_time_seconds_total{job=\"node\", device!=\"\"}[5m])" + }, + { + "record": "instance_device:node_disk_io_time_weighted_seconds:rate5m", + "expression": "rate(node_disk_io_time_weighted_seconds_total{job=\"node\", device!=\"\"}[5m])" + }, + { + "record": "instance:node_network_receive_bytes_excluding_lo:rate5m", + "expression": "sum without (device) (rate(node_network_receive_bytes_total{job=\"node\", device!=\"lo\"}[5m]))" + }, + { + "record": "instance:node_network_transmit_bytes_excluding_lo:rate5m", + "expression": "sum without (device) (rate(node_network_transmit_bytes_total{job=\"node\", device!=\"lo\"}[5m]))" + }, + { + "record": "instance:node_network_receive_drop_excluding_lo:rate5m", + "expression": "sum without (device) (rate(node_network_receive_drop_total{job=\"node\", device!=\"lo\"}[5m]))" + }, + { + "record": "instance:node_network_transmit_drop_excluding_lo:rate5m", + "expression": "sum without (device) (rate(node_network_transmit_drop_total{job=\"node\", device!=\"lo\"}[5m]))" + } + ] + }, + "dependsOn": [ + "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]", + "[resourceId('Microsoft.Monitor/accounts', format('myprometheus{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + ] + } + ], + "outputs": { + "grafanaId": { + "type": "string", + "value": "[resourceId('Microsoft.Dashboard/grafana', format('mygrafana{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + }, + "aksId": { + "type": "string", + "value": "[resourceId('Microsoft.ContainerService/managedClusters', 'myakscluster')]" + }, + "containerRegistryLoginServer": { + "type": "string", + "value": "[format('{0}.azurecr.io', format('mycontainerregistry{0}', take(uniqueString(parameters('nameSuffix')), 4)))]" + } + } +} \ No newline at end of file diff --git a/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 b/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 index 20d5aed1..22e8107c 100644 --- a/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 +++ b/workshops/operating-aks-automatic/assets/setup/scripts/postbuild.ps1 @@ -9,13 +9,3 @@ Register-AzResourceProvider -ProviderNamespace "Microsoft.KubernetesConfiguratio Register-AzResourceProvider -ProviderNamespace "Microsoft.CognitiveServices" Register-AzProviderFeature -FeatureName "EnableImageIntegrityPreview" -ProviderNamespace "Microsoft.ContainerService" Register-AzProviderFeature -FeatureName "AKS-AzurePolicyExternalData" -ProviderNamespace "Microsoft.ContainerService" - -# deploy app configuration extension -az k8s-extension create ` - --cluster-type managedClusters ` - --cluster-name myakscluster ` - --resource-group myresourcegroup ` - --name appconfigurationkubernetesprovider ` - --extension-type Microsoft.AppConfiguration ` - --auto-upgrade false ` - --version 2.0.0 diff --git 
a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index 048473f8..e477569e 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -43,7 +43,7 @@ The lab environment has been pre-configured for you with the following Azure res - [Azure Managed Grafana](https://learn.microsoft.com/azure/managed-grafana/overview) > [!NOTE] -> The Bicep template used to deploy the lab environment can be found [here](https://raw.githubusercontent.com/azure-samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep) +> The Bicep template used to deploy the lab environment can be found [here](https://raw.githubusercontent.com/azure-samples/aks-labs/refs/heads/ignite/workshops/operating-aks-automatic/assets/setup/bicep/aks.bicep). Just note that if you deploy this template, you will need to assign yourself the "Azure Kubernetes Service RBAC Cluster Admin" role to the AKS cluster and the "Grafana Admin" role to the Azure Managed Grafana resources. You will also need the following tools: @@ -503,25 +503,9 @@ az role assignment create \ ### Azure App Configuration Provider for Kubernetes -AKS offers an extension called the [Azure App Configuration Provider for Kubernetes](https://learn.microsoft.com/azure/aks/azure-app-configuration?tabs=cli) that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually. +AKS offers an extension called the [Azure App Configuration Provider for Kubernetes](https://learn.microsoft.com/azure/aks/azure-app-configuration?tabs=cli) that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually. To save you some time in this lab, the extension has been pre-installed in the AKS Automatic cluster for you. -Run the following command to install the Azure App Configuration Provider for Kubernetes extension. - -```bash -az k8s-extension create \ ---cluster-type managedClusters \ ---cluster-name myakscluster \ ---resource-group myresourcegroup \ ---name appconfigurationkubernetesprovider \ ---extension-type Microsoft.AppConfiguration \ ---auto-upgrade false \ ---version 2.0.0 -``` - -> [!ALERT] -> This can take up to 5 minutes to complete. - -After the extension has been created, you can verify that the pods are running. +Run the following command to verify that the Azure app config provider pods are running. 
```bash kubectl get pods -n azappconfig-system From a36ba8a73c37071d5cc23f9e05adca7bca9855f2 Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Wed, 30 Oct 2024 10:53:39 -0700 Subject: [PATCH 22/27] adding teardown script --- .../operating-aks-automatic/assets/setup/scripts/torndown.ps1 | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 workshops/operating-aks-automatic/assets/setup/scripts/torndown.ps1 diff --git a/workshops/operating-aks-automatic/assets/setup/scripts/torndown.ps1 b/workshops/operating-aks-automatic/assets/setup/scripts/torndown.ps1 new file mode 100644 index 00000000..3a4afd90 --- /dev/null +++ b/workshops/operating-aks-automatic/assets/setup/scripts/torndown.ps1 @@ -0,0 +1,4 @@ +$deletedAccounts = Get-AzResource -ResourceId /subscriptions/@lab.CloudSubscription.Id/providers/Microsoft.CognitiveServices/deletedAccounts -ApiVersion 2021-04-30 +foreach ($deletedAccount in $deletedAccounts) { + Remove-AzResource -Confirm:$false -ResourceId $deletedAccount.ResourceId -ApiVersion 2021-04-30 -Force +} From f9d5919e44f99784ee1780f6f7281719e6f9228a Mon Sep 17 00:00:00 2001 From: Paul Yu Date: Wed, 30 Oct 2024 16:41:26 -0700 Subject: [PATCH 23/27] final edits --- workshops/operating-aks-automatic/workshop.md | 158 ++++++++++++------ 1 file changed, 104 insertions(+), 54 deletions(-) diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md index e477569e..773ef992 100644 --- a/workshops/operating-aks-automatic/workshop.md +++ b/workshops/operating-aks-automatic/workshop.md @@ -89,7 +89,7 @@ Being able to manage user access to the AKS cluster and enforce policies is crit With [Azure RBAC for Kubernetes authorization](https://learn.microsoft.com/azure/aks/manage-azure-rbac?tabs=azure-cli) enabled on the AKS Automatic cluster, granting users access to the cluster is as simple as assigning roles to users, groups, and/or service principals. Users will run the normal **az aks get-credentials** command to download the [kubeconfig file](https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/), but when users attempt to execute commands against the Kubernetes API Server, they will be instructed to log in with their Microsoft Entra ID credentials and their assigned roles will determine what they can do within the cluster. -To grant permissions to the AKS cluster, you will need to assign an Azure role. The following built-in roles are available for user assignment. +To grant permissions to the AKS cluster, you will need to assign an Azure role to a user. The following built-in roles are available for user assignment. - [Azure Kubernetes Service RBAC Admin](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-admin) - [Azure Kubernetes Service RBAC Cluster Admin](https://learn.microsoft.com/azure/role-based-access-control/built-in-roles/containers#azure-kubernetes-service-rbac-cluster-admin) @@ -136,7 +136,7 @@ DEV_USER_PRINCIPAL_ID=$(az ad user show \ --output tsv) ``` -Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. Scoping the permissions to the namespace ensures that the developer can only access the resources within the namespace and not the entire cluster. +Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. 
This ensures that the developer can only access the resources within the namespace and not the entire cluster.

```bash
az role assignment create \

@@ -147,7 +147,7 @@ az role assignment create \

When you logged in to access the Kubernetes API via the kubectl command, you were prompted to log in with your Microsoft Entra ID. The kubelogin plugin stores the OIDC token in the **~/.kube/cache/kubelogin** directory. In order to quickly test the permissions of a different user, we can simply move the JSON file to a different directory.

-Run the following command to move the cached credentials to its parent directory.
+Run the following command to temporarily move the cached credentials to its parent directory.

```bash
mv ~/.kube/cache/kubelogin/*.json ~/.kube/cache/

@@ -211,7 +211,7 @@ mv ~/.kube/cache/*.json ~/.kube/cache/kubelogin/

### Deployment Safeguards

-Before you unleash developers to deploy applications in the AKS cluster, you likely want to ensure that they are following best practices. [Deployment Safeguards](https://learn.microsoft.com/azure/aks/deployment-safeguards) is a feature that helps enforce best practices and policies for your AKS clusters. It is implemented as an AKS add-on using [Azure Policy](https://learn.microsoft.com/azure/governance/policy/overview) and enabled by default on AKS Automatic clusters. Deployment Safeguards is basically a group of policies known as an [initiative](https://learn.microsoft.com/azure/governance/policy/concepts/initiative-definition-structure) which is assigned to your cluster to monitor resources running within it are secure, compliant, and follows best practices. The compliance state of the cluster resources are reported back to Azure Policy and can be viewed in the Azure Portal.
+Before you unleash developers to deploy applications in the AKS cluster, you likely want to ensure that they are following best practices. [Deployment Safeguards](https://learn.microsoft.com/azure/aks/deployment-safeguards) is a feature that helps enforce best practices and policies for your AKS clusters. It is implemented as an AKS add-on using [Azure Policy](https://learn.microsoft.com/azure/governance/policy/overview) and enabled by default on AKS Automatic clusters. Deployment Safeguards is basically a group of policies known as an [initiative](https://learn.microsoft.com/azure/governance/policy/concepts/initiative-definition-structure) which is assigned to your cluster to ensure resources running within it are secure, compliant, and follow best practices. The compliance state of the cluster resources is reported back to Azure Policy and can be viewed in the Azure Portal.

The group of policies that are included with Deployment Safeguards are documented [here](https://learn.microsoft.com/azure/aks/deployment-safeguards#deployment-safeguards-policies). Read carefully through each policy description, the targeted resource, and the mutation that can be applied when the assignment is set to **Enforcement** mode. AKS Automatic defaults to **Warning** mode which simply displays warnings in the terminal as a gentle reminder to implement best practices. You may have seen Deployment Safeguards at work when you deployed the demo application using Helm. When Deployment Safeguards is in Enforcement mode, policies will be strongly enforced by either mutating deployments to comply with the policies or denying deployments that violate policy. Therefore, it is important to understand the impact of each policy before enabling Enforcement mode.
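Once you are comfortable with the impact, the safeguards level can be changed with the Azure CLI. A rough sketch of switching the cluster from **Warning** to **Enforcement** mode, assuming the **aks-preview** CLI extension is installed (the `--safeguards-level` flag currently ships in that extension) and using this lab's cluster and resource group names:

```bash
# The safeguards flags are assumed to require the aks-preview extension
az extension add --name aks-preview --upgrade

# Move Deployment Safeguards from Warning to Enforcement mode
az aks update \
  --name myakscluster \
  --resource-group myresourcegroup \
  --safeguards-level Enforcement
```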
@@ -304,9 +304,7 @@ You can also view the ConstraintTemplates that are available in the cluster. Run

kubectl get constrainttemplates
```

-Although Gatekeepr is running in the cluster, it is worth noting that this Gatekeeper cannot be used outside of Azure Policy. That is, if you want to implement a well-known or commonly used ConstraintTemplates, you'll need to translate it to an Azure Policy definition and assign it to the AKS cluster.
-
-From there **azure-policy-\*** pods running in the **kube-system** namespace listens for Azure Policy assignments, translates them to ConstraintTemplates, deploys the custom Constraints (cluster policy), and reports the cluster policy results back up to Azure Policy.
+Although Gatekeeper is running in the cluster, it is worth noting that this Gatekeeper cannot be used outside of Azure Policy. That is, if you want to implement a well-known or commonly used ConstraintTemplate, you'll need to translate it to an Azure Policy definition and assign it to the AKS cluster. From there **azure-policy-\*** pods running in the **kube-system** namespace listen for Azure Policy assignments, translate them to ConstraintTemplates, deploy the custom Constraints (cluster policy), and report the cluster policy results back up to Azure Policy.

Let's illustrate this by attempting to deploy a commonly used ConstraintTemplate that limits container images to only those from approved container registries. Run the following command to attempt to deploy the ConstraintTemplate.

@@ -346,11 +344,11 @@ curl -o constrainttemplate.yaml https://raw.githubusercontent.com/azure-samples/

> The constraint template includes Rego code on line 17 that enforces that all containers in the AKS cluster are sourced from approved container registries. The approved container registries are defined in the **registry** parameter and this is where you can specify the container registry URL when implementing the ConstraintTemplate.

- To convert this template to Azure Policy JSON, press **Ctrl+Shift+P** then type **Azure Policy for Kubernetes: Create Policy Definition from a Constraint Template**
+- Select the **Base64Encoded** option

> [!HELP]
-> The extension activation process can take a few minutes to complete. If you cannot get the extension to generate JSON from the ConstraintTemplate, that's okay, you will use a sample JSON file to create the policy definition.
+> The extension activation process can take a few minutes to complete. If you cannot get the extension to generate JSON from the ConstraintTemplate, that's okay, you will use a sample JSON file in the [Assign a custom policy to the AKS cluster](#assign-a-custom-policy-to-the-aks-cluster) section below.

-- Select the **Base64Encoded** option
- This will generate a new Azure Policy definition in the JSON format and encode the ConstraintTemplate in Base64 format

> [!HINT]
> The template info can also refer to a URL where the ConstraintTemplate is hosted. This is useful when you want to reference a ConstraintTemplate that is hosted in a public repository.

- Fill in details where you see the text **/_ EDIT HERE _/**

@@ -383,7 +381,7 @@ curl -o constrainttemplate-as-policy.json https://raw.githubusercontent.com/Azur

- **Policy rule**: Clear the existing content and paste the JSON you copied from the **constrainttemplate-as-policy.json** file
- Click **Save** at the bottom of the page

-#### Deploy a custom policy definition
+#### Assign a custom policy to the AKS cluster

With the custom policy definition created, you can now assign it to the AKS cluster.
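If you would rather script this step than click through the portal steps that follow, the same assignment can also be created with the Azure CLI. This is only a sketch under assumptions: **approved-registries-only** is a hypothetical name for the custom definition you just created, and parameter values (effect, namespace exclusions, image registry) would still need to be supplied via `--params`, just as in the portal:

```bash
# Look up the AKS cluster resource ID to use as the assignment scope
AKS_ID=$(az aks show \
  --name myakscluster \
  --resource-group myresourcegroup \
  --query id \
  --output tsv)

# Assign the custom policy definition at the cluster scope
az policy assignment create \
  --name approved-registries-only \
  --policy approved-registries-only \
  --scope $AKS_ID
```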
@@ -394,11 +392,14 @@ With the custom policy definition created, you can now assign it to the AKS clus - In the **Parameters** tab, enter the following details: - Uncheck the **Only show parameters that need input or review** checkbox - **Effect**: Select **deny** from the dropdown - - **Namespace exclusions**: Replace the existing content with `["kube-system","kube-node-lease","kube-public", "gatekeeper-system","app-routing-system","azappconfig-system","sc-system"]` - - **Image registry**: Enter your container registry URL, for example `@lab.CloudResourceTemplate(AKSAutomatic).Outputs[containerRegistryLoginServer]/` + - **Namespace exclusions**: Replace the existing content with `["kube-system","kube-node-lease","kube-public","gatekeeper-system","app-routing-system","azappconfig-system","sc-system","aks-command"]` + - **Image registry**: Enter your container registry URL as `@lab.CloudResourceTemplate(AKSAutomatic).Outputs[containerRegistryLoginServer]/` - Click **Review + create** to review the policy assignment - Click **Create** to assign the policy definition to the AKS cluster +> [!ALERT] +> This policy assignment uses **Namespace exclusions** to exclude system namespaces from the policy enforcement. This is important because you may deny the deployment of certain pods if the namespaces are not "whitelisted" in the policy assignment. The alternative here is to only apply the policy to a specific namespace by using the **Namespace inclusions** parameter instead and specifying the namespace you want to enforce the policy on. + Awesome! You have successfully enforced custom policies in the AKS cluster. Once the policy assignment has taken effect, you can try deploying a pod with an image from an unapproved container registry to see the policy in action. However, this policy assignment can take up to 15 minutes to take effect, so let's move on to the next section. For more information on how to create a policy definition from a ConstraintTemplate or MutationTemplate, refer to the following documentation links: @@ -419,6 +420,10 @@ We can leverage these two services to store our application configurations and s ### Provision Azure resources +In order to complete this section, you will need a few Azure resources, so use the Azure CLI to create the following resources. + +#### Azure Key Vault setup + Run the following command to create an Azure Key Vault. ```bash @@ -447,6 +452,8 @@ az keyvault secret set \ --value MySecretValue1 ``` +#### Azure App Configuration setup + Run the following command to create an Azure App Configuration store. ```bash @@ -503,7 +510,7 @@ az role assignment create \ ### Azure App Configuration Provider for Kubernetes -AKS offers an extension called the [Azure App Configuration Provider for Kubernetes](https://learn.microsoft.com/azure/aks/azure-app-configuration?tabs=cli) that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps. This extension is not installed by default in AKS Automatic clusters, so you will need to install it manually. To save you some time in this lab, the extension has been pre-installed in the AKS Automatic cluster for you. +AKS offers an extension called the [Azure App Configuration Provider for Kubernetes](https://learn.microsoft.com/azure/aks/azure-app-configuration?tabs=cli) that allows you to sync configurations from Azure App Configuration to Kubernetes ConfigMaps and/or Kubernetes Secrets. 
This extension is not installed by default in AKS Automatic clusters, but in this lab environment the extension has been pre-installed in the AKS Automatic cluster for you.

Run the following command to verify that the Azure app config provider pods are running.

@@ -513,7 +520,7 @@ kubectl get pods -n azappconfig-system

### Passwordless authentication to Azure services

-We also want to establish a passwordless connection between the AKS cluster and the Azure App Configuration store. We can do this by leveraging the [AKS Service Connector](https://learn.microsoft.com/azure/service-connector/how-to-use-service-connector-in-aks). The AKS Service Connector will take care of manual tasks like setting up the necessary Azure RBAC roles and federated credentials for authentication, creating the necessary Kubernetes Service Account, and creating any firewall rules needed to allow the AKS cluster to communicate with the Azure service. It makes it really simple to get your application pods connected to Azure services using [AKS Workload Identity](https://learn.microsoft.com/azure/aks/workload-identity-deploy-cluster).
+The Azure App Config Provider pods will reach out to the Azure App Configuration store to retrieve key value pairs and/or secrets. The best practice is to establish a passwordless connection between the AKS cluster and the Azure App Configuration store, and you can achieve this by using [AKS Workload Identity](https://learn.microsoft.com/azure/aks/workload-identity-deploy-cluster) and leveraging the [AKS Service Connector](https://learn.microsoft.com/azure/service-connector/how-to-use-service-connector-in-aks). The AKS Service Connector will take care of manual tasks like setting up the necessary Azure RBAC, creating a [federated credential](https://learn.microsoft.com/graph/api/resources/federatedidentitycredentials-overview?view=graph-rest-1.0), creating a Kubernetes [ServiceAccount](https://kubernetes.io/docs/concepts/security/service-accounts/), and creating any firewall rules needed to communicate with the Azure service.

Run the following command to create an AKS Service Connector to connect the AKS cluster to the Azure App Configuration store.

@@ -524,23 +531,22 @@ az aks connection create appconfig \

--resource-group myresourcegroup \
--target-resource-group myresourcegroup \
--app-config $AC_NAME \
---workload-identity $AC_ID \
---client-type none
+--workload-identity $AC_ID

> [!ALERT]
> This can take up to 5 minutes to complete.

-This command will create a service connector to allow pods in the **dev** namespace to connect to the Azure App Configuration store using the User-Assigned Managed Identity that was created earlier. The service connector will grant the User-Assigned Managed Identity the necessary permissions to access the Azure App Configuration store and configure a [federated credential](https://learn.microsoft.com/graph/api/resources/federatedidentitycredentials-overview?view=graph-rest-1.0) on the managed identity that will allow the Kubernetes [ServiceAccount](https://kubernetes.io/docs/concepts/security/service-accounts/) to authenticate via workload identity. This is a powerful feature that allows you to connect your application pods to Azure services without having to manage any credentials.
+This command will create a service connector to allow pods in the **dev** namespace to connect to the Azure App Configuration store using the User-Assigned Managed Identity that was created earlier.
The service connector will grant the User-Assigned Managed Identity the necessary permissions to access the Azure App Configuration store and configure a federated credential on the managed identity that will allow the ServiceAccount assigned to the pod to authenticate via workload identity.

> [!TIP]
-> The AKS Service Connector can also be used to connect your application pods to many other Azure services that support Microsoft Entra ID authentication. For more information, refer to the [service connector documentation](https://learn.microsoft.com/azure/service-connector/overview#what-services-are-supported-by-service-connector).
+> The AKS Service Connector is a powerful feature that allows you to connect your application pods to Azure services without having to manage any credentials. For more information, refer to the [service connector documentation](https://learn.microsoft.com/azure/service-connector/overview#what-services-are-supported-by-service-connector).

### Config sync to Kubernetes

-The Azure App Configuration Provider for Kubernetes extension also installed new Kubernetes [Custom Resource Definitions (CRDs)](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/) which you can use to sync configurations from the Azure App Configuration store to Kubernetes ConfigMaps and optionally Kubernetes Secrets.
+The Azure App Configuration Provider for Kubernetes extension also installed new Kubernetes [Custom Resource Definitions (CRDs)](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/) which are used to sync configurations from the Azure App Configuration store to Kubernetes ConfigMaps and optionally Kubernetes Secrets.

-We can now deploy a sync configuration manifest to sync the configurations from Azure App Configuration to Kubernetes ConfigMaps. But first we will need some values for the manifest.
+Before you deploy the sync configuration manifest, you will need to collect the Azure App Configuration store's endpoint and the Kubernetes ServiceAccount name.

Run the following command to get the Azure App Configuration store's endpoint.

@@ -551,7 +557,7 @@ AC_ENDPOINT=$(az appconfig show \

--output tsv)

-As mentioned above, we will use Workload Identity to connect to the Azure App Configuration store in a passwordless manner. The AKS Automatic cluster is already configured with Workload Identity, and the AKS Service Connector created a Kubernetes ServiceAccount that you can use to authenticate to the Azure App Configuration store and ultimately the Azure Key Vault.
+As mentioned above, we will use Workload Identity to connect to the Azure App Configuration store in a passwordless manner. Workload Identity is enabled by default in AKS Automatic clusters.

Run the following command to get the Kubernetes ServiceAccount name.

@@ -601,7 +607,7 @@ kubectl get cm -n dev myconfigmap -o jsonpath='{.data}'

Also, check to see if the secret in the configuration store has been synced to the Kubernetes Secret.
```bash
-kubectl get secret -n dev mysecret -ojsonpath='{.data.MySecret1}' | base64 -d
+kubectl get secret -n dev mysecret -o jsonpath='{.data.MySecret1}' | base64 -d
```

> [!KNOWLEDGE]

@@ -635,7 +641,7 @@ One key benefit of Kubernetes is its ability to scale workloads across a pool of

### Cluster autoscaling

-With AKS Automatic, the [Node Autoprovision (NAP)](https://learn.microsoft.com/azure/aks/node-autoprovision?tabs=azure-cli) feature is enabled by default and will serve as the default cluster autoscaler. AKS Node Autoprovision is the Azure implementation of the [Karpenter project](https://karpenter.sh) which was developed by friends at AWS and [donated to the Cloud Native Computing Foundation (CNCF)](https://aws.amazon.com/blogs/containers/karpenter-graduates-to-beta/). In short, Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster.
+With AKS Automatic, the [Node Autoprovision (NAP)](https://learn.microsoft.com/azure/aks/node-autoprovision?tabs=azure-cli) feature is enabled by default and acts as the cluster autoscaler. AKS Node Autoprovision is the Azure implementation of the [Karpenter project](https://karpenter.sh) which was developed by friends at AWS and [donated to the Cloud Native Computing Foundation (CNCF)](https://aws.amazon.com/blogs/containers/karpenter-graduates-to-beta/). In short, Karpenter is a Kubernetes controller that automates the provisioning, right-sizing, and termination of nodes in a Kubernetes cluster.

> [!NOTE]
> The term **Node Autoprovision (NAP)** may be used interchangeably with **Karpenter** in this lab.

@@ -656,9 +662,9 @@ You can view the default NodePool by running the following command.

kubectl get nodepools default -o yaml
```

-However, you may want to create additional NodePools with specific constraints if you have teams that need to deploy workloads that require specific VM attributes.
+You may want to create additional NodePools with specific constraints if you have teams that need to deploy workloads with specific compute requirements.

-Let's create a new NodePool with specific constraints. Run the following command to create a new NodePool.
+Run the following command to create a new NodePool for the dev team.

```bash
kubectl apply -f - <

> [!HINT]
-> Remember we created a new policy that only allows images from specified container registries.
+> Remember we created a new policy that only allows images from specified container registries; container images that are not from the approved container registries will be denied.

Run the following command to get the name of the Azure Container Registry.

@@ -736,7 +744,7 @@ az acr import \

--image product-service:1.5.2
```

-Run the following command to replace the existing product-service pod with ones that tolerates the taint. This will ensure that the pod is scheduled on the dev NodePool.
+Run the following command to replace the existing product-service pod.

```bash
kubectl replace --force -f - <

> [!KNOWLEDGE]
> KEDA is the Kubernetes-based Event Driven Autoscaler. With KEDA, you can scale your workloads based on the number of events in a queue, the length of a stream, or any other custom metric. It won't be covered in this lab, but the KEDA add-on for AKS is enabled by default in AKS Automatic clusters and you can learn more about it [here](https://learn.microsoft.com/azure/aks/keda-about).
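To make the KEDA idea concrete, here is a minimal sketch of what a ScaledObject could look like for the product-service deployment used in this lab. The CPU trigger and the replica bounds are illustrative assumptions, not part of the lab setup, and the CPU scaler only works when the pods have CPU resource requests set:

```bash
kubectl apply -f - <<EOF
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: product-service-scaler
  namespace: dev
spec:
  # Scale the existing product-service deployment
  scaleTargetRef:
    name: product-service
  minReplicaCount: 1
  maxReplicaCount: 10
  triggers:
    # Scale out when average CPU utilization exceeds 60 percent
    - type: cpu
      metricType: Utilization
      metadata:
        value: "60"
EOF
```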
@@ -862,11 +872,11 @@ The YAML spec for a PodDisruptionBudget is relatively easy to write and understa

Follow these steps to create a PodDisruptionBudget for the product-service running in the dev namespace.

-- Navigate to your AKS cluster in the Azure Portal.
-- Under the **Kubernetes resources** section, click on **Workloads**, then click on the **+ Create** button to expand the dropdown.
-- Click on the **Apply a YAML** button. Here you will be presented with a blank YAML editor.
-- Put your cursor in the editor and press **Alt+I** to open the prompt dialog.
-- In the textbox type the following text and click the **send** button.
+- Navigate to your AKS cluster in the Azure Portal
+- Under the **Kubernetes resources** section, click on **Workloads**, then click on the **+ Create** button to expand the dropdown
+- Click on the **Apply a YAML** button. Here you will be presented with a blank YAML editor
+- Put your cursor in the editor and press **Alt+I** to open the prompt dialog
+- In the textbox type the following text and press the **Enter** key

```text
create a pod disruption budget for the product-service running in the dev namespace to run at least 1 replica at all times
```

@@ -961,6 +971,8 @@ Let's take a look at the availability zones of the nodes in the dev NodePool. Ru

kubectl get nodes -l karpenter.sh/nodepool=devpool -o custom-columns=NAME:'{.metadata.name}',OS:'{.status.nodeInfo.osImage}',SKU:'{.metadata.labels.karpenter\.azure\.com/sku-name}',ZONE:'{.metadata.labels.topology\.kubernetes\.io/zone}'
```

+In the output, you will see the nodes and their availability zones. Each availability zone is denoted by a dash and a number following the region name. For example, **eastus-1**, **eastus-2**, and **eastus-3** are the availability zones in the **eastus** region.
+
Chances are that the nodes are already spread across different availability zones or you may see that all the nodes are in the same availability zone. You're really leaving it up to chance but to ensure that the pods are spread across different availability zones, you can set pod topology spread constraints.

Run the following command to delete the product-service deployment.

When you see that all the nodes in the dev NodePool are deleted, press **Ctrl+C**

Run the following command to re-deploy the product-service deployment with pod topology spread constraints.

```bash
-kubectl apply-f - <
+kubectl apply -f - <

#### Find the issue

In the Azure Portal, navigate to your AKS cluster and click on the Copilot button found at the top of the page. A panel will open on the right side of the screen and you will be presented with some suggested prompts.

Ask the Copilot:

```text
How is the health of my pods?
```

You should be presented with a kubectl command that you can run to get the status of your pods. Click the **Yes** button to execute the command from the Run command page.

Scroll through the output and see if you can spot the issue.

-There is a problem with the ai-service pod.
+> [!HINT]
+> There is a problem with the ai-service pod.

#### Find the solution

Ask the Copilot:

```text
-I see the the ai-service pod in the dev namespace with crashloopbackoff status. What does that mean?
+I see the ai-service pod in the dev namespace with CrashLoopBackOff status. What does that mean?
```

-The Copilot should provide you with an explanation of what the crashloopbackoff status means and how to troubleshoot it.
+The Copilot should provide you with an explanation of what the CrashLoopBackOff status means and how to troubleshoot it.

-You were not specific with the pod name so the Copilot gave you a general command to run, so re-prompt the Copilot to restate the commands by giving it the exact pod name `The exact pod name is ai-service-xxxxx. What commands should I run again?`
+You were not specific with the pod name, so the Copilot gave you a general command to run. Re-prompt the Copilot to restate the commands by giving it the exact pod name `The exact pod name is ai-service-xxxxx. What commands should I run again?` (replace the xxxxx with the actual pod name).
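For reference, the commands the Copilot suggests will look something like the following sketch, using the placeholder pod name from above (substitute the real name from your cluster):

```bash
# Show pod details, container states, and the events behind the CrashLoopBackOff
kubectl describe pod ai-service-xxxxx -n dev

# Fetch the logs; --previous shows output from the last failed container
kubectl logs ai-service-xxxxx -n dev --previous
```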
-Some of the commands may include a **Run** button that can enable the Azure Cloud Shell, don't use this as you'd need to re-authenticate from within the Cloud Shell. Instead, copy the **kubectl describe** pod command and run it in the Run command window to get more information about the pod. The **kubectl describe** command will provide you with more information about the pod including the events that led to the crashloopbackoff status. You might get a little more information about the issue if you look through the pod logs. The Copilot should have also provided you with a **kubectl logs** command to get the logs of the pod. Run that command to get the logs.
+> [!HINT]
+> Some of the commands may include a **Run** button that opens the Azure Cloud Shell. Don't use it, as you'd need to re-authenticate from within the Cloud Shell. Instead, copy the **kubectl describe** pod command and run it in the Run command window to get more information about the pod.
+
+The **kubectl describe** command will provide you with more information about the pod, including the events that led to the CrashLoopBackOff status. You might get a little more information about the issue if you look through the pod logs.
+
+The Copilot should have also provided you with a **kubectl logs** command to get the logs of the pod.
+
+Run that command to get the logs.

You should see that the ai-service pod is failing because it is missing environment variables that are required to connect to Azure OpenAI. Do you have an Azure OpenAI service running? If you are not sure, you can ask the Copilot `Do I have an Azure OpenAI service running?`

@@ -1140,31 +1162,59 @@ az cognitiveservices account deployment create \

> [!ALERT]
> The model version above may not be available in your region. You can check the model availability [here](https://learn.microsoft.com/azure/ai-services/openai/concepts/models?tabs=python-secure#standard-deployment-model-availability)

-The dev team also tells you that the ai-service pod requires a ConfigMap named **ai-service-configs** with the following environment variables to connect to the Azure OpenAI service.
+The dev team also tells you that the ai-service pod uses a ConfigMap named **ai-service-configs** with the following environment variables to connect to the Azure OpenAI service.

- **AZURE_OPENAI_DEPLOYMENT_NAME** set to "gpt-35-turbo"
- **AZURE_OPENAI_ENDPOINT** set to the endpoint of the Azure OpenAI service
- **USE_AZURE_OPENAI** set to "True"

+Run the following command to delete the existing ConfigMap.
+
+```bash
+kubectl delete configmap ai-service-configs -n dev
+```
+
+Run the following command to create a new ConfigMap with the Azure OpenAI service endpoint.
+
+```bash
+kubectl create configmap ai-service-configs -n dev --from-literal=AZURE_OPENAI_DEPLOYMENT_NAME=gpt-35-turbo --from-literal=AZURE_OPENAI_ENDPOINT=$(az cognitiveservices account show --name $AI_NAME --resource-group myresourcegroup --query properties.endpoint -o tsv) --from-literal=USE_AZURE_OPENAI=True
+```
+
-Additionally the ai-service pod requires a Secret named **ai-service-secrets** with the following variable to authenticate to the Azure OpenAI service.
+Additionally, the ai-service pod uses a Secret named **ai-service-secrets** with the following variable to authenticate to the Azure OpenAI service.

- **OPENAI_API_KEY** set to the API key of the Azure OpenAI service

-#### Challenge
+Run the following command to delete the existing Secret.
-Based on what you have learned so far in this lab, can you complete the rest of the steps to get the ai-service pod running?
+```bash
+kubectl delete secret ai-service-secrets -n dev
+```

-> [!HINT]
-> You can put the environment variables in the Azure App Configuration store and sync them to the Kubernetes ConfigMap. You can then update the ai-service deployment to use the ConfigMap for the environment variables.
+Run the following command to create a new Secret with the Azure OpenAI service API key.
+
+```bash
+kubectl create secret generic ai-service-secrets -n dev --from-literal=OPENAI_API_KEY=$(az cognitiveservices account keys list --name $AI_NAME --resource-group myresourcegroup --query key1 -o tsv)
+```
+
+Finally, run the following command to re-deploy the ai-service pod.
+
+```bash
+kubectl rollout restart deployment ai-service -n dev
+```
+
+You should see the ai-service pod status change from CrashLoopBackOff to Running after a few minutes.
+
+#### Challenges

-How can you go about updating this to use passwordless authentication with AKS Workload Identity instead?
+1. Based on what you have learned so far in this lab, can you leverage Azure App Configuration to store the environment variables for the ai-service pod and sync them to the Kubernetes ConfigMap and Secret?
+2. How can you go about updating this to use passwordless authentication with AKS Workload Identity instead?

> [!HINT]
> A complete walkthrough of the solution can be found [here](https://learn.microsoft.com/azure/aks/open-ai-secure-access-quickstart)

### Troubleshooting with kubectl

-The Azure Copilot gave you some pretty good suggestions to start troubleshooting with kubectl. The **kubectl describe** command is a great way to get more information about a pod. You can also use the **kubectl logs** command to get the logs of a pod. One thing to note about using the **kubectl logs** command is that it only works for pods that are running. If the pod is in a crashloopbackoff status, you may not be able to get the logs of the pod that failed. In this case you can use the **--previous** flag to get the logs of the previous container that failed.
+The Azure Copilot gave you some pretty good suggestions to start troubleshooting with kubectl. The **kubectl describe** command is a great way to get more information about a pod. You can also use the **kubectl logs** command to get the logs of a pod. One thing to note about using the **kubectl logs** command is that it only works for pods that are running. If the pod is in a CrashLoopBackOff status, you may not be able to get the logs of the pod that failed. In this case, you can use the **--previous** flag to get the logs of the previous container that failed.

Finally, be sure to check out the [Troubleshooting Applications](https://kubernetes.io/docs/tasks/debug/debug-application/) guide found on the Kubernetes documentation site and the following resources for more information on troubleshooting AKS:

@@ -1178,6 +1228,6 @@ Finally, be sure to checkout the [Troubleshooting Applications](https://kubernet

## Summary

Congratulations! You have completed the workshop on operating AKS Automatic. You have learned how to create an AKS Automatic cluster, enforce custom policies, sync configurations to the cluster, scale workloads, and apply best practices for workload scheduling. You have also learned how to troubleshoot issues in AKS. You are now well-equipped to operate AKS Automatic clusters and ensure that your workloads are running efficiently and effectively.
-This lab is also available at https://aka.ms/aks/labs along with others, so feel free to check them out.
+This lab is also available at [https://aka.ms/aks/labs](https://aka.ms/aks/labs) along with others, so be sure to check out the site often for new labs and updates.

-If you have any feedback or questions on AKS in general, please feel free to reach out to us at https://aka.ms/aks/feedback.
+If you have any feedback or questions on AKS in general, please reach out to us at [https://aka.ms/aks/feedback](https://aka.ms/aks/feedback).

From 7b29fa15401738cb6a43970cf7aa8cb5f6941c50 Mon Sep 17 00:00:00 2001
From: Paul Yu
Date: Fri, 15 Nov 2024 22:46:30 -0800
Subject: [PATCH 24/27] edits

---
 workshops/operating-aks-automatic/workshop.md | 27 ++++++++++++-------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md
index 773ef992..b0288a88 100644
--- a/workshops/operating-aks-automatic/workshop.md
+++ b/workshops/operating-aks-automatic/workshop.md
@@ -127,7 +127,7 @@ AKS_ID=$(az aks show \
--output tsv)
```

-Run the following command to get the developer's user principal ID.
+Run the following command to get a developer's user principal ID.

```bash
DEV_USER_PRINCIPAL_ID=$(az ad user show \
@@ -159,7 +159,7 @@ Now, run the following command to get the dev namespace.

kubectl get namespace dev
```

-Since there is no cached token in the kubelogin directory, this will trigger a new authentication prompt. Proceed to log in with the developer's user account.
+Since there is no cached token in the kubelogin directory, this will trigger a new authentication prompt. Proceed to log in with the developer's user account: click the **Use another account** button and enter a developer's user credentials.

After logging in, head back to your terminal. You should see details of the **dev** namespace. This means that the dev user has the necessary permissions to access the **dev** namespace.
@@ -340,6 +340,10 @@ curl -o constrainttemplate.yaml https://raw.githubusercontent.com/azure-samples/

- Open the constrainttemplate.yaml file in VS Code and take a look at the contents

+```bash
+code constrainttemplate.yaml
+```
+
> [!KNOWLEDGE]
> The constraint template includes Rego code on line 17 that enforces that all containers in the AKS cluster are sourced from approved container registries. The approved container registries are defined in the **registry** parameter, and this is where you can specify the container registry URL when implementing the ConstraintTemplate.

- Select the **Base64Encoded** option

> [!HELP]
-> The extension activation process can take a few minutes to complete. If you cannot get the extension to generate JSON from the ConstraintTemplate, that's okay, you will use a sample JSON file in the [Deploy a custom policy definition](#deploy-a-custom-policy-definition) section below.
+> The extension activation process can take a few minutes to complete. If you cannot get the extension to generate JSON from the ConstraintTemplate, that's okay; skip to the [Deploy a custom policy definition](#deploy-a-custom-policy-definition) section below, where you will use a sample Azure Policy JSON file.
- This will generate a new Azure Policy definition in the JSON format and encode the ConstraintTemplate in Base64 format
@@ -371,6 +375,11 @@ curl -o constrainttemplate-as-policy.json https://raw.githubusercontent.com/Azur
```

- Open the **constrainttemplate-as-policy.json** file and copy the JSON to the clipboard
+
+```bash
+code constrainttemplate-as-policy.json
+```
+
- Navigate back to the [Azure Policy blade](https://portal.azure.com/#view/Microsoft_Azure_Policy/PolicyMenuBlade/~/Overview) in the Azure Portal
- Click on **Definitions** under the **Authoring** section
- Click on **+ Policy definition**, then enter the following details:
@@ -392,7 +401,7 @@ With the custom policy definition created, you can now assign it to the AKS clus
- In the **Parameters** tab, enter the following details:
  - Uncheck the **Only show parameters that need input or review** checkbox
  - **Effect**: Select **deny** from the dropdown
-  - **Namespace exclusions**: Replace the existing content with `["kube-system","kube-node-lease","kube-public","gatekeeper-system","app-routing-system","azappconfig-system","sc-system","aks-command"]`
+  - **Namespace exclusions**: Replace the existing content with `["kube-system","gatekeeper-system","azure-arc", "kube-node-lease","kube-public","app-routing-system","azappconfig-system","sc-system","aks-command"]`
  - **Image registry**: Enter your container registry URL as `@lab.CloudResourceTemplate(AKSAutomatic).Outputs[containerRegistryLoginServer]/`
- Click **Review + create** to review the policy assignment
- Click **Create** to assign the policy definition to the AKS cluster
@@ -625,7 +634,7 @@ az appconfig kv set \
--yes
```

-After a minute or so, you can check to see if the configurations have been updated in the Kubernetes ConfigMap.
+After 10 seconds, you can check to see if the configurations have been updated in the Kubernetes ConfigMap.

```bash
kubectl get cm -n dev myconfigmap -o jsonpath='{.data}'
@@ -854,7 +863,7 @@ Watch the pod for a few minutes and you should see the pod being restarted with

kubectl get pod -l app=product-service -n dev -w
```

Once you see the pods being restarted, press **Ctrl+C** to exit the watch, then run the following command to confirm the resource requests and limits have been set.

```bash
-kubectl describe po -n dev $(kubectl get pod -n dev -l app=product-service --sort-by=.metadata.creationTimestamp -o jsonpath='{.items[0].metadata.name}') | grep -i requests -A2
+kubectl describe po -n dev | grep -i requests -A2
```

With requests in place, the scheduler can make better decisions about where to place the pod. The VPA will also adjust the resource requests based on the pod's usage. This is also especially important when using pod autoscaling features like the Kubernetes [HorizontalPodAutoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) or [KEDA](https://keda.sh).

@@ -895,7 +904,7 @@ There are other ways to deal with Karpenter's desire to consolidate nodes and st

[Affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity) in Kubernetes is a way for you to influence the scheduling of pods in a Kubernetes cluster. We saw an example of this earlier when we deployed a pod with node affinity and tolerations to ensure that the pod was scheduled on a node that matched the criteria. Pod anti-affinity is used to ensure that pods are not scheduled on the same node. If you noticed, the product-service deployment included three replicas, but they were all scheduled on the same node.
-You can confirm this by running the following command:
+Go back to your terminal and confirm this by running the following command:

```bash
kubectl get po -n dev -l app=product-service -o wide
@@ -981,7 +990,7 @@ Run the following command to delete the product-service deployment.

kubectl delete deployment product-service -n dev
```

-Run the following command to watch the nodes in the dev NodePool.
+Run the following command to watch the nodes in the dev NodePool get deleted.

```bash
kubectl get nodes -l karpenter.sh/nodepool=devpool -w
@@ -1131,7 +1140,7 @@ How do I create an Azure OpenAI service with the GPT-3.5 Turbo model?
```

> [!ALERT]
-> The Azure Copilot won't always provide you with the exact commands to run but it will provide you with the necessary information to get you started.
+> The Azure Copilot may not always provide you with the exact commands to run, but it will provide you with the necessary information to get you started. The instructions should be very close to what you need.

You can follow those instructions, reference the docs it replies with, or run the following commands to quickly create an Azure OpenAI service account.

From 1c42792c20c651ccbbd8d0d40884bf610056aaff Mon Sep 17 00:00:00 2001
From: Paul Yu
Date: Sat, 16 Nov 2024 19:37:13 -0800
Subject: [PATCH 25/27] adding custom domain to openai

---
 workshops/operating-aks-automatic/workshop.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md
index b0288a88..b23e38bb 100644
--- a/workshops/operating-aks-automatic/workshop.md
+++ b/workshops/operating-aks-automatic/workshop.md
@@ -1150,6 +1150,7 @@ AI_NAME=$(az cognitiveservices account create \
--resource-group myresourcegroup \
--kind OpenAI \
--sku S0 \
+--custom-domain myaiservice$RANDOM \
--query name \
--output tsv)
```

From 9d584c50a93380cfd7646a5f2294f3f7d84a70f8 Mon Sep 17 00:00:00 2001
From: Paul Yu
Date: Wed, 20 Nov 2024 09:45:28 -0600
Subject: [PATCH 26/27] replace @lab variables

---
 workshops/operating-aks-automatic/workshop.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md
index b23e38bb..ec785cd8 100644
--- a/workshops/operating-aks-automatic/workshop.md
+++ b/workshops/operating-aks-automatic/workshop.md
@@ -131,11 +131,14 @@ Run the following command to get a developer's user principal ID.
```bash
DEV_USER_PRINCIPAL_ID=$(az ad user show \
---id @lab.CloudPortalCredential(Dev).Username \
+--id <USER_PRINCIPAL_NAME> \
--query id \
--output tsv)
```

+> [!NOTE]
+> Be sure to replace **<USER_PRINCIPAL_NAME>** with the actual user principal name of the developer.
+
Run the following command to assign the **Azure Kubernetes Service RBAC Writer** role to the developer and have the permissions scoped only to the **dev** namespace. This ensures that the developer can only access the resources within the namespace and not the entire cluster.
@@ -402,7 +405,7 @@ With the custom policy definition created, you can now assign it to the AKS clus
  - Uncheck the **Only show parameters that need input or review** checkbox
  - **Effect**: Select **deny** from the dropdown
  - **Namespace exclusions**: Replace the existing content with `["kube-system","gatekeeper-system","azure-arc", "kube-node-lease","kube-public","app-routing-system","azappconfig-system","sc-system","aks-command"]`
-  - **Image registry**: Enter your container registry URL as `@lab.CloudResourceTemplate(AKSAutomatic).Outputs[containerRegistryLoginServer]/`
+  - **Image registry**: Enter your container registry URL as `<ACR_NAME>.azurecr.io/`
- Click **Review + create** to review the policy assignment
- Click **Create** to assign the policy definition to the AKS cluster

From 7f7b9a944eced5464cc8ed3cd649170be8efa8b9 Mon Sep 17 00:00:00 2001
From: Paul Yu
Date: Wed, 20 Nov 2024 09:49:39 -0600
Subject: [PATCH 27/27] admonition fixes

---
 workshops/operating-aks-automatic/workshop.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/workshops/operating-aks-automatic/workshop.md b/workshops/operating-aks-automatic/workshop.md
index ec785cd8..b6adff1e 100644
--- a/workshops/operating-aks-automatic/workshop.md
+++ b/workshops/operating-aks-automatic/workshop.md
Otherwise, it will not evict the pod. Once you see the pods being restarted, press **Ctrl+C** to exit the watch then run the following command to confirm the resource requests and limits have been set.