diff --git a/.cspell/azure-services.txt b/.cspell/azure-services.txt index 7604e2fc..42e9749b 100644 --- a/.cspell/azure-services.txt +++ b/.cspell/azure-services.txt @@ -49,6 +49,7 @@ azurelinux azureml azuremonitor azurestack +azureuser azurevpnconfig containerapp cosmosdb @@ -65,6 +66,7 @@ powerbi rtxprogpu servicebus sharepoint +snet southeastasia wasbs westus diff --git a/.cspell/general-technical.txt b/.cspell/general-technical.txt index b0753db0..74d93435 100644 --- a/.cspell/general-technical.txt +++ b/.cspell/general-technical.txt @@ -50,6 +50,7 @@ aravis architected architecting ariba +armhf arrowdown arrowleft arrowright @@ -334,6 +335,7 @@ davinci dbcc dcgm DDPG +debconf debuggability dedupe deduplicated @@ -459,6 +461,7 @@ frontends frontmatter fullstack fulltext +fuser gartner gbps gcp @@ -499,6 +502,7 @@ hardened hardening hbase hci +hconf hdfs hdinsight heatmap @@ -817,6 +821,8 @@ nic nics nist nodejs +nodesource +nodistro nojekyll noncompliance noncompliant @@ -937,6 +943,7 @@ pipeline pipelines pitj pivottable +pkgs platform platformops platforms @@ -1163,6 +1170,7 @@ sdlc searchable seccomp sectigo +securestring seealso Seldon semver @@ -1985,3 +1993,17 @@ kolmogorov mqtt smirnov zustand + +# Cendio ThinLinc +Cendio +Linc +ThinLinc +tlwebaccess +tlwebadm +tlagent +tlmaster +thinlocal +WEBADM +WEBACCESS +vsmagent +vsmserver diff --git a/infrastructure/setup/optional/deploy-isaac-sim-vm.sh b/infrastructure/setup/optional/deploy-isaac-sim-vm.sh new file mode 100755 index 00000000..433606bc --- /dev/null +++ b/infrastructure/setup/optional/deploy-isaac-sim-vm.sh @@ -0,0 +1,392 @@ +#!/usr/bin/env bash +# Deploy Isaac Sim VM with Terraform-derived infrastructure defaults +set -o errexit -o nounset + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck disable=SC1091 +# shellcheck source=infrastructure/setup/lib/common.sh +source "$SCRIPT_DIR/../lib/common.sh" +# shellcheck disable=SC1091 +# shellcheck 
source=infrastructure/setup/defaults.conf +source "$SCRIPT_DIR/../defaults.conf" + +show_help() { + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Deploy one or more Isaac Sim VMs using the Bicep template in +infrastructure/setup/optional/isaac-sim-vm/bicep/. By default, the script reads the +resource group, location, dedicated Isaac Sim VM subnet, and shared NSG from +Terraform outputs. When Terraform state is unavailable, pass --tfvars-file to +derive the standard resource names from a Terraform variables file and resolve +the subnet and NSG IDs from Azure. + +OPTIONS: + -h, --help Show this help message + -t, --tf-dir DIR Terraform directory (default: $DEFAULT_TF_DIR) + --tfvars-file PATH Terraform tfvars file used when tfstate is unavailable + --vm-name NAME VM name to deploy + --resource-group NAME Target resource group override + --isolated-vm-rg Create and use a derived VM-specific resource group + --location LOCATION Azure location override + --subnet-id ID Existing subnet resource ID override + --nsg-id ID Existing network security group resource ID override + --admin-username NAME Admin username (default: azureuser) + --admin-password VALUE Admin password override + --vm-size SIZE VM size (default: Standard_NV36ads_A10_v5) + --deployment-name NAME ARM deployment name (default: isaac-lab-vms) + --skip-marketplace-requirements + Skip acceptance of the Isaac Sim marketplace terms + --enable-mde-linux Deploy the Defender for Endpoint extension with defaults + --config-preview Print configuration and exit + +EXAMPLES: + $(basename "$0") --vm-name isaac-sim-dev-01 + $(basename "$0") --vm-name isaac-sim-dev-01 --isolated-vm-rg + $(basename "$0") --tfvars-file infrastructure/terraform/terraform.tfvars --vm-name isaac-sim-dev-01 + $(basename "$0") --vm-name isaac-sim-dev-01 --subnet-id <subnet-id> --nsg-id <nsg-id> +EOF +} + +trim_whitespace() { + local value="${1-}" + value="${value#"${value%%[![:space:]]*}"}" + value="${value%"${value##*[![:space:]]}"}" + printf '%s\n' "$value" +} 
+ +resolve_file_path() { + local file_path="${1:?file path required}" + + if [[ "$file_path" = /* ]]; then + printf '%s\n' "$file_path" + return 0 + fi + + printf '%s/%s\n' "$(cd "$(dirname "$file_path")" && pwd)" "$(basename "$file_path")" +} + +read_tfvars_metadata() { + local tf_dir="${1:?terraform directory required}" + local tfvars_path="${2:?terraform variables file required}" + local metadata="" + + [[ -f "$tfvars_path" ]] || fatal "Terraform variables file not found: $tfvars_path" + [[ -d "$tf_dir" ]] || fatal "Terraform directory not found: $tf_dir" + + info "Initializing Terraform in $tf_dir for tfvars evaluation..." + terraform -chdir="$tf_dir" init -backend=false -input=false -no-color >/dev/null || fatal "Unable to initialize Terraform in $tf_dir" + + metadata=$(terraform -chdir="$tf_dir" console -var-file="$tfvars_path" <<'EOF' +jsonencode({ + environment = var.environment + instance = var.instance + location = var.location + resource_group_name = var.resource_group_name + resource_prefix = var.resource_prefix + should_create_vm_subnet = var.should_create_vm_subnet +}) +EOF + ) || fatal "Unable to evaluate Terraform variables from $tfvars_path" + + jq -r '.' 
<<< "$metadata" +} + +print_az_command() { + local -a command=("$@") + local index + + for index in "${!command[@]}"; do + if [[ "${command[$index]}" == adminPassword=* ]]; then + command[index]='adminPassword=' + fi + done + + printf '%q ' az "${command[@]}" + echo +} + +derive_vm_resource_group_from_tfvars() { + local resource_prefix="${1-}" + local environment="${2-}" + local instance="${3-}" + + if [[ -z "$resource_prefix" || -z "$environment" || -z "$instance" ]]; then + return 1 + fi + + printf 'rg-%s-virtual-machines-%s-%s\n' "$resource_prefix" "$environment" "$instance" +} + +derive_vm_resource_group_from_main_rg() { + local main_resource_group="${1:?main resource group required}" + + if [[ "$main_resource_group" =~ ^rg-(.+)-([^-]+)-([^-]+)$ ]]; then + printf 'rg-%s-virtual-machines-%s-%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}" + return 0 + fi + + return 1 +} + + +repo_root="$(cd "$SCRIPT_DIR/../../.." && pwd)" +template_file="$repo_root/infrastructure/setup/optional/isaac-sim-vm/bicep/main.bicep" + +# Defaults +tf_dir="$SCRIPT_DIR/../$DEFAULT_TF_DIR" +tfvars_file="" +resource_group="" +vm_resource_group="" +location="" +subnet_id="" +nsg_id="" +admin_username="azureuser" +admin_password="${ISAAC_LAB_VM_ADMIN_PASSWORD:-}" +vm_size="Standard_NV36ads_A10_v5" +deployment_name="isaac-lab-vms" +install_marketplace_requirements=true +isolated_vm_rg=false +config_preview=false +enable_mde_linux=false +vm_name="" +tfvars_metadata='{}' +tfvars_environment="" +tfvars_instance="001" +tfvars_location="" +tfvars_resource_group_name="" +tfvars_resource_prefix="" +tfvars_should_create_vm_subnet="false" +tfvars_resource_name_suffix="" +tfvars_vm_subnet_name="" +tfvars_vnet_name="" +tfvars_nsg_name="" +marketplace_publisher="nvidia" +marketplace_offer="isaac_sim_developer_workstation" +marketplace_plan="isaac_sim_developer_workstation_community_linux" + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) show_help; exit 0 ;; + -t|--tf-dir) 
tf_dir="$2"; shift 2 ;; + --tfvars-file) tfvars_file="$2"; shift 2 ;; + --vm-name) vm_name="$2"; shift 2 ;; + --resource-group) resource_group="$2"; shift 2 ;; + --isolated-vm-rg) isolated_vm_rg=true; shift ;; + --location) location="$2"; shift 2 ;; + --subnet-id) subnet_id="$2"; shift 2 ;; + --nsg-id) nsg_id="$2"; shift 2 ;; + --admin-username) admin_username="$2"; shift 2 ;; + --admin-password) admin_password="$2"; shift 2 ;; + --vm-size) vm_size="$2"; shift 2 ;; + --deployment-name) deployment_name="$2"; shift 2 ;; + --install-marketplace-requirements) install_marketplace_requirements=true; shift ;; + --skip-marketplace-requirements) install_marketplace_requirements=false; shift ;; + --enable-mde-linux) enable_mde_linux=true; shift ;; + --config-preview) config_preview=true; shift ;; + *) fatal "Unknown option: $1" ;; + esac +done + +require_tools az jq terraform + +[[ -f "$template_file" ]] || fatal "Bicep template not found: $template_file" + +if [[ -n "$tfvars_file" ]]; then + tfvars_file=$(resolve_file_path "$tfvars_file") + info "Reading Terraform variables from $tfvars_file..." 
+ tfvars_metadata=$(read_tfvars_metadata "$tf_dir" "$tfvars_file") + tfvars_environment=$(tf_get "$tfvars_metadata" 'environment') + tfvars_instance=$(tf_get "$tfvars_metadata" 'instance' '001') + tfvars_location=$(tf_get "$tfvars_metadata" 'location') + tfvars_resource_group_name=$(tf_get "$tfvars_metadata" 'resource_group_name') + tfvars_resource_prefix=$(tf_get "$tfvars_metadata" 'resource_prefix') + tfvars_should_create_vm_subnet=$(tf_get "$tfvars_metadata" 'should_create_vm_subnet' 'false') + + if [[ -n "$tfvars_environment" && -n "$tfvars_resource_prefix" ]]; then + tfvars_resource_name_suffix="${tfvars_resource_prefix}-${tfvars_environment}-${tfvars_instance}" + tfvars_vnet_name="vnet-${tfvars_resource_name_suffix}" + tfvars_vm_subnet_name="snet-isaaclab-vm-${tfvars_resource_name_suffix}" + tfvars_nsg_name="nsg-${tfvars_resource_name_suffix}" + tfvars_resource_group_name="${tfvars_resource_group_name:-rg-${tfvars_resource_name_suffix}}" + fi +fi + +tf_output='{}' +if [[ -f "$tf_dir/terraform.tfstate" ]]; then + info "Reading terraform outputs from $tf_dir..." + tf_output=$(read_terraform_outputs "$tf_dir") +else + if [[ -n "$tfvars_file" ]]; then + warn "terraform.tfstate not found in $tf_dir; using tfvars-derived defaults where possible" + else + warn "terraform.tfstate not found in $tf_dir; using explicit CLI overrides only" + fi +fi + +if [[ -z "$resource_group" ]]; then + resource_group=$(tf_get "$tf_output" "resource_group.value.name") + if [[ -z "$resource_group" ]]; then + resource_group="$tfvars_resource_group_name" + fi + [[ -n "$resource_group" ]] || fatal "Resource group not provided and not found in terraform outputs" +fi + +if [[ -z "$location" ]]; then + location=$(tf_get "$tf_output" "resource_group.value.location") + if [[ -z "$location" ]]; then + location="$tfvars_location" + fi +fi + +if [[ -z "$location" && -n "$resource_group" ]]; then + info "Resolving resource group location from Azure..." 
+ location=$(az group show --name "$resource_group" --query location --output tsv) +fi + +if [[ "$isolated_vm_rg" == "true" ]]; then + if vm_resource_group=$(derive_vm_resource_group_from_tfvars "$tfvars_resource_prefix" "$tfvars_environment" "$tfvars_instance"); then + : + elif vm_resource_group=$(derive_vm_resource_group_from_main_rg "$resource_group"); then + : + else + fatal "Unable to derive the isolated VM resource group name from $resource_group. Provide --tfvars-file with resource_prefix, environment, and instance values or use a main resource group named like rg-<resource_prefix>-<environment>-<instance>." + fi + + [[ -n "$location" ]] || fatal "Location is required to create the isolated VM resource group. Pass --location or ensure the main resource group location can be resolved." +else + vm_resource_group="$resource_group" +fi + +if [[ -z "$vm_name" ]]; then + fatal "Provide --vm-name" +fi + +if [[ -z "$subnet_id" ]]; then + subnet_id=$(tf_get "$tf_output" "vm_subnet.value.id") + if [[ -z "$subnet_id" && -n "$tfvars_file" ]]; then + [[ "$tfvars_should_create_vm_subnet" == "true" ]] || fatal "Subnet ID not provided and tfvars-derived configuration does not enable should_create_vm_subnet." + [[ -n "$tfvars_vnet_name" && -n "$tfvars_vm_subnet_name" ]] || fatal "Subnet ID not provided and tfvars file is missing resource_prefix or environment needed to derive subnet names." + info "Resolving VM subnet ID from Azure using tfvars-derived names..." + subnet_id=$(az network vnet subnet show \ + --resource-group "$resource_group" \ + --vnet-name "$tfvars_vnet_name" \ + --name "$tfvars_vm_subnet_name" \ + --query id \ + --output tsv) + fi + [[ -n "$subnet_id" ]] || fatal "Subnet ID not provided and vm_subnet output not found. Enable should_create_vm_subnet in Terraform or pass --subnet-id." 
+fi + +if [[ -z "$nsg_id" ]]; then + nsg_id=$(tf_get "$tf_output" "network_security_group.value.id") + if [[ -z "$nsg_id" && -n "$tfvars_file" ]]; then + [[ -n "$tfvars_nsg_name" ]] || fatal "NSG ID not provided and tfvars file is missing resource_prefix or environment needed to derive the NSG name." + info "Resolving network security group ID from Azure using tfvars-derived names..." + nsg_id=$(az network nsg show \ + --resource-group "$resource_group" \ + --name "$tfvars_nsg_name" \ + --query id \ + --output tsv) + fi + [[ -n "$nsg_id" ]] || fatal "NSG ID not provided and network_security_group output not found. Apply Terraform outputs or pass --nsg-id." +fi + +if [[ -z "$admin_password" && "$config_preview" != "true" ]]; then + if [[ -t 0 ]]; then + read -r -s -p "Admin password: " admin_password + echo + else + fatal "Admin password not provided. Use --admin-password or ISAAC_LAB_VM_ADMIN_PASSWORD." + fi +fi + +if [[ "$config_preview" == "true" ]]; then + section "Configuration Preview" + print_kv "Deployment" "$deployment_name" + print_kv "TFVars File" "${tfvars_file:-none}" + print_kv "Resource Group" "$resource_group" + print_kv "VM Resource Group" "$vm_resource_group" + print_kv "Isolated VM RG" "$isolated_vm_rg" + print_kv "Location" "${location:-resource-group default}" + print_kv "Marketplace Terms" "$install_marketplace_requirements" + print_kv "VM Name" "$vm_name" + print_kv "Subnet ID" "$subnet_id" + print_kv "NSG ID" "$nsg_id" + print_kv "VM Size" "$vm_size" + print_kv "Admin User" "$admin_username" + print_kv "MDE Linux" "$enable_mde_linux" + print_kv "Template" "$template_file" + exit 0 +fi + +#------------------------------------------------------------------------------ +# Deploy Isaac Sim VM +#------------------------------------------------------------------------------ +section "Deploy Isaac Sim VM" + +vm_resource_group_args=( + group create + --name "$vm_resource_group" + --location "$location" +) + +marketplace_args=( + vm image terms accept 
+ --publisher "$marketplace_publisher" + --offer "$marketplace_offer" + --plan "$marketplace_plan" +) + +deployment_args=( + deployment group create + --name "$deployment_name" + --resource-group "$resource_group" + --template-file "$template_file" + --parameters "vmName=$vm_name" + --parameters "subnetId=$subnet_id" + --parameters "nsgId=$nsg_id" + --parameters "adminUsername=$admin_username" + --parameters "adminPassword=$admin_password" + --parameters "vmSize=$vm_size" +) + +if [[ -n "$location" ]]; then + deployment_args+=(--parameters "location=$location") +fi + +if [[ "$isolated_vm_rg" == "true" ]]; then + deployment_args+=(--parameters "vmResourceGroup=$vm_resource_group") +fi + +if [[ "$enable_mde_linux" == "true" ]]; then + deployment_args+=(--parameters 'mdeLinux={}') +fi + +if [[ "$isolated_vm_rg" == "true" ]]; then + az "${vm_resource_group_args[@]}" >/dev/null +fi + +if [[ "$install_marketplace_requirements" == "true" ]]; then + az "${marketplace_args[@]}" >/dev/null +fi + +az "${deployment_args[@]}" + +#------------------------------------------------------------------------------ +# Deployment Summary +#------------------------------------------------------------------------------ +section "Deployment Summary" +print_kv "Deployment" "$deployment_name" +print_kv "Resource Group" "$resource_group" +print_kv "VM Resource Group" "$vm_resource_group" +print_kv "Isolated VM RG" "$isolated_vm_rg" +print_kv "Location" "${location:-resource-group default}" +print_kv "Marketplace Terms" "$install_marketplace_requirements" +print_kv "VM Name" "$vm_name" +print_kv "Subnet ID" "$subnet_id" +print_kv "NSG ID" "$nsg_id" +print_kv "VM Size" "$vm_size" +print_kv "Admin User" "$admin_username" +print_kv "MDE Linux" "$enable_mde_linux" +info "Isaac Sim VM deployment complete" diff --git a/infrastructure/setup/optional/isaac-sim-vm/README.md b/infrastructure/setup/optional/isaac-sim-vm/README.md new file mode 100644 index 00000000..f4cba326 --- /dev/null +++ 
b/infrastructure/setup/optional/isaac-sim-vm/README.md @@ -0,0 +1,259 @@ +--- +description: Minimal prerequisites and commands for deploying Isaac Linux VMs with Bicep. +ms.date: 2026-02-24 +--- + +# Deploy an Isaac VM for development on Azure + +This optional setup deploys an Azure Virtual Machine into the same network as your platform infrastructure so you can run Isaac Sim for development. Use it when your team does not have local workstations that can run Isaac Sim reliably. + +It installs the [Isaac Sim Developer Workstation](https://marketplace.microsoft.com/product/nvidia.isaac_sim_developer_workstation?tab=Overview) marketplace offer. + +## 🔧 Why Use The Script? + +The deployment script integrates the Isaac Sim Developer Workstation with your existing infrastructure and installs the workstation prerequisites after provisioning: + +- `uv` installed system-wide at `/usr/local/bin` +- Azure CLI and the Azure ML extension +- AzCopy +- CUDA Toolkit 12.6 +- NVIDIA Container Toolkit configured for Docker +- VS Code Insiders, with Git configured to use `code-insiders --wait` +- PowerShell +- [Cendio's ThinLinc server](https://www.cendio.com/) for graphical remote desktop connection + +## ⚠️ Limitations + +- Only Linux VMs are supported. +- Only private networking is supported. Public IPs are not supported. +- The template reuses an existing subnet and an existing network security group. +- Direct Bicep deployment requires explicit values for `subnetId` and `nsgId`. +- Only password authentication is supported. +- [Cendio's ThinLinc server](https://www.cendio.com/) is always installed; a future update will make it optional. + +## 📋 Prerequisites + +Before deployment: + +1. Complete Steps 1 and 2 of the deployment pipeline: + + ```bash + source infrastructure/terraform/prerequisites/az-sub-init.sh + + cd infrastructure/terraform + terraform apply -var-file=terraform.tfvars + ``` + +1. 
Enable the dedicated VM subnet in `infrastructure/terraform/terraform.tfvars`: + + ```hcl + should_create_vm_subnet = true + ``` + +1. Re-apply Terraform so the VM subnet and shared network security group outputs exist: + + ```bash + cd infrastructure/terraform + terraform apply -var-file=terraform.tfvars + terraform output vm_subnet + terraform output network_security_group + ``` + +1. If the platform uses a private AKS cluster, complete the VPN deployment step before connecting to private VM resources: + + ```bash + cd infrastructure/terraform/vpn + terraform apply + ``` + +1. Accept the NVIDIA marketplace terms once per subscription, or let the deployment script handle it automatically: + + ```bash + az vm image terms accept \ + --publisher nvidia \ + --offer isaac_sim_developer_workstation \ + --plan isaac_sim_developer_workstation_community_linux + ``` + +## 🚀 Terraform-Backed Deployment + +Use the optional deployment script when these VMs should attach to the Terraform-managed platform network by default. + +Deploy a VM with Terraform-derived defaults: + +```bash +bash infrastructure/setup/optional/deploy-isaac-sim-vm.sh --vm-name isaac-sim-dev-01 +``` + +Use `--config-preview` to inspect the resolved configuration without deploying: + +```bash +bash infrastructure/setup/optional/deploy-isaac-sim-vm.sh \ + --vm-name isaac-sim-dev-01 \ + --config-preview +``` + +The script reads these values from Terraform outputs by default: + +| Value | Terraform output | +| ----- | ---------------- | +| Resource group | `resource_group.value.name` | +| Location | `resource_group.value.location` | +| Dedicated VM subnet | `vm_subnet.value.id` | +| Shared NSG | `network_security_group.value.id` | + +If `terraform.tfstate` is unavailable, pass `--tfvars-file` with a Terraform variables file that includes the same top-level fields used in `terraform.tfvars.example`. The script derives the standard resource names from that file, then resolves the subnet and NSG IDs from Azure. 
+ +```bash +bash infrastructure/setup/optional/deploy-isaac-sim-vm.sh \ + --tfvars-file infrastructure/terraform/terraform.tfvars \ + --vm-name isaac-sim-dev-01 +``` + +Deploy into a derived VM-specific resource group: + +```bash +bash infrastructure/setup/optional/deploy-isaac-sim-vm.sh \ + --vm-name isaac-sim-dev-01 \ + --isolated-vm-rg +``` + +Override any detected value explicitly: + +```bash +bash infrastructure/setup/optional/deploy-isaac-sim-vm.sh \ + --vm-name isaac-sim-dev-01 \ + --subnet-id /subscriptions/.../subnets/... \ + --nsg-id /subscriptions/.../networkSecurityGroups/... +``` + +Enable Microsoft Defender for Endpoint on the VM extension deployment: + +```bash +bash infrastructure/setup/optional/deploy-isaac-sim-vm.sh \ + --vm-name isaac-sim-dev-01 \ + --enable-mde-linux +``` + +If you do not want the script to accept marketplace terms automatically, pass `--skip-marketplace-requirements`. + +The script prompts for the admin password unless you pass `--admin-password` or set `ISAAC_LAB_VM_ADMIN_PASSWORD`. + +## 🚀 Direct Bicep Deployment + +```bash +az deployment group create \ + --resource-group <resource-group> \ + --template-file infrastructure/setup/optional/isaac-sim-vm/bicep/main.bicep \ + --parameters \ + vmName=<vm-name> \ + subnetId=/subscriptions/<subscription-id>/resourceGroups/<network-resource-group>/providers/Microsoft.Network/virtualNetworks/<vnet-name>/subnets/<subnet-name> \ + nsgId=/subscriptions/<subscription-id>/resourceGroups/<network-resource-group>/providers/Microsoft.Network/networkSecurityGroups/<nsg-name> \ + adminUsername=<admin-username> \ + adminPassword=<admin-password> +``` + +Pass `vmResourceGroup=<vm-resource-group>` when the VM resources should be created in a different resource group than the deployment resource group. + +## 🖥️ Connect To The VM After Deployment + +The deployment configures the VM for private-network access only. Connect from a machine that has network reachability to the VM subnet, such as a workstation on the same network or a client connected through the point-to-site VPN. 
+ +Get the VM private IP address: + +```bash +az vm show -d \ + --resource-group <vm-resource-group> \ + --name <vm-name> \ + --query privateIps \ + --output tsv +``` + +### ThinLinc client for a GUI session + +[ThinLinc server](https://www.cendio.com/) is installed during provisioning. Install the [ThinLinc client](https://www.cendio.com/thinlinc/download/) on your local machine, then connect to the VM private IP address and sign in with the VM admin username and password. Make sure you understand [ThinLinc licensing](https://www.cendio.com/thinlinc/buy-pricing/). + +Use this option when you need a remote desktop session for Isaac Sim or other GUI tools. + +### SSH for terminal-only access + +Use SSH when you only need shell access and do not need a graphical desktop session: + +```bash +ssh <admin-username>@<vm-private-ip> +``` + +Use this option for setup, diagnostics, file transfers, or command-line workflows that do not require a UI. + +## 🗑️ Cleanup + +Delete the VM from the resource group that contains the VM resources: + +```bash +az vm delete \ + --resource-group <vm-resource-group> \ + --name <vm-name> \ + --yes +``` + +The network interface and OS disk use `deleteOption: Delete` and are removed with the VM. The data disk uses `deleteOption: Detach` and remains available unless you delete it separately. + +If you deployed with `--isolated-vm-rg`, you can choose to delete the entire derived resource group. Note that deleting the resource group will **delete all VMs deployed to the resource group**: + +```bash +# Will delete all VMs in resource group, use with care! 
+az group delete --name <vm-resource-group> --yes --no-wait +``` + +Delete the ARM deployment record from the deployment resource group when you no longer need it: + +```bash +az deployment group delete \ + --resource-group <deployment-resource-group> \ + --name <deployment-name> +``` + +If `enableSubnetNatGatewayEgress` was enabled, delete the NAT gateway and public IP separately from the networking resource group: + +```bash +az network nat gateway delete \ + --resource-group <network-resource-group> \ + --name <nat-gateway-name> + +az network public-ip delete \ + --resource-group <network-resource-group> \ + --name <nat-public-ip-name> +``` + +## ⚙️ Parameters + +The deployment template in `main.bicep` accepts the following parameters. + +| Name | Type | Required | Declared default | Description | +| ------------------------------ | -------------- | -------- | -------------------------- | ----------- | +| `vmName` | `string` | Yes | None | Name of the virtual machine to deploy. | +| `location` | `string` | No | `resourceGroup().location` | Azure region for deployed resources. | +| `vmResourceGroup` | `string` | No | `resourceGroup().name` | Resource group that receives the VM resources. | +| `tags` | `CommonTags?` | No | `null` | Tags applied to created resources. When `null`, `defaultCommonTags` is used as the effective value. | +| `subnetId` | `string` | Yes | None | Resource ID of the existing subnet used by the VM NIC. | +| `nsgId` | `string` | Yes | None | Resource ID of the existing network security group associated with the VM NIC. | +| `enableSubnetNatGatewayEgress` | `bool` | No | `false` | Deploy a NAT gateway and attach it to the target subnet for outbound internet egress without a VM public IP. | +| `natGatewayName` | `string` | No | `''` | NAT gateway name override when `enableSubnetNatGatewayEgress` is `true`. | +| `natGatewayPublicIpName` | `string` | No | `''` | Public IP name override for the NAT gateway when `enableSubnetNatGatewayEgress` is `true`. | +| `adminUsername` | `string` | Yes | None | Admin username for the Linux VM. 
| +| `adminPassword` | `securestring` | Yes | None | Admin password for the Linux VM. | +| `vmSize` | `string` | No | `Standard_NV36ads_A10_v5` | Virtual machine size. | +| `image` | `ImageConfig?` | No | `null` | Marketplace image configuration. When `null`, `defaultImageConfig` is used as the effective value. | +| `plan` | `PlanConfig?` | No | `null` | Marketplace plan configuration. When `null`, `defaultPlanConfig` is used as the effective value. | +| `osDisk` | `DiskConfig?` | No | `null` | OS disk configuration. When `null`, `defaultOsDiskConfig` is used as the effective value. | +| `dataDisk` | `DiskConfig?` | No | `null` | Data disk configuration. When `null`, `defaultDataDiskConfig` is used as the effective value. | +| `shutdownSchedule` | `ShutdownSchedule?` | No | `null` | Daily auto-shutdown schedule. When `null`, `defaultShutdownSchedule` is used as the effective value. | +| `mdeLinux` | `object?` | No | `null` | Defender for Endpoint extension settings. Set `{}` to enable with defaults. Set `null` to skip deployment. | + +### Structured parameter types + +- `CommonTags`: `environment` +- `ImageConfig`: `publisher`, `offer`, `sku`, `version` +- `PlanConfig`: `publisher`, `product`, `name` +- `DiskConfig`: `storageAccountType`, `sizeGb`, `caching`, `deleteOption` +- `ShutdownSchedule`: `time`, `timeZoneId` diff --git a/infrastructure/setup/optional/isaac-sim-vm/bicep/main.bicep b/infrastructure/setup/optional/isaac-sim-vm/bicep/main.bicep new file mode 100644 index 00000000..dfaea6d9 --- /dev/null +++ b/infrastructure/setup/optional/isaac-sim-vm/bicep/main.bicep @@ -0,0 +1,172 @@ +metadata name = 'Isaac Linux VM Main' +metadata description = 'Main orchestration entrypoint for deploying a single Isaac Linux VM.' 
+ +targetScope = 'resourceGroup' + +import { + CommonTags + ShutdownSchedule + ImageConfig + PlanConfig + DiskConfig + defaultCommonTags + defaultShutdownSchedule + defaultImageConfig + defaultPlanConfig + defaultOsDiskConfig + defaultDataDiskConfig +} from 'types.bicep' + +/* + Common parameters +*/ + +@description('Name of the virtual machine to deploy.') +param vmName string + +@description('Location for deployed resources. Defaults to the current resource group location.') +param location string = resourceGroup().location + +@description('Resource group that receives VM resources. Defaults to the current deployment resource group.') +param vmResourceGroup string = resourceGroup().name + +@description('Tags applied to created resources.') +param tags CommonTags? + +/* + Networking parameters +*/ + +@description('Resource ID of the existing subnet used by the VM NIC.') +param subnetId string + +@description('Resource ID of the existing network security group associated to the VM NIC.') +param nsgId string + +@description('When true, deploys a NAT gateway and attaches it to the target subnet for outbound internet egress without a VM public IP.') +param enableSubnetNatGatewayEgress bool = false + +@description('Optional NAT gateway name override. Used when enableSubnetNatGatewayEgress is true.') +param natGatewayName string = '' + +@description('Optional Public IP name override for NAT gateway. Used when enableSubnetNatGatewayEgress is true.') +param natGatewayPublicIpName string = '' + +/* + Compute parameters +*/ + +@description('Admin username for the Linux VM.') +param adminUsername string + +@description('Password for the Linux VM admin account.') +@secure() +param adminPassword string + +@description('Virtual machine size.') +param vmSize string = 'Standard_NV36ads_A10_v5' + +@description('Marketplace image configuration.') +param image ImageConfig? + +@description('Marketplace plan configuration required for image deployment.') +param plan PlanConfig? 
+ +@description('OS disk configuration.') +param osDisk DiskConfig? + +@description('Data disk configuration.') +param dataDisk DiskConfig? + +@description('Daily auto-shutdown schedule for the VM.') +param shutdownSchedule ShutdownSchedule? + +@description('Optional MDE.Linux extension settings. Set to null to skip extension deployment.') +param mdeLinux object? + +/* + Effective defaults +*/ + +var effectiveTags CommonTags = tags ?? defaultCommonTags +var effectiveImage ImageConfig = image ?? defaultImageConfig +var effectivePlan PlanConfig = plan ?? defaultPlanConfig +var effectiveOsDisk DiskConfig = osDisk ?? defaultOsDiskConfig +var effectiveDataDisk DiskConfig = dataDisk ?? defaultDataDiskConfig +var effectiveShutdownSchedule ShutdownSchedule = shutdownSchedule ?? defaultShutdownSchedule +var subnetIdParts = split(subnetId, '/') +var subnetSubscriptionId = subnetIdParts[2] +var subnetResourceGroupName = subnetIdParts[4] +var virtualNetworkName = subnetIdParts[8] +var subnetName = subnetIdParts[10] +var effectiveNatGatewayName = empty(natGatewayName) ? 'nat-${uniqueString(subnetId)}' : natGatewayName +var effectiveNatGatewayPublicIpName = empty(natGatewayPublicIpName) ? 
'pip-nat-${uniqueString(subnetId)}' : natGatewayPublicIpName +var existingSubnetDefaultOutboundAccess = existingSubnet.properties.?defaultOutboundAccess +var existingSubnetNsg = existingSubnet.properties.?networkSecurityGroup +var existingSubnetServiceEndpoints = existingSubnet.properties.?serviceEndpoints +var existingSubnetDelegations = existingSubnet.properties.?delegations +var existingSubnetRouteTable = existingSubnet.properties.?routeTable + +resource existingVnet 'Microsoft.Network/virtualNetworks@2023-09-01' existing = { + scope: resourceGroup(subnetSubscriptionId, subnetResourceGroupName) + name: virtualNetworkName +} + +resource existingSubnet 'Microsoft.Network/virtualNetworks/subnets@2023-09-01' existing = { + parent: existingVnet + name: subnetName +} + +var discoveredSubnetAddressPrefix = !empty(existingSubnet.properties.addressPrefix) + ? existingSubnet.properties.addressPrefix + : existingSubnet.properties.addressPrefixes[0] + +/* + Modules +*/ + +module linuxIsaacVmModule 'modules/linux-isaac-vm.bicep' = { + name: take('linux-isaac-vm-${vmName}-${uniqueString(resourceGroup().id, vmName)}', 64) + scope: resourceGroup(subscription().subscriptionId, vmResourceGroup) + params: { + vmName: vmName + location: location + subnetId: subnetId + nsgId: nsgId + adminUsername: adminUsername + adminPassword: adminPassword + vmSize: vmSize + image: effectiveImage + plan: effectivePlan + osDisk: effectiveOsDisk + dataDisk: effectiveDataDisk + shutdownSchedule: effectiveShutdownSchedule + mdeLinux: mdeLinux + tags: effectiveTags + } +} + +module subnetNatEgressModule 'modules/subnet-nat-egress.bicep' = if (enableSubnetNatGatewayEgress) { + name: take('subnet-nat-egress-${uniqueString(subnetId)}', 64) + scope: resourceGroup(subnetSubscriptionId, subnetResourceGroupName) + params: { + location: location + virtualNetworkName: virtualNetworkName + subnetName: subnetName + subnetAddressPrefix: discoveredSubnetAddressPrefix + natGatewayName: effectiveNatGatewayName + 
publicIpName: effectiveNatGatewayPublicIpName + existingDefaultOutboundAccess: existingSubnetDefaultOutboundAccess + existingNsg: existingSubnetNsg + existingServiceEndpoints: existingSubnetServiceEndpoints + existingDelegations: existingSubnetDelegations + existingRouteTable: existingSubnetRouteTable + tags: effectiveTags + } +} + +@description('Resource ID of the deployed virtual machine.') +output vmResourceId string = linuxIsaacVmModule.outputs.vmResourceId + +@description('Resource ID of the deployed network interface.') +output nicResourceId string = linuxIsaacVmModule.outputs.nicResourceId diff --git a/infrastructure/setup/optional/isaac-sim-vm/bicep/modules/linux-isaac-vm.bicep b/infrastructure/setup/optional/isaac-sim-vm/bicep/modules/linux-isaac-vm.bicep new file mode 100644 index 00000000..f61f1770 --- /dev/null +++ b/infrastructure/setup/optional/isaac-sim-vm/bicep/modules/linux-isaac-vm.bicep @@ -0,0 +1,240 @@ +metadata name = 'Linux Isaac VM Module' +metadata description = 'Module placeholder for deploying a single Linux Isaac VM and related resources.' 
+ +import { + CommonTags + ShutdownSchedule + ImageConfig + PlanConfig + DiskConfig +} from '../types.bicep' + +/* + Required parameters +*/ + +@description('Name of the VM to deploy.') +param vmName string + +@description('Location for module resources.') +param location string + +@description('Resource ID of the existing subnet used by the VM NIC.') +param subnetId string + +@description('Resource ID of the existing network security group associated to the VM NIC.') +param nsgId string + +@description('Admin username for the Linux VM.') +param adminUsername string + +@description('Password for the Linux VM admin account.') +@secure() +param adminPassword string + +@description('VM size for the deployed VM.') +param vmSize string + +@description('Marketplace image configuration.') +param image ImageConfig + +@description('Marketplace plan configuration.') +param plan PlanConfig + +@description('OS disk configuration.') +param osDisk DiskConfig + +@description('Data disk configuration.') +param dataDisk DiskConfig + +@description('Daily auto-shutdown schedule for the VM.') +param shutdownSchedule ShutdownSchedule + +@description('Optional MDE.Linux extension settings. Set to null to skip extension deployment.') +param mdeLinux object? + +@description('Tags applied to module resources.') +param tags CommonTags + +var defaultMdeLinuxSettings = { + autoUpdate: true + forceReOnboarding: false + vNextEnabled: false +} + +var effectiveMdeLinuxSettings = union(defaultMdeLinuxSettings, mdeLinux ?? 
{}) +var installDevDepsScript = loadTextContent('../../scripts/install-dev-deps.sh') +var installDevDepsScriptBase64 = base64(installDevDepsScript) +var installThinLincScript = loadTextContent('../../scripts/install-thinlinc-silent.sh') +var installThinLincScriptBase64 = base64(installThinLincScript) + +/* + Resources +*/ + +@description('Network interface for the VM with private networking only.') +resource networkInterface 'Microsoft.Network/networkInterfaces@2023-09-01' = { + name: '${vmName}-nic' + location: location + tags: tags + properties: { + networkSecurityGroup: { + id: nsgId + } + ipConfigurations: [ + { + name: 'ipconfig1' + properties: { + privateIPAllocationMethod: 'Dynamic' + privateIPAddressVersion: 'IPv4' + subnet: { + id: subnetId + } + } + } + ] + } +} + +@description('Linux VM configured for marketplace image and plan deployment.') +resource virtualMachine 'Microsoft.Compute/virtualMachines@2023-09-01' = { + name: vmName + location: location + tags: tags + identity: { + type: 'SystemAssigned' + } + plan: { + publisher: plan.publisher + product: plan.product + name: plan.name + } + properties: { + hardwareProfile: { + vmSize: vmSize + } + storageProfile: { + imageReference: { + publisher: image.publisher + offer: image.offer + sku: image.sku + version: image.version + } + osDisk: { + createOption: 'FromImage' + osType: 'Linux' + caching: osDisk.caching + diskSizeGB: osDisk.sizeGb + deleteOption: osDisk.deleteOption + managedDisk: { + storageAccountType: osDisk.storageAccountType + } + } + dataDisks: [ + { + lun: 0 + createOption: 'Empty' + caching: dataDisk.caching + diskSizeGB: dataDisk.sizeGb + deleteOption: dataDisk.deleteOption + managedDisk: { + storageAccountType: dataDisk.storageAccountType + } + } + ] + } + osProfile: { + computerName: vmName + adminUsername: adminUsername + adminPassword: adminPassword + linuxConfiguration: { + disablePasswordAuthentication: false + } + } + diagnosticsProfile: { + bootDiagnostics: { + enabled: true + } 
+ } + networkProfile: { + networkInterfaces: [ + { + id: networkInterface.id + properties: { + deleteOption: 'Delete' + } + } + ] + } + } +} + +@description('Runs install-dev-deps.sh and install-thinlinc-silent.sh on the VM during provisioning via CustomScript extension.') +resource installDevDepsExtension 'Microsoft.Compute/virtualMachines/extensions@2023-09-01' = { + parent: virtualMachine + name: 'install-dev-deps' + location: location + tags: tags + properties: { + publisher: 'Microsoft.Azure.Extensions' + type: 'CustomScript' + typeHandlerVersion: '2.1' + autoUpgradeMinorVersion: true + settings: { + commandToExecute: format('bash -lc "echo {0} | base64 -d > /tmp/install-dev-deps.sh && echo {1} | base64 -d > /tmp/install-thinlinc-silent.sh && chmod +x /tmp/install-dev-deps.sh /tmp/install-thinlinc-silent.sh && /tmp/install-dev-deps.sh {2} && /tmp/install-thinlinc-silent.sh"', installDevDepsScriptBase64, installThinLincScriptBase64, adminUsername) + } + } +} + +@description('Defender for Servers extension for Linux VM onboarding.') +resource mdeExtension 'Microsoft.Compute/virtualMachines/extensions@2023-09-01' = if (mdeLinux != null) { + parent: virtualMachine + name: 'MDE.Linux' + location: location + tags: tags + properties: { + publisher: 'Microsoft.Azure.AzureDefenderForServers' + type: 'MDE.Linux' + typeHandlerVersion: '1.0' + autoUpgradeMinorVersion: true + settings: { + autoUpdate: effectiveMdeLinuxSettings.autoUpdate + azureResourceId: virtualMachine.id + forceReOnboarding: effectiveMdeLinuxSettings.forceReOnboarding + vNextEnabled: effectiveMdeLinuxSettings.vNextEnabled + } + } +} + +@description('Daily VM auto-shutdown schedule without notifications.') +resource autoShutdownSchedule 'Microsoft.DevTestLab/schedules@2018-09-15' = { + name: 'shutdown-computevm-${vmName}' + location: location + tags: tags + properties: { + status: 'Enabled' + taskType: 'ComputeVmShutdownTask' + dailyRecurrence: { + time: shutdownSchedule.time + } + timeZoneId: 
shutdownSchedule.timeZoneId + targetResourceId: virtualMachine.id + notificationSettings: { + status: 'Disabled' + timeInMinutes: 30 + webhookUrl: '' + emailRecipient: '' + notificationLocale: 'en' + } + } +} + +/* + Outputs +*/ + +@description('Resource ID of the deployed virtual machine.') +output vmResourceId string = virtualMachine.id + +@description('Resource ID of the deployed network interface.') +output nicResourceId string = networkInterface.id diff --git a/infrastructure/setup/optional/isaac-sim-vm/bicep/modules/subnet-nat-egress.bicep b/infrastructure/setup/optional/isaac-sim-vm/bicep/modules/subnet-nat-egress.bicep new file mode 100644 index 00000000..7b5aefd4 --- /dev/null +++ b/infrastructure/setup/optional/isaac-sim-vm/bicep/modules/subnet-nat-egress.bicep @@ -0,0 +1,113 @@ +metadata name = 'Subnet NAT Egress Module' +metadata description = 'Optional module to attach a NAT Gateway to an existing subnet for outbound internet egress without VM public IPs.' + +@description('Location for NAT resources.') +param location string + +@description('Name of the existing virtual network that contains the target subnet.') +param virtualNetworkName string + +@description('Name of the existing subnet to attach the NAT gateway to.') +param subnetName string + +@description('Address prefix of the existing subnet (required for subnet update operations).') +param subnetAddressPrefix string + +@description('Name of the NAT gateway resource to create.') +param natGatewayName string + +@description('Name of the Public IP resource used by the NAT gateway.') +param publicIpName string + +@description('Existing default outbound access setting to preserve when updating the subnet.') +param existingDefaultOutboundAccess bool? + +@description('Existing network security group association to preserve when updating the subnet.') +param existingNsg object? + +@description('Existing service endpoints to preserve when updating the subnet.') +param existingServiceEndpoints array? 
@description('Existing subnet delegations to preserve when updating the subnet.')
param existingDelegations array?

@description('Existing route table association to preserve when updating the subnet.')
param existingRouteTable object?

@description('Tags applied to NAT resources.')
param tags object

@description('Idle timeout for NAT gateway connections, in minutes.')
@minValue(4)
@maxValue(120)
param idleTimeoutInMinutes int = 10

// Subnet properties written back by the subnet resource below. The address
// prefix and NAT gateway association are always set; each existing* value is
// merged in only when it was supplied, so a null parameter leaves that
// property out of the update payload.
var subnetProperties = union(
  {
    addressPrefix: subnetAddressPrefix
    natGateway: {
      id: natGateway.id
    }
  },
  existingDefaultOutboundAccess == null ? {} : {
    defaultOutboundAccess: existingDefaultOutboundAccess
  },
  existingNsg == null ? {} : {
    networkSecurityGroup: existingNsg
  },
  existingServiceEndpoints == null ? {} : {
    serviceEndpoints: existingServiceEndpoints
  },
  existingDelegations == null ? {} : {
    delegations: existingDelegations
  },
  existingRouteTable == null ? {} : {
    routeTable: existingRouteTable
  }
)

// Static Standard public IP used as the NAT gateway's outbound address.
resource natPublicIp 'Microsoft.Network/publicIPAddresses@2023-09-01' = {
  name: publicIpName
  location: location
  tags: tags
  sku: {
    name: 'Standard'
    tier: 'Regional'
  }
  properties: {
    publicIPAllocationMethod: 'Static'
    publicIPAddressVersion: 'IPv4'
    // The parameter accepts up to 120 minutes for the NAT gateway, but a public
    // IP address only accepts an idle timeout of up to 30 minutes. Clamp the
    // value here so larger NAT gateway timeouts do not fail this resource.
    idleTimeoutInMinutes: min(idleTimeoutInMinutes, 30)
    deleteOption: 'Delete'
  }
}

// NAT gateway that provides outbound connectivity through natPublicIp.
resource natGateway 'Microsoft.Network/natGateways@2023-09-01' = {
  name: natGatewayName
  location: location
  tags: tags
  sku: {
    name: 'Standard'
  }
  properties: {
    idleTimeoutInMinutes: idleTimeoutInMinutes
    publicIpAddresses: [
      {
        id: natPublicIp.id
      }
    ]
  }
}

// Existing virtual network in this module's deployment scope.
resource vnet 'Microsoft.Network/virtualNetworks@2023-09-01' existing = {
  name: virtualNetworkName
}

// Re-declares the existing subnet with the NAT gateway attached.
resource subnet 'Microsoft.Network/virtualNetworks/subnets@2023-09-01' = {
  parent: vnet
  name: subnetName
  properties: subnetProperties
}

@description('Resource ID of the NAT gateway created by this module.')
output natGatewayResourceId string = natGateway.id

@description('Resource ID of the public IP address attached to the NAT gateway.')
output publicIpResourceId string = natPublicIp.id
diff 
--git a/infrastructure/setup/optional/isaac-sim-vm/bicep/types.bicep b/infrastructure/setup/optional/isaac-sim-vm/bicep/types.bicep new file mode 100644 index 00000000..8ebb4737 --- /dev/null +++ b/infrastructure/setup/optional/isaac-sim-vm/bicep/types.bicep @@ -0,0 +1,126 @@ +metadata name = 'Isaac VM Shared Types' +metadata description = 'Shared exported types and default values for Isaac Linux VM Bicep deployments.' + +/* + Shared types +*/ + +@export() +@sealed() +@description('Common tags applied to deployed resources.') +type CommonTags = { + @description('Deployment environment tag value.') + environment: string +} + +@export() +@sealed() +@description('Marketplace image configuration for the VM.') +type ImageConfig = { + @description('Marketplace image publisher.') + publisher: string + + @description('Marketplace image offer.') + offer: string + + @description('Marketplace image SKU.') + sku: string + + @description('Marketplace image version.') + version: string +} + +@export() +@sealed() +@description('Marketplace plan configuration required for paid/community images.') +type PlanConfig = { + @description('Marketplace plan publisher.') + publisher: string + + @description('Marketplace plan product.') + product: string + + @description('Marketplace plan name.') + name: string +} + +@export() +@sealed() +@description('Managed disk sizing and SKU configuration.') +type DiskConfig = { + @description('Managed disk storage SKU.') + storageAccountType: 'Premium_LRS' | 'StandardSSD_LRS' | 'Standard_LRS' + + @description('Managed disk size in GiB.') + @minValue(1) + sizeGb: int + + @description('Disk caching mode.') + caching: 'None' | 'ReadOnly' | 'ReadWrite' + + @description('Delete behavior when the VM is deleted.') + deleteOption: 'Delete' | 'Detach' +} + +@export() +@sealed() +@description('Auto-shutdown schedule configuration for cost control.') +type ShutdownSchedule = { + @description('24-hour time string for daily shutdown (for example, 1900).') + time: 
string + + @description('Windows time zone identifier for the shutdown schedule.') + timeZoneId: string +} + +/* + Shared defaults +*/ + +@export() +@description('Default common tags aligned with the current reference VM deployment.') +var defaultCommonTags CommonTags = { + environment: 'dev' +} + +@export() +@description('Default marketplace image configuration for Isaac Sim Linux.') +var defaultImageConfig ImageConfig = { + publisher: 'nvidia' + offer: 'isaac_sim_developer_workstation' + sku: 'isaac_sim_developer_workstation_community_linux' + version: 'latest' +} + +@export() +@description('Default marketplace plan configuration for Isaac Sim Linux.') +var defaultPlanConfig PlanConfig = { + publisher: 'nvidia' + product: 'isaac_sim_developer_workstation' + name: 'isaac_sim_developer_workstation_community_linux' +} + +@export() +@description('Default OS disk configuration aligned to reference intent.') +var defaultOsDiskConfig DiskConfig = { + storageAccountType: 'Premium_LRS' + sizeGb: 512 + caching: 'ReadWrite' + deleteOption: 'Delete' +} + +@export() +@description('Default data disk configuration aligned to reference intent.') +var defaultDataDiskConfig DiskConfig = { + storageAccountType: 'Premium_LRS' + sizeGb: 512 + caching: 'ReadWrite' + deleteOption: 'Detach' +} + +@export() +@description('Default auto-shutdown schedule aligned to a globally neutral UTC default.') +var defaultShutdownSchedule ShutdownSchedule = { + time: '1900' + timeZoneId: 'UTC' +} diff --git a/infrastructure/setup/optional/isaac-sim-vm/scripts/install-dev-deps.sh b/infrastructure/setup/optional/isaac-sim-vm/scripts/install-dev-deps.sh new file mode 100755 index 00000000..1595b5f1 --- /dev/null +++ b/infrastructure/setup/optional/isaac-sim-vm/scripts/install-dev-deps.sh @@ -0,0 +1,223 @@ +#!/usr/bin/env bash +set -euo pipefail + +export DEBIAN_FRONTEND=noninteractive +# Azure CustomScript can run without HOME set; ensure global tools (git, uv) work. 
export HOME="${HOME:-/root}"
# First positional argument: account that user-level tooling is configured for.
ADMIN_USER="${1:-azureuser}"

if ! id "$ADMIN_USER" >/dev/null 2>&1; then
  echo "Admin user does not exist: $ADMIN_USER" >&2
  exit 1
fi

# `|| true` keeps a failed lookup from aborting the script silently under
# errexit/pipefail; the check below then reports the problem explicitly.
ADMIN_USER_HOME="$(getent passwd "$ADMIN_USER" | cut -d: -f6 || true)"

if [[ -z "$ADMIN_USER_HOME" || ! -d "$ADMIN_USER_HOME" ]]; then
  echo "Admin user home directory does not exist: $ADMIN_USER_HOME" >&2
  exit 1
fi

#######################################
# Set the admin user's global git editor to "code-insiders --wait".
# Globals:   ADMIN_USER, ADMIN_USER_HOME (read)
# Arguments: none
#######################################
configure_admin_git() {
  # shellcheck disable=SC2016 # $HOME must expand in the admin user's shell
  sudo -H -u "$ADMIN_USER" env HOME="$ADMIN_USER_HOME" bash -lc \
    'cd "$HOME" && git config --global core.editor "code-insiders --wait"'
}

#######################################
# Install an Azure CLI extension as the admin user.
# Globals:   ADMIN_USER, ADMIN_USER_HOME (read)
# Arguments: $1 - extension name passed to `az extension add --name`
#######################################
install_admin_azure_cli_extension() {
  local extension_name="${1:?extension name is required}"

  # The name is handed to the inner shell as a positional parameter instead of
  # being spliced into the command string, so quotes or shell metacharacters
  # in it cannot alter the command that runs.
  # shellcheck disable=SC2016 # $1 must expand in the inner shell
  sudo -H -u "$ADMIN_USER" env HOME="$ADMIN_USER_HOME" bash -lc \
    'az extension add --name "$1" --yes' bash "$extension_name"
}

#######################################
# Block until no process holds the apt/dpkg lock files.
# Polls every 5 seconds and gives up after 900 seconds.
# Outputs:   progress messages on stdout, timeout message on stderr
# Returns:   0 once the locks are free, 1 on timeout
#######################################
wait_for_apt_locks() {
  local timeout_seconds=900
  local wait_interval=5
  local waited=0

  while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1 || \
        sudo fuser /var/lib/dpkg/lock >/dev/null 2>&1 || \
        sudo fuser /var/lib/apt/lists/lock >/dev/null 2>&1 || \
        sudo fuser /var/cache/apt/archives/lock >/dev/null 2>&1; do
    if [ "$waited" -ge "$timeout_seconds" ]; then
      echo "Timed out waiting for apt/dpkg locks after ${timeout_seconds}s" >&2
      return 1
    fi
    echo "apt/dpkg lock held, sleeping ${wait_interval}s... (${waited}s elapsed)"
    sleep "$wait_interval"
    waited=$((waited + wait_interval))
  done
}

#######################################
# Run apt-get via sudo after waiting for the package locks.
# Arguments: passed through to apt-get
#######################################
apt_get() {
  wait_for_apt_locks
  sudo apt-get -o DPkg::Lock::Timeout=600 "$@"
}

#######################################
# Install a local .deb with dpkg after waiting for the package locks.
# Arguments: $1 - path to the .deb file
#######################################
dpkg_install() {
  wait_for_apt_locks
  sudo dpkg -i "$1"
}

#######################################
# Best-effort repair of an interrupted or broken dpkg state.
# Tries `dpkg --configure -a` first. If that fails, purges the
# linux-image/linux-headers packages reported by `dpkg --audit` that do not
# match the running kernel, then retries. Failures of the follow-up commands
# are tolerated.
#######################################
repair_dpkg_state() {
  local running_kernel
  local pkg
  local -a broken_linux_pkgs=()

  wait_for_apt_locks

  # First attempt a normal repair path.
  if sudo dpkg --configure -a; then
    apt_get -f install -y || true
    return
  fi

  # If dpkg is blocked by failed future kernel/header config scripts (dkms),
  # purge only the broken kernel/header packages not matching the running kernel.
  running_kernel="$(uname -r)"
  while IFS= read -r pkg; do
    if [ -n "$pkg" ] && [[ "$pkg" != *"$running_kernel"* ]]; then
      broken_linux_pkgs+=("$pkg")
    fi
  done < <(sudo dpkg --audit | grep -oE 'linux-(image|headers)-[0-9][^ ,]+' | sort -u)

  if [ "${#broken_linux_pkgs[@]}" -gt 0 ]; then
    echo "Purging broken kernel/header packages: ${broken_linux_pkgs[*]}"
    apt_get remove --purge -y "${broken_linux_pkgs[@]}" || true
  fi

  sudo dpkg --configure -a || true
  apt_get -f install -y || true
}

#######################################
# Hold and pin the Azure kernel meta-packages so provisioning does not pull
# in a kernel/header transition.
# Writes /etc/apt/preferences.d/99-hold-azure-kernel.pref.
#######################################
prevent_kernel_upgrades_during_provisioning() {
  # Avoid kernel/header transitions during provisioning, which can trigger dkms
  # rebuild failures and leave apt in an error state.
  #
  # 1) Put installed meta-packages on hold.
  # 2) Add apt pinning so unattended-upgrades and any later apt invocations
  #    in this VM won't pull Azure kernel transitions.
  sudo apt-mark hold \
    linux-azure \
    linux-image-azure \
    linux-headers-azure \
    linux-tools-azure \
    linux-cloud-tools-azure || true

  sudo tee /etc/apt/preferences.d/99-hold-azure-kernel.pref >/dev/null <<'EOF'
Package: linux-azure
Pin: release *
Pin-Priority: -1

Package: linux-image-azure
Pin: release *
Pin-Priority: -1

Package: linux-headers-azure
Pin: release *
Pin-Priority: -1

Package: linux-tools-azure
Pin: release *
Pin-Priority: -1

Package: linux-cloud-tools-azure
Pin: release *
Pin-Priority: -1
EOF
}

# Prevent kernel transitions as early as possible.
+prevent_kernel_upgrades_during_provisioning + +apt_get update +repair_dpkg_state + +## Install Node.js 22 LTS +sudo install -d -m 0755 /etc/apt/keyrings +curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/nodesource.gpg +sudo chmod go+r /etc/apt/keyrings/nodesource.gpg +sudo tee /etc/apt/sources.list.d/nodesource.sources >/dev/null <<'EOF' +Types: deb +URIs: https://deb.nodesource.com/node_22.x +Suites: nodistro +Components: main +Architectures: amd64 +Signed-By: /etc/apt/keyrings/nodesource.gpg +EOF +apt_get update +apt_get install -y --no-install-recommends nodejs + +curl -LsSf https://astral.sh/uv/install.sh | sudo env UV_INSTALL_DIR="/usr/local/bin" sh +if ! command -v uv >/dev/null 2>&1; then + echo "uv installation failed or is not on PATH" >&2 + exit 1 +fi +sudo install -d -m 0755 /etc/apt/keyrings +curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | sudo gpg --dearmor --batch --yes -o /etc/apt/keyrings/microsoft.gpg +sudo chmod go+r /etc/apt/keyrings/microsoft.gpg +sudo tee /etc/apt/sources.list.d/azure-cli.sources >/dev/null < microsoft.gpg && +sudo install -D -o root -g root -m 644 microsoft.gpg /usr/share/keyrings/microsoft.gpg && +rm -f microsoft.gpg + +sudo tee /etc/apt/sources.list.d/vscode.sources > /dev/null </dev/null 2>&1; then + wget -O "${THINLINC_ZIP}" "${THINLINC_DOWNLOAD_URL}" + elif command -v curl >/dev/null 2>&1; then + curl -fL "${THINLINC_DOWNLOAD_URL}" -o "${THINLINC_ZIP}" + else + echo "Neither wget nor curl is available to download ${THINLINC_DOWNLOAD_URL}" + exit 1 + fi + else + echo "ThinLinc server ZIP not found: ${THINLINC_ZIP}" + echo "Pass the ZIP path as first argument, e.g.:" + echo " ./install-tl-server.sh /path/to/tl-4.20.0-server.zip" + exit 1 + fi +fi + +set_answer_if_present() { + local key="$1" + local value="$2" + + if grep -Eq "^[[:space:]]*#?[[:space:]]*${key}[[:space:]]*=" "${ANSWERS_FILE}"; then + sed -Ei 
"s|^[[:space:]]*#?[[:space:]]*${key}[[:space:]]*=.*$|${key}=${value}|" "${ANSWERS_FILE}" + echo "Applied: ${key} = ${value}" + return 0 + fi + + return 1 +} + +set_first_matching_key() { + local value="$1" + shift + + local key="" + for key in "$@"; do + if set_answer_if_present "${key}" "${value}"; then + return 0 + fi + done + + return 1 +} + +resolve_agent_hostname() { + local source="${TL_AGENT_HOSTNAME_SOURCE:-primary-ip}" + local resolved="" + + if [[ -n "${TL_AGENT_HOSTNAME_CMD:-}" ]]; then + resolved="$(bash -lc "${TL_AGENT_HOSTNAME_CMD}" 2>/dev/null | head -n 1 | tr -d '\r')" + if [[ -n "${resolved}" ]]; then + echo "${resolved}" + return 0 + fi + fi + + case "${source}" in + auto|fqdn) + resolved="$(hostname -f 2>/dev/null || true)" + if [[ -n "${resolved}" ]]; then + echo "${resolved}" + return 0 + fi + ;; + esac + + case "${source}" in + auto|primary-ip) + resolved="$(ip -4 route get 1.1.1.1 2>/dev/null | awk '{for(i=1;i<=NF;i++) if($i=="src") {print $(i+1); exit}}')" + if [[ -n "${resolved}" ]]; then + echo "${resolved}" + return 0 + fi + ;; + esac + + if [[ "${source}" == "public-ip" || "${source}" == "auto" ]]; then + for service in \ + "https://api.ipify.org" \ + "https://ifconfig.me/ip" \ + "https://icanhazip.com"; do + resolved="$(curl -fsS --max-time 5 "${service}" 2>/dev/null | head -n 1 | tr -d '\r')" + if [[ -n "${resolved}" ]]; then + echo "${resolved}" + return 0 + fi + done + fi + + return 1 +} + +unzip -q "${THINLINC_ZIP}" -d "${WORK_DIR}" +SERVER_DIR="$(find "${WORK_DIR}" -maxdepth 1 -type d -name 'tl-*-server' | head -n 1)" + +if [[ -z "${SERVER_DIR}" ]]; then + echo "Could not locate extracted ThinLinc server directory in ${WORK_DIR}" + exit 1 +fi + +sudo DEBIAN_FRONTEND=noninteractive apt-get update +sudo DEBIAN_FRONTEND=noninteractive apt-get install -y "${SERVER_DIR}"/packages/*.deb + +sudo /opt/thinlinc/sbin/tl-setup -g "${ANSWERS_FILE}" + +# Required for silent mode: accept license and provide base behavior. 
# Write the baseline answers for an unattended install. Entries are
# "key=value" pairs applied in order; a key that is not present in the answers
# file is skipped (set_first_matching_key fails and the failure is ignored).
apply_silent_defaults() {
  local -a baseline_answers=(
    "accept-eula=yes"
    "server-type=master"
    "migrate-conf=parameters"
    "install-required-libs=yes"
    "install-nfs=yes"
    "install-sshd=yes"
    "install-gtk=no"
    "install-python-ldap=no"
    "agent-hostname-choice=ip"
    "email-address=nono@example.com"
    "setup-thinlocal=no"
    "setup-nearest=no"
    "setup-firewall-ssh=no"
    "setup-firewall-tlwebaccess=no"
    "setup-firewall-tlwebadm=no"
    "setup-firewall-tlmaster=no"
    "setup-firewall-tlagent=no"
    "setup-selinux=no"
    "setup-apparmor=no"
    "tlwebadm-password=${TL_WEBADM_PASSWORD_HASH:-}"
    "missing-answer=abort"
  )
  local entry

  for entry in "${baseline_answers[@]}"; do
    # Split on the first "=": the key is everything before it, the value the rest.
    set_first_matching_key "${entry#*=}" "${entry%%=*}" >/dev/null || true
  done
}

# Apply an optional override to the first matching key.
#   $1  - value to write; nothing happens when it is empty
#   $2  - warning printed when none of the candidate keys exists
#   $3+ - candidate keys, tried in order
apply_optional_answer() {
  local answer_value="$1"
  local not_found_warning="$2"
  shift 2

  [[ -n "${answer_value}" ]] || return 0
  set_first_matching_key "${answer_value}" "$@" || echo "${not_found_warning}"
}

apply_silent_defaults

echo "Discovered answer keys in ${ANSWERS_FILE}:"
grep -E "^[[:space:]]*[^#%\[][^=]*=" "${ANSWERS_FILE}" || true

apply_optional_answer "${TL_MASTER_HOSTNAME:-}" \
  "Warning: could not find a master hostname key in ${ANSWERS_FILE}" \
  "/vsmagent/master_hostname" \
  "vsmagent/master_hostname" \
  "master_hostname"

# "auto" asks the script to detect the agent hostname; an unresolved value is
# cleared so the override below is skipped.
if [[ "${TL_AGENT_HOSTNAME:-}" == "auto" ]]; then
  if ! TL_AGENT_HOSTNAME="$(resolve_agent_hostname)"; then
    echo "Warning: TL_AGENT_HOSTNAME=auto but no value could be resolved"
    TL_AGENT_HOSTNAME=""
  else
    echo "Resolved TL_AGENT_HOSTNAME=${TL_AGENT_HOSTNAME}"
  fi
fi

apply_optional_answer "${TL_AGENT_HOSTNAME:-}" \
  "Warning: could not find an agent hostname/IP key in ${ANSWERS_FILE}" \
  "manual-agent-hostname" \
  "/vsmagent/agent_hostname" \
  "vsmagent/agent_hostname" \
  "agent_hostname"

apply_optional_answer "${TL_WEBACCESS_LOGIN_PAGE:-}" \
  "Warning: could not find a web access login page key in ${ANSWERS_FILE}" \
  "/webaccess/login_page" \
  "webaccess/login_page" \
  "login_page"

apply_optional_answer "${TL_ADMIN_EMAIL:-}" \
  "Warning: could not find an admin email key in ${ANSWERS_FILE}" \
  "email-address" \
  "/vsmserver/admin_email" \
  "vsmserver/admin_email" \
  "admin_email"

# Run the setup non-interactively with the prepared answers file.
sudo /opt/thinlinc/sbin/tl-setup -a "${ANSWERS_FILE}"

echo "ThinLinc silent setup complete."
+echo "Service status:" +sudo systemctl --no-pager --full status vsmserver vsmagent tlwebaccess || true diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 48a4a76f..cd674939 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -79,9 +79,11 @@ module "platform" { // Networking configuration should_enable_nat_gateway = var.should_enable_nat_gateway + should_create_vm_subnet = var.should_create_vm_subnet virtual_network_config = { address_space = var.virtual_network_config.address_space subnet_address_prefix_main = var.virtual_network_config.subnet_address_prefix + subnet_address_prefix_vm = var.virtual_network_config.subnet_address_prefix_vm subnet_address_prefix_pe = var.virtual_network_config.subnet_address_prefix_pe subnet_address_prefix_resolver = var.virtual_network_config.subnet_address_prefix_resolver } diff --git a/infrastructure/terraform/modules/platform/networking.tf b/infrastructure/terraform/modules/platform/networking.tf index 41250fba..ccc47c18 100644 --- a/infrastructure/terraform/modules/platform/networking.tf +++ b/infrastructure/terraform/modules/platform/networking.tf @@ -34,6 +34,16 @@ resource "azurerm_subnet" "main" { default_outbound_access_enabled = !var.should_enable_nat_gateway } +resource "azurerm_subnet" "vm_subnet" { + count = var.should_create_vm_subnet ? 1 : 0 + + name = "snet-isaaclab-vm-${local.resource_name_suffix}" + resource_group_name = var.resource_group.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = [var.virtual_network_config.subnet_address_prefix_vm] + default_outbound_access_enabled = !var.should_enable_nat_gateway +} + // Private Endpoints Subnet (conditional - only created when private endpoints are enabled) resource "azurerm_subnet" "private_endpoints" { count = local.pe_enabled ? 
1 : 0 @@ -51,6 +61,13 @@ resource "azurerm_subnet_network_security_group_association" "main" { network_security_group_id = azurerm_network_security_group.main.id } +resource "azurerm_subnet_network_security_group_association" "vm_subnet" { + count = var.should_create_vm_subnet ? 1 : 0 + + subnet_id = azurerm_subnet.vm_subnet[0].id + network_security_group_id = azurerm_network_security_group.main.id +} + resource "azurerm_subnet_network_security_group_association" "private_endpoints" { count = local.pe_enabled ? 1 : 0 @@ -102,6 +119,13 @@ resource "azurerm_subnet_nat_gateway_association" "main" { nat_gateway_id = azurerm_nat_gateway.main[0].id } +resource "azurerm_subnet_nat_gateway_association" "vm_subnet" { + count = var.should_create_vm_subnet && var.should_enable_nat_gateway ? 1 : 0 + + subnet_id = azurerm_subnet.vm_subnet[0].id + nat_gateway_id = azurerm_nat_gateway.main[0].id +} + // ============================================================ // DNS Private Resolver // ============================================================ diff --git a/infrastructure/terraform/modules/platform/outputs.tf b/infrastructure/terraform/modules/platform/outputs.tf index ee35fc2e..0b82724f 100644 --- a/infrastructure/terraform/modules/platform/outputs.tf +++ b/infrastructure/terraform/modules/platform/outputs.tf @@ -24,6 +24,10 @@ output "subnets" { id = azurerm_subnet.main.id name = azurerm_subnet.main.name } + vm_subnet = try({ + id = azurerm_subnet.vm_subnet[0].id + name = azurerm_subnet.vm_subnet[0].name + }, null) private_endpoints = try({ id = azurerm_subnet.private_endpoints[0].id name = azurerm_subnet.private_endpoints[0].name diff --git a/infrastructure/terraform/modules/platform/variables.tf b/infrastructure/terraform/modules/platform/variables.tf index 91b762c5..533abb3d 100644 --- a/infrastructure/terraform/modules/platform/variables.tf +++ b/infrastructure/terraform/modules/platform/variables.tf @@ -25,10 +25,17 @@ variable "should_enable_nat_gateway" { default = 
true } +variable "should_create_vm_subnet" { + type = bool + description = "Whether to create a dedicated subnet for virtual machines in the platform virtual network" + default = false +} + variable "virtual_network_config" { type = object({ address_space = string subnet_address_prefix_main = string + subnet_address_prefix_vm = optional(string) subnet_address_prefix_pe = optional(string) subnet_address_prefix_resolver = optional(string) }) @@ -36,6 +43,7 @@ variable "virtual_network_config" { default = { address_space = "10.0.0.0/16" subnet_address_prefix_main = "10.0.1.0/24" + subnet_address_prefix_vm = "10.0.4.0/24" subnet_address_prefix_pe = "10.0.2.0/24" subnet_address_prefix_resolver = "10.0.9.0/28" } diff --git a/infrastructure/terraform/outputs.tf b/infrastructure/terraform/outputs.tf index 1800d7a1..7378a15f 100644 --- a/infrastructure/terraform/outputs.tf +++ b/infrastructure/terraform/outputs.tf @@ -106,6 +106,16 @@ output "subnets" { value = module.platform.subnets } +output "vm_subnet" { + description = "Dedicated VM subnet. Null when should_create_vm_subnet is false." + value = module.platform.subnets.vm_subnet +} + +output "network_security_group" { + description = "Shared network security group for robotics infrastructure." 
+ value = module.platform.network_security_group +} + // ============================================================ // DNS Private Resolver Outputs // ============================================================ diff --git a/infrastructure/terraform/terraform.tfvars.example b/infrastructure/terraform/terraform.tfvars.example index f59a074a..d99a8b7a 100644 --- a/infrastructure/terraform/terraform.tfvars.example +++ b/infrastructure/terraform/terraform.tfvars.example @@ -140,3 +140,7 @@ reports_archive_tier_days = 180 // DNS Zones // should_include_aks_dns_zone = true + +// VM Subnet +// To deploy an Isaac Sim virtual machine, enable creation of the dedicated VM subnet +// should_create_vm_subnet = true diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/variables.tf index b04f7d47..c4ae3677 100644 --- a/infrastructure/terraform/variables.tf +++ b/infrastructure/terraform/variables.tf @@ -275,10 +275,17 @@ variable "should_enable_nat_gateway" { default = true } +variable "should_create_vm_subnet" { + type = bool + description = "Whether to create a dedicated subnet for virtual machines in the platform virtual network" + default = false +} + variable "virtual_network_config" { type = object({ address_space = string subnet_address_prefix = string + subnet_address_prefix_vm = optional(string, "10.0.4.0/24") subnet_address_prefix_pe = optional(string, "10.0.2.0/24") subnet_address_prefix_resolver = optional(string, "10.0.9.0/28") }) @@ -286,12 +293,17 @@ variable "virtual_network_config" { default = { address_space = "10.0.0.0/16" subnet_address_prefix = "10.0.1.0/24" + subnet_address_prefix_vm = "10.0.4.0/24" subnet_address_prefix_pe = "10.0.2.0/24" subnet_address_prefix_resolver = "10.0.9.0/28" } validation { - condition = can(cidrhost(var.virtual_network_config.address_space, 0)) && can(cidrhost(var.virtual_network_config.subnet_address_prefix, 0)) - error_message = "Both address_space and subnet_address_prefix must be valid 
CIDR blocks." + condition = ( + can(cidrhost(var.virtual_network_config.address_space, 0)) && + can(cidrhost(var.virtual_network_config.subnet_address_prefix, 0)) && + can(cidrhost(var.virtual_network_config.subnet_address_prefix_vm, 0)) + ) + error_message = "address_space, subnet_address_prefix, and subnet_address_prefix_vm must be valid CIDR blocks." } }