Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/pr-validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,17 @@ jobs:
permissions:
contents: read

# Terraform security scanning via Checkov (soft-fail until matrix is fully addressed)
# Delegates to the reusable workflow in terraform-security.yml. Both inputs below
# repeat that workflow's declared defaults, so they are explicit rather than required.
terraform-security:
name: Terraform Security
uses: ./.github/workflows/terraform-security.yml
with:
# true = Checkov violations do not fail this PR check.
soft-fail: true
working-directory: infrastructure/terraform
# A called workflow cannot hold more permissions than its caller grants;
# security-events: write is what the called job requests for its SARIF upload.
permissions:
contents: read
security-events: write

# Terraform test execution with Codecov Test Analytics
terraform-tests:
name: Terraform Tests
Expand Down
57 changes: 57 additions & 0 deletions .github/workflows/terraform-security.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
name: Terraform Security Scan
Comment thread
nguyena2 marked this conversation as resolved.

# Reusable workflow: the only trigger is workflow_call, so it runs when another
# workflow references it with `uses:`.
on:
workflow_call:
inputs:
# Terraform directory to scan; forwarded to the Checkov step's `directory` input.
working-directory:
description: Directory passed to checkov via -d
required: false
type: string
default: infrastructure/terraform
# Forwarded to both `continue-on-error` and the action's `soft_fail` input
# on the Checkov step.
soft-fail:
description: Whether to continue on Checkov violations
required: false
type: boolean
default: true

# Workflow-wide default: read-only access to repository contents.
permissions:
contents: read

jobs:
# Single job: scan Terraform with Checkov, then publish the SARIF output.
checkov:
name: Checkov
runs-on: ubuntu-latest
# Job-level permissions replace the workflow default above; security-events: write
# is added for the code-scanning SARIF upload step.
permissions:
contents: read
security-events: write
steps:
# Check out the repository so the Terraform sources are present for the scan.
# The action is pinned to a full commit SHA; the trailing comment records the tag.
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Keep the job token out of the checked-out repository's git config.
persist-credentials: false

# Scan the Terraform directory with Checkov and emit SARIF; the two upload
# steps that follow read results.sarif.
- name: Run Checkov
id: checkov
Comment thread
nguyena2 marked this conversation as resolved.
uses: bridgecrewio/checkov-action@99bb2caf247dfd9f03cf984373bc6043d4e32ebf # v12.1347.0
# With soft-fail on, a failing step (violations or a crash) does not fail the job.
continue-on-error: ${{ inputs.soft-fail }}
with:
directory: ${{ inputs.working-directory }}
framework: terraform
output_format: sarif
# Same input passed through to the action's own soft-fail switch.
soft_fail: ${{ inputs.soft-fail }}
# NOTE(review): presumably stops the scan from fetching remote modules —
# confirm against the checkov-action documentation.
download_external_modules: false

# Publish the Checkov findings to GitHub code scanning under the "checkov" category.
# Runs after a failed Checkov step as well, but not when the run was cancelled
# (`always()` would still run then). The hashFiles guard skips the upload when
# Checkov never produced results.sarif, so a crashed scan cannot turn a soft-fail
# run into a hard failure through a "file not found" error in this step.
- name: Upload SARIF to GitHub code scanning
  if: ${{ !cancelled() && hashFiles('results.sarif') != '' }}
  uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
  with:
    sarif_file: results.sarif
    category: checkov

# Keep the raw SARIF file as a downloadable build artifact for 30 days.
# `!cancelled()` still runs this step after a failed scan, but unlike `always()`
# it does not start an upload once the run has been cancelled.
- name: Upload Checkov SARIF artifact
  if: ${{ !cancelled() }}
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
  with:
    name: checkov-sarif
    path: results.sarif
    retention-days: 30
5 changes: 5 additions & 0 deletions infrastructure/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ terraform init && terraform plan -var-file=terraform.tfvars
terraform apply -var-file=terraform.tfvars
```

> [!IMPORTANT]
> The conversion-pipeline module (`should_deploy_conversion_pipeline = true`) uses the `microsoft/fabric` provider, which authenticates via Azure CLI. Run `az login` before `terraform plan` / `apply`. The signed-in identity must be in a security group allow-listed under the Fabric tenant admin setting "Service principals can use Fabric APIs" (or the equivalent user/CLI-context allow-list).
>
> The conversion pipeline writes to the platform-owned data-lake account (`stdl...`); set `should_create_data_lake_storage = true` whenever `should_deploy_conversion_pipeline = true`.

### 3. Connect to the cluster

```bash
Expand Down
18 changes: 18 additions & 0 deletions infrastructure/examples/terraform.tfvars.dev
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,21 @@ should_enable_microsoft_defender = false
// should_deploy_monitor_workspace = true
// should_deploy_ampls = false
// should_deploy_dce = false

// =============================================================================
// Conversion Pipeline (Optional)
// =============================================================================
// Cost-optimized for dev: F2 capacity. The conversion pipeline reuses the
// platform stdl... data-lake account, so should_create_data_lake_storage must
// be true whenever should_deploy_conversion_pipeline is true (enforced by a
// root-level precondition).
// =============================================================================

// The pipeline is off in this example; set should_deploy_conversion_pipeline = true to opt in.
should_deploy_conversion_pipeline = false
// Already true, so the data-lake requirement is met as soon as the pipeline is enabled.
should_create_data_lake_storage = true
conversion_pipeline_config = {
should_create_fabric_capacity = true
should_create_fabric_workspace = true
// Smallest SKU among the example tfvars (dev F2, staging F8, prod F32).
fabric_capacity_sku = "F2"
should_enable_event_grid_dead_letter = true
}
18 changes: 18 additions & 0 deletions infrastructure/examples/terraform.tfvars.prod
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,21 @@ should_enable_purge_protection = true
// should_deploy_monitor_workspace = true
// should_deploy_ampls = true
// should_deploy_dce = true

// =============================================================================
// Conversion Pipeline (Optional)
// =============================================================================
// Production posture: F32 capacity. The conversion pipeline reuses the
// platform stdl... data-lake account, so should_create_data_lake_storage must
// be true whenever should_deploy_conversion_pipeline is true (enforced by a
// root-level precondition).
// =============================================================================

// The pipeline is off in this example; set should_deploy_conversion_pipeline = true to opt in.
should_deploy_conversion_pipeline = false
// Already true, so the data-lake requirement is met as soon as the pipeline is enabled.
should_create_data_lake_storage = true
conversion_pipeline_config = {
should_create_fabric_capacity = true
should_create_fabric_workspace = true
// Largest SKU among the example tfvars (dev F2, staging F8, prod F32).
fabric_capacity_sku = "F32"
should_enable_event_grid_dead_letter = true
}
78 changes: 78 additions & 0 deletions infrastructure/examples/terraform.tfvars.staging
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// =============================================================================
// Staging Environment
// =============================================================================
// Production-shaped (private networking, HA backend) but right-sized for
// pre-production validation. Smaller GPU footprint and F8 Fabric capacity.
// =============================================================================

// Core Configuration
environment = "staging"
location = "westus3"
resource_prefix = "roboticsstg"
instance = "001"

// Resource Group
should_create_resource_group = true

// AKS System Node Pool
// Two system nodes; the GPU pool is declared separately under node_pools.
system_node_pool_vm_size = "Standard_D8ds_v5"
system_node_pool_node_count = 2

// Single GPU pool with dedicated subnet
node_pools = {
rtxprogpu = {
vm_size = "Standard_NC128ds_xl_RTXPRO6000BSE_v6"
subnet_address_prefixes = ["10.0.7.0/24"]
// NoSchedule taint: only pods that tolerate nvidia.com/gpu land on these nodes.
node_taints = ["nvidia.com/gpu:NoSchedule"]
// NOTE(review): driver install is turned off both here and via the label below —
// presumably drivers are supplied some other way; confirm.
gpu_driver = "None"
node_labels = {
"nvidia.com/gpu.deploy.driver" = "false"
}
priority = "Regular"
// Autoscaling is enabled with bounds of 1 to 2 nodes.
should_enable_auto_scaling = true
min_count = 1
max_count = 2
// Empty list: no availability zones are specified for this pool.
zones = []
}
}

// OSMO Backend Services with HA
should_deploy_postgresql = true
should_deploy_redis = true

// PostgreSQL HA
postgresql_sku_name = "GP_Standard_D2s_v3"
postgresql_high_availability = {
should_enable = true
// Zone is given as a string, not a number.
standby_availability_zone = "2"
Comment thread
nguyena2 marked this conversation as resolved.
}

// Redis HA
should_enable_redis_high_availability = true

// Network Security — Full Private
// Private endpoints and a private AKS cluster are on; public network access is off.
should_enable_private_endpoint = true
should_enable_private_aks_cluster = true

should_enable_public_network_access = false
should_add_current_user_key_vault_admin = true
should_enable_microsoft_defender = true
// Unlike the prod example, purge protection is off for staging.
should_enable_purge_protection = false

// =============================================================================
// Conversion Pipeline (Optional)
// =============================================================================
// Pre-production posture: F8 capacity. The conversion pipeline reuses the
// platform stdl... data-lake account, so should_create_data_lake_storage must
// be true whenever should_deploy_conversion_pipeline is true (enforced by a
// root-level precondition).
// =============================================================================

// The pipeline is off in this example; set should_deploy_conversion_pipeline = true to opt in.
should_deploy_conversion_pipeline = false
// Already true, so the data-lake requirement is met as soon as the pipeline is enabled.
should_create_data_lake_storage = true
conversion_pipeline_config = {
should_create_fabric_capacity = true
should_create_fabric_workspace = true
// Mid-sized SKU among the example tfvars (dev F2, staging F8, prod F32).
fabric_capacity_sku = "F8"
should_enable_event_grid_dead_letter = true
}
Loading
Loading