Skip to content

Commit

Permalink
feat!: enable horizontal autoscaling of Liquid Legions v2 Mill
Browse files Browse the repository at this point in the history
  • Loading branch information
SanjayVas committed Jul 26, 2024
1 parent 6286147 commit 6239034
Show file tree
Hide file tree
Showing 91 changed files with 1,791 additions and 738 deletions.
17 changes: 11 additions & 6 deletions src/main/docker/images.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ COMMON_IMAGES = [
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/job:computations_cleaner_image",
repository = _PREFIX + "/duchy/computations-cleaner",
),
struct(
name = "duchy_mill_job_scheduler_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/daemon/mill:job_scheduler_image",
repository = _PREFIX + "/duchy/mill-job-scheduler",
),
struct(
name = "kingdom_data_server_image",
image = "//src/main/kotlin/org/wfanet/measurement/kingdom/deploy/gcloud/server:gcp_kingdom_data_server_image",
Expand Down Expand Up @@ -127,8 +132,8 @@ GKE_IMAGES = [
repository = _PREFIX + "/duchy/requisition-fulfillment",
),
struct(
name = "duchy_liquid_legions_v2_mill_daemon_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/gcloud/daemon/mill/liquidlegionsv2:gcs_liquid_legions_v2_mill_daemon_image",
name = "duchy_liquid_legions_v2_mill_job_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/gcloud/job/mill/liquidlegionsv2:gcs_liquid_legions_v2_mill_job_image",
repository = _PREFIX + "/duchy/liquid-legions-v2-mill",
),
struct(
Expand Down Expand Up @@ -172,8 +177,8 @@ EKS_IMAGES = [
repository = _PREFIX + "/duchy/aws-requisition-fulfillment",
),
struct(
name = "duchy_s3_liquid_legions_v2_mill_daemon_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/aws/daemon/mill/liquidlegionsv2:s3_liquid_legions_v2_mill_daemon_image",
name = "duchy_s3_liquid_legions_v2_mill_job_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/aws/job/mill/liquidlegionsv2:s3_liquid_legions_v2_mill_job_image",
repository = _PREFIX + "/duchy/aws-liquid-legions-v2-mill",
),
struct(
Expand All @@ -196,8 +201,8 @@ LOCAL_IMAGES = [
repository = _PREFIX + "/duchy/local-herald",
),
struct(
name = "forwarded_storage_liquid_legions_v2_mill_daemon_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/daemon/mill/liquidlegionsv2:forwarded_storage_liquid_legions_v2_mill_daemon_image",
name = "forwarded_storage_liquid_legions_v2_mill_job_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/job/mill/liquidlegionsv2:forwarded_storage_liquid_legions_v2_mill_job_image",
repository = _PREFIX + "/duchy/local-liquid-legions-v2-mill",
),
struct(
Expand Down
98 changes: 86 additions & 12 deletions src/main/k8s/base.cue
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,37 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
metadata: #ObjectMeta
}

// K8s Role.
//
// Schema for an RBAC Role object. `apiVersion` and `kind` are pinned to
// concrete values, so any value unified with #Role must agree with them.
#Role: {
apiVersion: "rbac.authorization.k8s.io/v1"
kind: "Role"
metadata: #ObjectMeta
// Open list of rule structs. `verbs` is a regular field, whereas
// `apiGroups`, `resources` and `resourceNames` are optional (`?`).
rules: [...{
apiGroups?: [...string]
resources?: [...string]
verbs: [...string]
resourceNames?: [...string]
}]
}

// K8s RoleBinding.
//
// Schema for an RBAC RoleBinding object. `apiVersion` and `kind` are pinned
// to concrete values; `roleRef` and `subjects` are left for the user to fill.
#RoleBinding: {
apiVersion: "rbac.authorization.k8s.io/v1"
kind: "RoleBinding"
metadata: #ObjectMeta
// Reference to the role being bound. All three fields are regular
// (non-optional) strings with no default.
roleRef: {
apiGroup: string
kind: string
name: string
}
// Open list of subject structs. `kind` and `name` are regular fields,
// whereas `apiGroup` and `namespace` are optional (`?`).
subjects: [...{
kind: string
name: string
apiGroup?: string
namespace?: string
}]
}

#ResourceQuantity: {
cpu?: string
memory?: string
Expand Down Expand Up @@ -400,6 +431,49 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
tolerations: [ for _, toleration in _tolerations {toleration}]
}

// K8s PodTemplateSpec.
//
// Pod metadata plus a #PodSpec, for unification into objects that embed a
// Pod template.
#PodTemplateSpec: {
metadata: #ObjectMeta & {
annotations: {
// Defaults to "true" (the `*` marks the default) but accepts any string,
// so an individual template can override it.
// NOTE(review): presumably read by the OpenTelemetry Operator to inject
// Java auto-instrumentation -- confirm against the cluster setup.
"instrumentation.opentelemetry.io/inject-java": string | *"true"
}
}
spec: #PodSpec
}

// K8s PodTemplate.
//
// Schema for a standalone PodTemplate object holding a single container.
// The app label, the secret volume name and the container name are all
// derived from `metadata.name`.
#PodTemplate: {
// Local alias for the object name, used in the interpolations below.
let Name = metadata.name

// Optional hidden field: name of a Secret to expose to the Pod. When it is
// left unset, no secret-backed mount is declared.
_secretName?: string
// Hidden definition of the template's only container. The Java options set
// here request a heap dump on OutOfMemory at the given path.
// NOTE(review): how `_javaOptions` is turned into JVM flags is defined by
// #Container, which is outside this view -- confirm there.
_container: #Container & {
_javaOptions: {
heapDumpOnOutOfMemory: true
heapDumpPath: "/run/heap-dumps"
}
}

apiVersion: "v1"
kind: "PodTemplate"
metadata: #ObjectMeta
template: #PodTemplateSpec & {
metadata: labels: {
app: "\(Name)-app"
}
spec: {
_mounts: {
// Comparing against bottom (`_|_`) tests whether `_secretName` was
// provided; the "<Name>-files" entry exists only in that case.
if _secretName != _|_ {
"\(Name)-files": {
volume: secret: secretName: _secretName
}
}
// Always declared, backed by an emptyDir volume.
// NOTE(review): presumably mounted at the `heapDumpPath` above; the
// mount path is resolved by #PodSpec, not here -- confirm.
"heap-dumps": volume: emptyDir: {}
}
_containers: "\(Name)-container": _container
}
}
}

// K8s Pod.
#Pod: {
apiVersion: "v1"
Expand Down Expand Up @@ -491,16 +565,13 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
selector: #LabelSelector & {
matchLabels: app: _name + "-app"
}
template: {
template: #PodTemplateSpec & {
metadata: {
labels: {
app: _name + "-app"
}
annotations: {
"instrumentation.opentelemetry.io/inject-java": string | *"true"
}
}
spec: #PodSpec & {
spec: {
_mounts: {
if _secretName != _|_ {
"\(_name)-files": {
Expand Down Expand Up @@ -542,20 +613,23 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
name: _name + "-cronjob"
}
spec: {
schedule: string
schedule: string
concurrencyPolicy?: "Allow" | "Forbid" | "Replace"
startingDeadlineSeconds?: int64
suspend?: bool
successfulJobsHistoryLimit?: int32 & >0
failedJobsHistoryLimit?: int32 & >0

jobTemplate: {
spec: {
backoffLimit: uint | *0
template: {
metadata: #ObjectMeta & {
template: #PodTemplateSpec & {
metadata: {
labels: {
app: _name + "-app"
}
annotations: {
"instrumentation.opentelemetry.io/inject-java": _ | *"true"
}
}
spec: #PodSpec & {
spec: {
if _secretName != _|_ {
_mounts: "\(_name)-files": {
volume: secret: secretName: _secretName
Expand Down
22 changes: 13 additions & 9 deletions src/main/k8s/dev/base_gke.cue
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,22 @@ package k8s

#NetworkPolicy: {
_egresses: {
// See https://cloud.google.com/kubernetes-engine/docs/how-to/network-policy#network-policy-and-workload-identity
gkeMetadataServer: {
to: [{ipBlock: cidr: "169.254.169.252/32"}]
ports: [
{
protocol: "TCP"
port: 988
},
{
protocol: "TCP"
port: 80
}]
ports: [{
protocol: "TCP"
port: 988
}]
}
gkeDataplaneV2: {
to: [{ipBlock: cidr: "169.254.169.254/32"}]
ports: [{
protocol: "TCP"
port: 80
}]
}

openTelemetryCollector: {
to: [{podSelector: matchLabels: app: "opentelemetry-collector-app"}]
ports: [{
Expand Down
50 changes: 24 additions & 26 deletions src/main/k8s/dev/duchy_eks.cue
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ _duchyCertName: "duchies/\(_duchyName)/certificates/\(_certificateId)"
}
}
#Llv2MillMaxHeapSize: "1G"
#Llv2MillReplicas: 1
#Llv2MillMaxConcurrency: 10
#HmssMillResourceRequirements: ResourceRequirements=#ResourceRequirements & {
requests: {
cpu: "2"
Expand Down Expand Up @@ -89,19 +89,13 @@ _duchyCertName: "duchies/\(_duchyName)/certificates/\(_certificateId)"
}
#ControlServiceMaxHeapSize: "320M"

objectSets: [
default_deny_ingress_and_egress,
duchy.deployments,
duchy.services,
duchy.networkPolicies,
duchy.cronjobs,
]
objectSets: [default_deny_ingress_and_egress] + [ for objectSet in duchy {objectSet}]

duchy: #PostgresDuchy & {
_imageSuffixes: {
"herald-daemon": "duchy/aws-herald"
"computation-control-server": "duchy/aws-computation-control"
"liquid-legions-v2-mill-daemon": "duchy/aws-liquid-legions-v2-mill"
"llv2-mill": "duchy/aws-liquid-legions-v2-mill"
"hmss-mill-daemon": "duchy/aws-honest-majority-share-shuffle-mill"
"requisition-fulfillment-server": "duchy/aws-requisition-fulfillment"
"internal-api-server": "duchy/aws-postgres-internal-server"
Expand All @@ -119,11 +113,14 @@ duchy: #PostgresDuchy & {
"worker1": _worker1SystemApiTarget
"worker2": _worker2SystemApiTarget
}
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: #AwsS3Config.flags
_verbose_grpc_logging: "false"
_postgresConfig: #AwsPostgresConfig
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: #AwsS3Config.flags
_verbose_grpc_logging: "false"
_duchyMillParallelism: 4
_liquidLegionsV2WorkLockDuration: "10m"
_postgresConfig: #AwsPostgresConfig

services: {
"requisition-fulfillment-server": _eipAllocations: _publicApiEipAllocs
"computation-control-server": _eipAllocations: _systemApiEipAllocs
Expand All @@ -138,18 +135,8 @@ duchy: #PostgresDuchy & {
serviceAccountName: #StorageServiceAccount
}
}
"liquid-legions-v2-mill-daemon-deployment": {
_workLockDuration: "10m"
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
spec: {
replicas: #Llv2MillReplicas
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
"mill-job-scheduler-deployment": {
_liquidLegionsV2MaxConcurrency: #Llv2MillMaxConcurrency
}
"hmss-mill-daemon-deployment": {
_workLockDuration: "5m"
Expand Down Expand Up @@ -191,4 +178,15 @@ duchy: #PostgresDuchy & {
}
}
}
podTemplates: {
"llv2-mill": {
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
}
}
54 changes: 24 additions & 30 deletions src/main/k8s/dev/duchy_gke.cue
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ _duchy_cert_name: "duchies/\(_duchy_name)/certificates/\(_certificateId)"
}
}
#Llv2MillMaxHeapSize: "1G"
#Llv2MillReplicas: 1
#Llv2MillMaxConcurrency: 10
#HmssMillResourceRequirements: ResourceRequirements=#ResourceRequirements & {
requests: {
cpu: "2"
Expand Down Expand Up @@ -90,15 +90,7 @@ _duchy_cert_name: "duchies/\(_duchy_name)/certificates/\(_certificateId)"
}
#ControlServiceMaxHeapSize: "320M"

objectSets: [
default_deny_ingress_and_egress,
duchy.serviceAccounts,
duchy.configMaps,
duchy.deployments,
duchy.services,
duchy.networkPolicies,
duchy.cronjobs,
]
objectSets: [default_deny_ingress_and_egress] + [ for objectSet in duchy {objectSet}]

_cloudStorageConfig: #CloudStorageConfig & {
bucket: _cloudStorageBucket
Expand All @@ -119,18 +111,18 @@ duchy: #SpannerDuchy & {
"worker1": _worker1SystemApiTarget
"worker2": _worker2SystemApiTarget
}
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: _cloudStorageConfig.flags
_verbose_grpc_logging: "false"
_duchyMillParallelism: 4
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: _cloudStorageConfig.flags
_verbose_grpc_logging: "false"
_duchyMillParallelism: 4
_liquidLegionsV2WorkLockDuration: "10m"

serviceAccounts: [string]: #WorkloadIdentityServiceAccount
serviceAccounts: {
"\(#InternalServerServiceAccount)": {
"\(#InternalServerServiceAccount)": #WorkloadIdentityServiceAccount & {
_iamServiceAccountName: "\(_duchy_name)-duchy-internal"
}
"\(#StorageServiceAccount)": {
"\(#StorageServiceAccount)": #WorkloadIdentityServiceAccount & {
_iamServiceAccountName: "\(_duchy_name)-duchy-storage"
}
}
Expand All @@ -155,18 +147,8 @@ duchy: #SpannerDuchy & {
serviceAccountName: #StorageServiceAccount
}
}
"liquid-legions-v2-mill-daemon-deployment": {
_workLockDuration: "10m"
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
spec: {
replicas: #Llv2MillReplicas
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
"mill-job-scheduler-deployment": {
_liquidLegionsV2MaxConcurrency: #Llv2MillMaxConcurrency
}
"hmss-mill-daemon-deployment": {
_workLockDuration: "5m"
Expand Down Expand Up @@ -205,4 +187,16 @@ duchy: #SpannerDuchy & {
"requisition-fulfillment-server": _ipAddressName: _publicApiAddressName
"computation-control-server": _ipAddressName: _systemApiAddressName
}

podTemplates: {
"llv2-mill": {
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
}
}
Loading

0 comments on commit 6239034

Please sign in to comment.