Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: enable horizontal autoscaling of Liquid Legions v2 Mill #1697

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions src/main/docker/images.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ COMMON_IMAGES = [
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/job:computations_cleaner_image",
repository = _PREFIX + "/duchy/computations-cleaner",
),
struct(
name = "duchy_mill_job_scheduler_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/daemon/mill:job_scheduler_image",
repository = _PREFIX + "/duchy/mill-job-scheduler",
),
struct(
name = "kingdom_data_server_image",
image = "//src/main/kotlin/org/wfanet/measurement/kingdom/deploy/gcloud/server:gcp_kingdom_data_server_image",
Expand Down Expand Up @@ -127,8 +132,8 @@ GKE_IMAGES = [
repository = _PREFIX + "/duchy/requisition-fulfillment",
),
struct(
name = "duchy_liquid_legions_v2_mill_daemon_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/gcloud/daemon/mill/liquidlegionsv2:gcs_liquid_legions_v2_mill_daemon_image",
name = "duchy_liquid_legions_v2_mill_job_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/gcloud/job/mill/liquidlegionsv2:gcs_liquid_legions_v2_mill_job_image",
repository = _PREFIX + "/duchy/liquid-legions-v2-mill",
),
struct(
Expand Down Expand Up @@ -172,8 +177,8 @@ EKS_IMAGES = [
repository = _PREFIX + "/duchy/aws-requisition-fulfillment",
),
struct(
name = "duchy_s3_liquid_legions_v2_mill_daemon_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/aws/daemon/mill/liquidlegionsv2:s3_liquid_legions_v2_mill_daemon_image",
name = "duchy_s3_liquid_legions_v2_mill_job_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/aws/job/mill/liquidlegionsv2:s3_liquid_legions_v2_mill_job_image",
repository = _PREFIX + "/duchy/aws-liquid-legions-v2-mill",
),
struct(
Expand All @@ -196,8 +201,8 @@ LOCAL_IMAGES = [
repository = _PREFIX + "/duchy/local-herald",
),
struct(
name = "forwarded_storage_liquid_legions_v2_mill_daemon_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/daemon/mill/liquidlegionsv2:forwarded_storage_liquid_legions_v2_mill_daemon_image",
name = "forwarded_storage_liquid_legions_v2_mill_job_image",
image = "//src/main/kotlin/org/wfanet/measurement/duchy/deploy/common/job/mill/liquidlegionsv2:forwarded_storage_liquid_legions_v2_mill_job_image",
repository = _PREFIX + "/duchy/local-liquid-legions-v2-mill",
),
struct(
Expand Down
98 changes: 86 additions & 12 deletions src/main/k8s/base.cue
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,37 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
metadata: #ObjectMeta
}

// #Role is the schema for a Kubernetes Role object in the
// rbac.authorization.k8s.io/v1 API.
//
// Each entry of `rules` must carry `verbs`; `apiGroups`, `resources` and
// `resourceNames` are optional lists of strings.
#Role: {
	apiVersion: "rbac.authorization.k8s.io/v1"
	kind:       "Role"
	metadata:   #ObjectMeta

	// Open-ended list of policy rules.
	rules: [...{
		apiGroups?: [...string]
		resources?: [...string]
		verbs: [...string]
		resourceNames?: [...string]
	}]
}

// #RoleBinding is the schema for a Kubernetes RoleBinding object in the
// rbac.authorization.k8s.io/v1 API.
//
// `roleRef` identifies the role being bound and `subjects` lists who it is
// bound to.
#RoleBinding: {
	apiVersion: "rbac.authorization.k8s.io/v1"
	kind:       "RoleBinding"
	metadata:   #ObjectMeta

	// All three roleRef fields are required strings.
	roleRef: {apiGroup: string, kind: string, name: string}

	// Open-ended list of subjects; only `kind` and `name` are required.
	subjects: [...{
		kind:       string
		name:       string
		apiGroup?:  string
		namespace?: string
	}]
}

#ResourceQuantity: {
cpu?: string
memory?: string
Expand Down Expand Up @@ -400,6 +431,49 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
tolerations: [ for _, toleration in _tolerations {toleration}]
}

// #PodTemplateSpec is the schema for a Kubernetes PodTemplateSpec: pod
// metadata plus a #PodSpec.
//
// The "instrumentation.opentelemetry.io/inject-java" annotation is always
// present; it may be any string and defaults to "true".
#PodTemplateSpec: {
	metadata: #ObjectMeta
	metadata: annotations: "instrumentation.opentelemetry.io/inject-java": string | *"true"

	spec: #PodSpec
}

// #PodTemplate is the schema for a Kubernetes PodTemplate (apiVersion "v1")
// that wraps a single container.
//
// Hidden inputs:
//   _secretName  optional; when set, a "<name>-files" mount backed by that
//                secret is added to the pod spec.
//   _container   the container definition, with heap dumps on
//                out-of-memory enabled and written to "/run/heap-dumps".
//
// The pod is labelled `app: "<name>-app"` and the container is registered as
// "<name>-container", where <name> is `metadata.name`.
#PodTemplate: {
	let Name = metadata.name

	_secretName?: string

	_container: #Container
	_container: _javaOptions: {
		heapDumpOnOutOfMemory: true
		heapDumpPath:          "/run/heap-dumps"
	}

	apiVersion: "v1"
	kind:       "PodTemplate"
	metadata:   #ObjectMeta

	template: #PodTemplateSpec
	template: metadata: labels: app: "\(Name)-app"
	template: spec: {
		_mounts: {
			// Only mount the secret when the caller supplied one.
			if _secretName != _|_ {
				"\(Name)-files": volume: secret: secretName: _secretName
			}
			// NOTE(review): presumably this emptyDir is what backs
			// heapDumpPath above; confirm against #PodSpec's handling of
			// _mounts.
			"heap-dumps": volume: emptyDir: {}
		}
		_containers: "\(Name)-container": _container
	}
}

// K8s Pod.
#Pod: {
apiVersion: "v1"
Expand Down Expand Up @@ -491,16 +565,13 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
selector: #LabelSelector & {
matchLabels: app: _name + "-app"
}
template: {
template: #PodTemplateSpec & {
metadata: {
labels: {
app: _name + "-app"
}
annotations: {
"instrumentation.opentelemetry.io/inject-java": string | *"true"
}
}
spec: #PodSpec & {
spec: {
_mounts: {
if _secretName != _|_ {
"\(_name)-files": {
Expand Down Expand Up @@ -542,20 +613,23 @@ objects: [ for objectSet in objectSets for object in objectSet {object}]
name: _name + "-cronjob"
}
spec: {
schedule: string
schedule: string
concurrencyPolicy?: "Allow" | "Forbid" | "Replace"
startingDeadlineSeconds?: int64
suspend?: bool
successfulJobsHistoryLimit?: int32 & >0
failedJobsHistoryLimit?: int32 & >0

jobTemplate: {
spec: {
backoffLimit: uint | *0
template: {
metadata: #ObjectMeta & {
template: #PodTemplateSpec & {
metadata: {
labels: {
app: _name + "-app"
}
annotations: {
"instrumentation.opentelemetry.io/inject-java": _ | *"true"
}
}
spec: #PodSpec & {
spec: {
if _secretName != _|_ {
_mounts: "\(_name)-files": {
volume: secret: secretName: _secretName
Expand Down
22 changes: 13 additions & 9 deletions src/main/k8s/dev/base_gke.cue
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,22 @@ package k8s

#NetworkPolicy: {
_egresses: {
// See https://cloud.google.com/kubernetes-engine/docs/how-to/network-policy#network-policy-and-workload-identity
gkeMetadataServer: {
to: [{ipBlock: cidr: "169.254.169.252/32"}]
ports: [
{
protocol: "TCP"
port: 988
},
{
protocol: "TCP"
port: 80
}]
ports: [{
protocol: "TCP"
port: 988
}]
}
gkeDataplaneV2: {
to: [{ipBlock: cidr: "169.254.169.254/32"}]
ports: [{
protocol: "TCP"
port: 80
}]
}

openTelemetryCollector: {
to: [{podSelector: matchLabels: app: "opentelemetry-collector-app"}]
ports: [{
Expand Down
50 changes: 24 additions & 26 deletions src/main/k8s/dev/duchy_eks.cue
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ _duchyCertName: "duchies/\(_duchyName)/certificates/\(_certificateId)"
}
}
#Llv2MillMaxHeapSize: "1G"
#Llv2MillReplicas: 1
#Llv2MillMaxConcurrency: 10
#HmssMillResourceRequirements: ResourceRequirements=#ResourceRequirements & {
requests: {
cpu: "2"
Expand Down Expand Up @@ -89,19 +89,13 @@ _duchyCertName: "duchies/\(_duchyName)/certificates/\(_certificateId)"
}
#ControlServiceMaxHeapSize: "320M"

objectSets: [
default_deny_ingress_and_egress,
duchy.deployments,
duchy.services,
duchy.networkPolicies,
duchy.cronjobs,
]
objectSets: [default_deny_ingress_and_egress] + [ for objectSet in duchy {objectSet}]

duchy: #PostgresDuchy & {
_imageSuffixes: {
"herald-daemon": "duchy/aws-herald"
"computation-control-server": "duchy/aws-computation-control"
"liquid-legions-v2-mill-daemon": "duchy/aws-liquid-legions-v2-mill"
"llv2-mill": "duchy/aws-liquid-legions-v2-mill"
"hmss-mill-daemon": "duchy/aws-honest-majority-share-shuffle-mill"
"requisition-fulfillment-server": "duchy/aws-requisition-fulfillment"
"internal-api-server": "duchy/aws-postgres-internal-server"
Expand All @@ -119,11 +113,14 @@ duchy: #PostgresDuchy & {
"worker1": _worker1SystemApiTarget
"worker2": _worker2SystemApiTarget
}
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: #AwsS3Config.flags
_verbose_grpc_logging: "false"
_postgresConfig: #AwsPostgresConfig
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: #AwsS3Config.flags
_verbose_grpc_logging: "false"
_duchyMillParallelism: 4
_liquidLegionsV2WorkLockDuration: "10m"
_postgresConfig: #AwsPostgresConfig

services: {
"requisition-fulfillment-server": _eipAllocations: _publicApiEipAllocs
"computation-control-server": _eipAllocations: _systemApiEipAllocs
Expand All @@ -138,18 +135,8 @@ duchy: #PostgresDuchy & {
serviceAccountName: #StorageServiceAccount
}
}
"liquid-legions-v2-mill-daemon-deployment": {
_workLockDuration: "10m"
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
spec: {
replicas: #Llv2MillReplicas
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
"mill-job-scheduler-deployment": {
_liquidLegionsV2MaxConcurrency: #Llv2MillMaxConcurrency
}
"hmss-mill-daemon-deployment": {
_workLockDuration: "5m"
Expand Down Expand Up @@ -191,4 +178,15 @@ duchy: #PostgresDuchy & {
}
}
}
podTemplates: {
"llv2-mill": {
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
}
}
54 changes: 24 additions & 30 deletions src/main/k8s/dev/duchy_gke.cue
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ _duchy_cert_name: "duchies/\(_duchy_name)/certificates/\(_certificateId)"
}
}
#Llv2MillMaxHeapSize: "1G"
#Llv2MillReplicas: 1
#Llv2MillMaxConcurrency: 10
#HmssMillResourceRequirements: ResourceRequirements=#ResourceRequirements & {
requests: {
cpu: "2"
Expand Down Expand Up @@ -90,15 +90,7 @@ _duchy_cert_name: "duchies/\(_duchy_name)/certificates/\(_certificateId)"
}
#ControlServiceMaxHeapSize: "320M"

objectSets: [
default_deny_ingress_and_egress,
duchy.serviceAccounts,
duchy.configMaps,
duchy.deployments,
duchy.services,
duchy.networkPolicies,
duchy.cronjobs,
]
objectSets: [default_deny_ingress_and_egress] + [ for objectSet in duchy {objectSet}]

_cloudStorageConfig: #CloudStorageConfig & {
bucket: _cloudStorageBucket
Expand All @@ -119,18 +111,18 @@ duchy: #SpannerDuchy & {
"worker1": _worker1SystemApiTarget
"worker2": _worker2SystemApiTarget
}
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: _cloudStorageConfig.flags
_verbose_grpc_logging: "false"
_duchyMillParallelism: 4
_kingdom_system_api_target: #KingdomSystemApiTarget
_kingdom_public_api_target: #KingdomPublicApiTarget
_blob_storage_flags: _cloudStorageConfig.flags
_verbose_grpc_logging: "false"
_duchyMillParallelism: 4
_liquidLegionsV2WorkLockDuration: "10m"

serviceAccounts: [string]: #WorkloadIdentityServiceAccount
serviceAccounts: {
"\(#InternalServerServiceAccount)": {
"\(#InternalServerServiceAccount)": #WorkloadIdentityServiceAccount & {
_iamServiceAccountName: "\(_duchy_name)-duchy-internal"
}
"\(#StorageServiceAccount)": {
"\(#StorageServiceAccount)": #WorkloadIdentityServiceAccount & {
_iamServiceAccountName: "\(_duchy_name)-duchy-storage"
}
}
Expand All @@ -155,18 +147,8 @@ duchy: #SpannerDuchy & {
serviceAccountName: #StorageServiceAccount
}
}
"liquid-legions-v2-mill-daemon-deployment": {
_workLockDuration: "10m"
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
spec: {
replicas: #Llv2MillReplicas
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
"mill-job-scheduler-deployment": {
_liquidLegionsV2MaxConcurrency: #Llv2MillMaxConcurrency
}
"hmss-mill-daemon-deployment": {
_workLockDuration: "5m"
Expand Down Expand Up @@ -205,4 +187,16 @@ duchy: #SpannerDuchy & {
"requisition-fulfillment-server": _ipAddressName: _publicApiAddressName
"computation-control-server": _ipAddressName: _systemApiAddressName
}

podTemplates: {
"llv2-mill": {
_container: {
_javaOptions: maxHeapSize: #Llv2MillMaxHeapSize
resources: #Llv2MillResourceRequirements
}
template: spec: #ServiceAccountPodSpec & #SpotVmPodSpec & {
serviceAccountName: #StorageServiceAccount
}
}
}
}
Loading
Loading