From 9ec69fe7875fca41e8beb7c64aab563c5915340d Mon Sep 17 00:00:00 2001 From: cw75 Date: Fri, 12 May 2023 17:21:39 +0000 Subject: [PATCH 1/8] wip --- sdk/aqueduct/client.py | 7 +- sdk/aqueduct/constants/enums.py | 5 + sdk/aqueduct/resources/connect_config.py | 48 +- .../server/handler/cloud_integration_utils.go | 31 +- .../cmd/server/handler/connect_integration.go | 35 +- src/golang/lib/dynamic/engine.go | 411 +++++++++++++----- src/golang/lib/engine/authenticate.go | 81 +++- src/golang/lib/lib_utils/utils.go | 29 +- .../lib/models/shared/integration_config.go | 33 +- .../operator/connector/auth/static_config.go | 6 + 10 files changed, 529 insertions(+), 157 deletions(-) diff --git a/sdk/aqueduct/client.py b/sdk/aqueduct/client.py index 91c4f1fe5..162552b15 100644 --- a/sdk/aqueduct/client.py +++ b/sdk/aqueduct/client.py @@ -44,7 +44,6 @@ from aqueduct.resources.dynamic_k8s import DynamicK8sResource from aqueduct.resources.ecr import ECRResource from aqueduct.resources.google_sheets import GoogleSheetsResource -from aqueduct.resources.k8s import K8sResource from aqueduct.resources.mongodb import MongoDBResource from aqueduct.resources.s3 import S3Resource from aqueduct.resources.salesforce import SalesforceResource @@ -350,13 +349,13 @@ def integration( GoogleSheetsResource, RelationalDBResource, AirflowResource, - K8sResource, LambdaResource, MongoDBResource, DatabricksResource, SparkResource, AWSResource, ECRResource, + DynamicK8sResource, ]: """Deprecated. Use `client.resource()` instead.""" logger().warning( @@ -372,13 +371,13 @@ def resource( GoogleSheetsResource, RelationalDBResource, AirflowResource, - K8sResource, LambdaResource, MongoDBResource, DatabricksResource, SparkResource, AWSResource, ECRResource, + DynamicK8sResource, ]: """Retrieves a connected integration object. 
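As a quick illustration of the accessor change above (a sketch only: the API key, server address, and resource name are placeholders, and it assumes an Aqueduct server with a Kubernetes resource already registered), retrieving a Kubernetes resource through client.resource() now yields a DynamicK8sResource rather than the removed K8sResource:

from aqueduct import Client

client = Client("API_KEY", "http://localhost:8080")  # placeholder credentials/address
k8s = client.resource("my_k8s")                      # a resource of service type K8S
print(type(k8s).__name__)                            # "DynamicK8sResource" with this patch
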
@@ -437,7 +436,7 @@ def resource( metadata=integration_info, ) elif integration_info.service == ServiceType.K8S: - return K8sResource( + return DynamicK8sResource( metadata=integration_info, ) elif integration_info.service == ServiceType.LAMBDA: diff --git a/sdk/aqueduct/constants/enums.py b/sdk/aqueduct/constants/enums.py index 0df5202f8..4da78d605 100644 --- a/sdk/aqueduct/constants/enums.py +++ b/sdk/aqueduct/constants/enums.py @@ -82,6 +82,11 @@ class ServiceType(str, Enum, metaclass=MetaEnum): ECR = "ECR" +class CloudProviderType(str, Enum, metaclass=MetaEnum): + AWS = "AWS" + GCP = "GCP" + + class RelationalDBServices(str, Enum, metaclass=MetaEnum): """Must match the corresponding entries in `ServiceType` exactly.""" diff --git a/sdk/aqueduct/resources/connect_config.py b/sdk/aqueduct/resources/connect_config.py index d1992a24f..0a4f6a522 100644 --- a/sdk/aqueduct/resources/connect_config.py +++ b/sdk/aqueduct/resources/connect_config.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union, cast -from aqueduct.constants.enums import MetaEnum, NotificationLevel, ServiceType +from aqueduct.constants.enums import CloudProviderType, MetaEnum, NotificationLevel, ServiceType from aqueduct.error import InternalAqueductError, InvalidUserArgumentException from pydantic import BaseModel, Extra, Field @@ -199,6 +199,13 @@ class AWSConfig(BaseConnectionConfig): k8s: Optional[DynamicK8sConfig] +class GCPConfig(BaseConnectionConfig): + region: str + zone: str + service_account_key_path: str = "" + service_account_key: str = "" + + class ECRConfig(BaseConnectionConfig): # Either 1) all of access_key_id, secret_access_key, region, or 2) both config_file_path and # config_file_profile need to be specified. Any other cases will be rejected by the server's @@ -265,11 +272,24 @@ class DatabricksConfig(BaseConnectionConfig): class K8sConfig(BaseConnectionConfig): + kubeconfig_path: str = "" + cluster_name: str = "" + use_same_cluster: str = "false" + # dynamic: str = "false" + # cloud_integration_id: str = "" + cloud_provider: Optional[CloudProviderType] + gcp_config: Optional[GCPConfig] + # cluster_config: Optional[DynamicK8sConfig] + + +class _K8sConfigWithSerializedConfig(BaseConnectionConfig): kubeconfig_path: str cluster_name: str use_same_cluster: str = "false" - dynamic: str = "false" - cloud_integration_id: str = "" + # dynamic: str = "false" + # cloud_integration_id: str = "" + cloud_provider: Optional[CloudProviderType] + gcp_config_serialized: Optional[str] # this is a json-serialized string of GCPConfig ResourceConfig = Union[ @@ -294,6 +314,7 @@ class K8sConfig(BaseConnectionConfig): DatabricksConfig, K8sConfig, CondaConfig, + _K8sConfigWithSerializedConfig, ] @@ -359,6 +380,9 @@ def prepare_integration_config( if service == ServiceType.AWS: return _prepare_aws_config(cast(AWSConfig, config)) + if service == ServiceType.K8S: + return _prepare_k8s_config(cast(K8sConfig, config)) + return config @@ -394,6 +418,24 @@ def _prepare_aws_config(config: AWSConfig) -> _AWSConfigWithSerializedConfig: ) +def _prepare_k8s_config(config: K8sConfig) -> _K8sConfigWithSerializedConfig: + if config.gcp_config is not None and config.gcp_config.service_account_key_path is not None: + with open(config.gcp_config.service_account_key_path, "r") as f: + config.gcp_config.service_account_key = f.read() + + return _K8sConfigWithSerializedConfig( + kubeconfig_path=config.kubeconfig_path, + cluster_name=config.cluster_name, + use_same_cluster=config.use_same_cluster, + # 
dynamic=config.dynamic, + # cloud_integration_id=config.cloud_integration_id, + cloud_provider=config.cloud_provider, + gcp_config_serialized=( + None if config.gcp_config is None else config.gcp_config.json(exclude_none=True) + ), + ) + + def _prepare_big_query_config(config: BigQueryConfig) -> BigQueryConfig: """Prepares the BigQueryConfig object by reading the service account credentials into a string field if the filepath is specified. diff --git a/src/golang/cmd/server/handler/cloud_integration_utils.go b/src/golang/cmd/server/handler/cloud_integration_utils.go index 33cbb701d..99e818628 100644 --- a/src/golang/cmd/server/handler/cloud_integration_utils.go +++ b/src/golang/cmd/server/handler/cloud_integration_utils.go @@ -38,7 +38,7 @@ func setupCloudIntegration( } terraformPath := filepath.Join(os.Getenv("HOME"), ".aqueduct", "server", "cloud_integration", args.Name, "eks") - if err = setupTerraformDirectory(terraformPath); err != nil { + if err = dynamic.SetupTerraformDirectory(dynamic.EKSTerraformTemplateDir, terraformPath); err != nil { return http.StatusInternalServerError, errors.Wrap(err, "Unable to create Terraform directory.") } @@ -51,8 +51,8 @@ func setupCloudIntegration( config := shared.DynamicK8sConfig{ Keepalive: strconv.Itoa(shared.K8sDefaultKeepalive), - CpuNodeType: shared.K8sDefaultCpuNodeType, - GpuNodeType: shared.K8sDefaultGpuNodeType, + CpuNodeType: shared.EKSDefaultCpuNodeType, + GpuNodeType: shared.EKSDefaultGpuNodeType, MinCpuNode: strconv.Itoa(shared.K8sDefaultMinCpuNode), MaxCpuNode: strconv.Itoa(shared.K8sDefaultMaxCpuNode), MinGpuNode: strconv.Itoa(shared.K8sDefaultMinGpuNode), @@ -117,31 +117,6 @@ func setupCloudIntegration( return http.StatusOK, nil } -// setupTerraformDirectory copies all files and folders in the Terraform template directory to the -// cloud integration's destination directory, which is ~/.aqueduct/server/cloud_integration//eks. -func setupTerraformDirectory(dst string) error { - // Create the destination directory if it doesn't exist. - if err := os.MkdirAll(dst, 0o755); err != nil { - return err - } - - _, stdErr, err := lib_utils.RunCmd( - "cp", - []string{ - "-R", // we could have used -T to not create a directory if the source is a directory, but it's not supported on macOS - fmt.Sprintf("%s%s.", dynamic.TerraformTemplateDir, string(filepath.Separator)), - dst, - }, - "", - false, - ) - if err != nil { - return errors.New(stdErr) - } - - return nil -} - // deleteCloudIntegrationHelper does the following: // 1. Verifies that there is no workflow using the dynamic k8s integration. // 2. Deletes the EKS cluster if it's running. 
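Before the server-side changes below, a sketch of how the new SDK surface from connect_config.py might be used to register an on-demand GKE resource. The field names mirror K8sConfig/GCPConfig as defined above; the API key, address, resource name, region/zone values, and key path are placeholders, and the connect_integration call is assumed to accept a ResourceConfig object (newer SDKs may expose this as connect_resource):

from aqueduct import Client
from aqueduct.constants.enums import CloudProviderType, ServiceType
from aqueduct.resources.connect_config import GCPConfig, K8sConfig

client = Client("API_KEY", "http://localhost:8080")  # placeholder credentials/address

# cloud_provider routes the server to the GKE code path; the service account key is
# read from service_account_key_path and serialized client-side by _prepare_k8s_config
# before the request is sent.
config = K8sConfig(
    use_same_cluster="false",
    cloud_provider=CloudProviderType.GCP,
    gcp_config=GCPConfig(
        region="us-central1",        # example region/zone
        zone="us-central1-a",
        service_account_key_path="/path/to/service_account_key.json",
    ),
)

client.connect_integration("ondemand_gke", ServiceType.K8S, config)

Server-side, the ValidatePrerequisites changes in the next file then check for terraform, gcloud, and the gke-gcloud-auth-plugin before a GCP-backed Kubernetes connection is accepted.
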
diff --git a/src/golang/cmd/server/handler/connect_integration.go b/src/golang/cmd/server/handler/connect_integration.go index 955e03751..e4b4c8046 100644 --- a/src/golang/cmd/server/handler/connect_integration.go +++ b/src/golang/cmd/server/handler/connect_integration.go @@ -177,6 +177,7 @@ func (h *ConnectIntegrationHandler) Perform(ctx context.Context, interfaceArgs i args.Name, args.ID, args.OrgID, + args.Config, h.IntegrationRepo, h.Database, ) @@ -639,7 +640,7 @@ func validateKubernetesConfig( ctx context.Context, config auth.Config, ) (int, error) { - if err := engine.AuthenticateK8sConfig(ctx, config); err != nil { + if err := engine.AuthenticateAndUpdateK8sConfig(ctx, config); err != nil { return http.StatusBadRequest, err } @@ -724,6 +725,7 @@ func ValidatePrerequisites( name string, userID uuid.UUID, orgID string, + conf auth.Config, integrationRepo repos.Integration, DB database.Database, ) (int, error) { @@ -826,6 +828,37 @@ func ValidatePrerequisites( } } + // For on-demand GKE resource, we require the user to have Terraform, gcloud, and gcloud gke-gcloud-auth-plugin plugin installed. + if svc == shared.Kubernetes { + // Parse the config and see if cloud provider is GCP + k8sConf, err := lib_utils.ParseK8sConfig(conf) + if err != nil { + return http.StatusInternalServerError, errors.Wrap(err, "Unable to parse Kubernetes configuration.") + } + + if k8sConf.CloudProvider == shared.GCPProvider { + if _, _, err := lib_utils.RunCmd("terraform", []string{"--version"}, "", false); err != nil { + return http.StatusNotFound, errors.Wrap(err, "terraform executable not found. Please go to https://developer.hashicorp.com/terraform/downloads to install terraform") + } + + _, _, err := lib_utils.RunCmd("gcloud", []string{"--version"}, "", false) + if err != nil { + return http.StatusNotFound, errors.Wrap(err, "gcloud executable not found. Please go to https://cloud.google.com/sdk/docs/install to install gcloud") + } + + // TODO: check version lowerbound? + + componentsOutput, _, err := lib_utils.RunCmd("gcloud", []string{"components", "list"}, "", false) + if err != nil { + return http.StatusUnprocessableEntity, errors.Wrap(err, "Error listing gcloud components") + } + + if !strings.Contains(componentsOutput, "gke-gcloud-auth-plugin") { + return http.StatusUnprocessableEntity, errors.New("gke-gcloud-auth-plugin is not installed. Please run `gcloud components install gke-gcloud-auth-plugin` to install it.") + } + } + } + return http.StatusOK, nil } diff --git a/src/golang/lib/dynamic/engine.go b/src/golang/lib/dynamic/engine.go index 3802c9044..d62f8dafc 100644 --- a/src/golang/lib/dynamic/engine.go +++ b/src/golang/lib/dynamic/engine.go @@ -3,6 +3,7 @@ package dynamic import ( "context" "crypto/rand" + "encoding/json" "fmt" "math/big" "os" @@ -38,7 +39,10 @@ const ( K8sIntegrationNameSuffix = "aqueduct_ondemand_k8s" ) -var TerraformTemplateDir = filepath.Join(os.Getenv("HOME"), ".aqueduct", "server", "template", "aws", "eks") +var ( + EKSTerraformTemplateDir = filepath.Join(os.Getenv("HOME"), ".aqueduct", "server", "template", "aws", "eks") + GKETerraformTemplateDir = filepath.Join(os.Getenv("HOME"), ".aqueduct", "server", "template", "gke") +) // PrepareCluster blocks until the cluster is in status "Active". 
func PrepareCluster( @@ -114,7 +118,7 @@ func CreateOrUpdateK8sCluster( db database.Database, ) error { if !(action == K8sClusterCreateAction || action == K8sClusterUpdateAction) { - return errors.Newf("Unsupport action %s.", action) + return errors.Newf("Unsupported action %s.", action) } configDeltaMap := configDelta.ToMap() @@ -145,83 +149,169 @@ func CreateOrUpdateK8sCluster( return err } - awsConfig, err := fetchAWSCredential(ctx, engineIntegration, vaultObject) - if err != nil { - return err + var awsConfig *shared.AWSConfig + var gcpConfig *shared.GCPConfig + var err error + + if engineIntegration.Config[shared.K8sCloudProviderKey] == string(shared.GCPProvider) { + gcpConfig, err = fetchGCPCredential(ctx, engineIntegration, vaultObject) + if err != nil { + return err + } + } else { + awsConfig, err = fetchAWSCredential(ctx, engineIntegration, vaultObject) + if err != nil { + return err + } } - if err := runTerraformApply(awsConfig, engineIntegration); err != nil { + if err := runTerraformApply(awsConfig, gcpConfig, engineIntegration); err != nil { return err } if action == K8sClusterCreateAction { - var envVars []string - if awsConfig.AccessKeyId != "" && awsConfig.SecretAccessKey != "" && awsConfig.Region != "" { - // If we enter here, it means the authentication mode is access key. - envVars = []string{ - fmt.Sprintf("AWS_ACCESS_KEY_ID=%s", awsConfig.AccessKeyId), - fmt.Sprintf("AWS_SECRET_ACCESS_KEY=%s", awsConfig.SecretAccessKey), - fmt.Sprintf("AWS_REGION=%s", awsConfig.Region), + if awsConfig != nil { + var envVars []string + if awsConfig.AccessKeyId != "" && awsConfig.SecretAccessKey != "" && awsConfig.Region != "" { + // If we enter here, it means the authentication mode is access key. + envVars = []string{ + fmt.Sprintf("AWS_ACCESS_KEY_ID=%s", awsConfig.AccessKeyId), + fmt.Sprintf("AWS_SECRET_ACCESS_KEY=%s", awsConfig.SecretAccessKey), + fmt.Sprintf("AWS_REGION=%s", awsConfig.Region), + } + } else { + // If we enter here, it means the authentication mode is credential file. 
+ envVars = []string{ + fmt.Sprintf("AWS_SHARED_CREDENTIALS_FILE=%s", awsConfig.ConfigFilePath), + fmt.Sprintf("AWS_PROFILE=%s", awsConfig.ConfigFileProfile), + } + } + if _, _, err := lib_utils.RunCmd( + "env", + append( + envVars, + "aws", + "eks", + "update-kubeconfig", + "--name", + engineIntegration.Config[shared.K8sClusterNameKey], + "--kubeconfig", + engineIntegration.Config[shared.K8sKubeconfigPathKey], + ), + engineIntegration.Config[shared.K8sTerraformPathKey], + true, + ); err != nil { + return errors.Wrap(err, "Failed to update Kubeconfig") + } + + config, err := clientcmd.LoadFromFile(engineIntegration.Config[shared.K8sKubeconfigPathKey]) + if err != nil { + return errors.Wrap(err, "Failed to load Kubeconfig") + } + + for _, authInfo := range config.AuthInfos { + if awsConfig.AccessKeyId != "" && awsConfig.SecretAccessKey != "" && awsConfig.Region != "" { + authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ + Name: "AWS_ACCESS_KEY_ID", + Value: awsConfig.AccessKeyId, + }) + authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ + Name: "AWS_SECRET_ACCESS_KEY", + Value: awsConfig.SecretAccessKey, + }) + authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ + Name: "AWS_REGION", + Value: awsConfig.Region, + }) + } else { + authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ + Name: "AWS_SHARED_CREDENTIALS_FILE", + Value: awsConfig.ConfigFilePath, + }) + authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ + Name: "AWS_PROFILE", + Value: awsConfig.ConfigFileProfile, + }) + } + } + + err = clientcmd.WriteToFile(*config, engineIntegration.Config[shared.K8sKubeconfigPathKey]) + if err != nil { + return errors.Wrap(err, "Failed to update Kubeconfig with environment variables") } } else { - // If we enter here, it means the authentication mode is credential file. 
- envVars = []string{ - fmt.Sprintf("AWS_SHARED_CREDENTIALS_FILE=%s", awsConfig.ConfigFilePath), - fmt.Sprintf("AWS_PROFILE=%s", awsConfig.ConfigFileProfile), + // GCP + var key struct { + ProjectID string `json:"project_id"` + ClientEmail string `json:"client_email"` } - } - if _, _, err := lib_utils.RunCmd( - "env", - append( - envVars, - "aws", - "eks", - "update-kubeconfig", - "--name", - engineIntegration.Config[shared.K8sClusterNameKey], - "--kubeconfig", - engineIntegration.Config[shared.K8sKubeconfigPathKey], - ), - engineIntegration.Config[shared.K8sTerraformPathKey], - true, - ); err != nil { - return errors.Wrap(err, "Failed to update Kubeconfig") - } - config, err := clientcmd.LoadFromFile(engineIntegration.Config[shared.K8sKubeconfigPathKey]) - if err != nil { - return errors.Wrap(err, "Failed to load Kubeconfig") - } + err := json.Unmarshal([]byte(gcpConfig.ServiceAccountKey), &key) + if err != nil { + return errors.Wrap(err, "Failed to parse project ID and client email from service account key") + } - for _, authInfo := range config.AuthInfos { - if awsConfig.AccessKeyId != "" && awsConfig.SecretAccessKey != "" && awsConfig.Region != "" { - authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ - Name: "AWS_ACCESS_KEY_ID", - Value: awsConfig.AccessKeyId, - }) - authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ - Name: "AWS_SECRET_ACCESS_KEY", - Value: awsConfig.SecretAccessKey, - }) - authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ - Name: "AWS_REGION", - Value: awsConfig.Region, - }) - } else { - authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ - Name: "AWS_SHARED_CREDENTIALS_FILE", - Value: awsConfig.ConfigFilePath, - }) - authInfo.Exec.Env = append(authInfo.Exec.Env, api.ExecEnvVar{ - Name: "AWS_PROFILE", - Value: awsConfig.ConfigFileProfile, - }) + // Write the service account key to a temporary file in the resource's Terraform directory. + // This is necessary because the gcloud CLI requires a file path to the service account key. + serviceAccountKeyPath := filepath.Join(engineIntegration.Config[shared.K8sTerraformPathKey], "service_account_key.json") + err = os.WriteFile(serviceAccountKeyPath, []byte(gcpConfig.ServiceAccountKey), 0o644) + if err != nil { + return errors.Wrap(err, "Failed to write service account key to temporary file") } - } - err = clientcmd.WriteToFile(*config, engineIntegration.Config[shared.K8sKubeconfigPathKey]) - if err != nil { - return errors.Wrap(err, "Failed to update Kubeconfig with environment variables") + if _, _, err := lib_utils.RunCmd( + "gcloud", + []string{ + "auth", + "activate-service-account", + "--key-file", + serviceAccountKeyPath, + }, + "", + true, + ); err != nil { + return errors.Wrap(err, "Failed to activate service account") + } + + if _, _, err := lib_utils.RunCmd( + "gcloud", + []string{ + "config", + "set", + "account", + key.ClientEmail, + }, + "", + true, + ); err != nil { + return errors.Wrap(err, "Failed to set account") + } + + kubeconfigEnv := fmt.Sprintf("KUBECONFIG=%s", engineIntegration.Config[shared.K8sKubeconfigPathKey]) + if _, _, err := lib_utils.RunCmd( + "env", + []string{ + kubeconfigEnv, + "gcloud", + "container", + "clusters", + "get-credentials", + engineIntegration.Config[shared.K8sClusterNameKey], + "--region", + gcpConfig.Region, + "--project", + key.ProjectID, + }, + "", + true, + ); err != nil { + return errors.Wrap(err, "Failed to update Kubeconfig") + } + + // Delete the temporary service account key file. 
+ if err := os.Remove(serviceAccountKeyPath); err != nil { + return errors.Wrap(err, "Failed to remove temporary service account key file") + } } } @@ -283,14 +373,25 @@ func DeleteK8sCluster( return err } - // Even for deletion, we need to specify the AWS region, so we need to pass in the actual AWS + // Even for deletion, we need to specify the AWS region, so we need to pass in the cloud provider // config instead of a dummy one to generateTerraformVariables. - awsConfig, err := fetchAWSCredential(ctx, engineIntegration, vaultObject) - if err != nil { - return err + var awsConfig *shared.AWSConfig + var gcpConfig *shared.GCPConfig + var err error + + if engineIntegration.Config[shared.K8sCloudProviderKey] == string(shared.GCPProvider) { + gcpConfig, err = fetchGCPCredential(ctx, engineIntegration, vaultObject) + if err != nil { + return err + } + } else { + awsConfig, err = fetchAWSCredential(ctx, engineIntegration, vaultObject) + if err != nil { + return err + } } - terraformArgs, err := generateTerraformVariables(awsConfig, engineIntegration.Config) + terraformArgs, err := generateTerraformVariables(awsConfig, gcpConfig, engineIntegration.Config) if err != nil { return err } @@ -531,40 +632,9 @@ func PollClusterStatus( func generateTerraformVariables( awsConfig *shared.AWSConfig, + gcpConfig *shared.GCPConfig, engineConfig map[string]string, ) ([]string, error) { - accessKeyVar := fmt.Sprintf("-var=access_key=%s", awsConfig.AccessKeyId) - secretAccessKeyVar := fmt.Sprintf("-var=secret_key=%s", awsConfig.SecretAccessKey) - regionVar := fmt.Sprintf("-var=region=%s", awsConfig.Region) - credentialPathVar := fmt.Sprintf("-var=credentials_file=%s", awsConfig.ConfigFilePath) - profileVar := fmt.Sprintf("-var=profile=%s", awsConfig.ConfigFileProfile) - - if awsConfig.ConfigFilePath != "" && awsConfig.ConfigFileProfile != "" { - // If the authentication mode is credential file, we need to retrieve the AWS region via - // `aws configure get region` and explicitly pass it to Terraform. - region, stderr, err := lib_utils.RunCmd( - "env", - []string{ - fmt.Sprintf("AWS_SHARED_CREDENTIALS_FILE=%s", awsConfig.ConfigFilePath), - fmt.Sprintf("AWS_PROFILE=%s", awsConfig.ConfigFileProfile), - "aws", - "configure", - "get", - "region", - }, - "", - false, - ) - // We need to check if stderr is empty because when the region is not specified in the - // profile, the cmd will error and it will produce an empty stdout and stderr. In this case, - // we should just set the region to an empty string, which means using the default region. 
- if err != nil && stderr != "" { - return nil, err - } - - regionVar = fmt.Sprintf("-var=region=%s", strings.TrimRight(region, "\n")) - } - cpuNodeTypeVar := fmt.Sprintf("-var=cpu_node_type=%s", engineConfig[shared.K8sCpuNodeTypeKey]) gpuNodeTypeVar := fmt.Sprintf("-var=gpu_node_type=%s", engineConfig[shared.K8sGpuNodeTypeKey]) minCpuNodeVar := fmt.Sprintf("-var=min_cpu_node=%s", engineConfig[shared.K8sMinCpuNodeKey]) @@ -574,12 +644,7 @@ func generateTerraformVariables( clusterNameVar := fmt.Sprintf("-var=cluster_name=%s", engineConfig[shared.K8sClusterNameKey]) - return []string{ - accessKeyVar, - secretAccessKeyVar, - regionVar, - credentialPathVar, - profileVar, + vars := []string{ cpuNodeTypeVar, gpuNodeTypeVar, minCpuNodeVar, @@ -587,14 +652,80 @@ func generateTerraformVariables( minGpuNodeVar, maxGpuNodeVar, clusterNameVar, - }, nil + } + + if awsConfig != nil { + accessKeyVar := fmt.Sprintf("-var=access_key=%s", awsConfig.AccessKeyId) + secretAccessKeyVar := fmt.Sprintf("-var=secret_key=%s", awsConfig.SecretAccessKey) + regionVar := fmt.Sprintf("-var=region=%s", awsConfig.Region) + credentialPathVar := fmt.Sprintf("-var=credentials_file=%s", awsConfig.ConfigFilePath) + profileVar := fmt.Sprintf("-var=profile=%s", awsConfig.ConfigFileProfile) + + if awsConfig.ConfigFilePath != "" && awsConfig.ConfigFileProfile != "" { + // If the authentication mode is credential file, we need to retrieve the AWS region via + // `aws configure get region` and explicitly pass it to Terraform. + region, stderr, err := lib_utils.RunCmd( + "env", + []string{ + fmt.Sprintf("AWS_SHARED_CREDENTIALS_FILE=%s", awsConfig.ConfigFilePath), + fmt.Sprintf("AWS_PROFILE=%s", awsConfig.ConfigFileProfile), + "aws", + "configure", + "get", + "region", + }, + "", + false, + ) + // We need to check if stderr is empty because when the region is not specified in the + // profile, the cmd will error and it will produce an empty stdout and stderr. In this case, + // we should just set the region to an empty string, which means using the default region. + if err != nil && stderr != "" { + return nil, err + } + + regionVar = fmt.Sprintf("-var=region=%s", strings.TrimRight(region, "\n")) + } + + vars = append(vars, []string{ + accessKeyVar, + secretAccessKeyVar, + regionVar, + credentialPathVar, + profileVar, + }...) + } else { + var key struct { + ProjectID string `json:"project_id"` + } + + err := json.Unmarshal([]byte(gcpConfig.ServiceAccountKey), &key) + if err != nil { + return nil, errors.Wrap(err, "Failed to parse project ID and client email from service account key") + } + + regionVar := fmt.Sprintf("-var=region=%s", gcpConfig.Region) + zoneVar := fmt.Sprintf("-var=zone=%s", gcpConfig.Zone) + secretKeyVar := fmt.Sprintf("-var=secret_key=%s", gcpConfig.ServiceAccountKey) + projectIDVar := fmt.Sprintf("-var=project_id=%s", key.ProjectID) + + vars = append(vars, []string{ + regionVar, + zoneVar, + secretKeyVar, + projectIDVar, + }...) 
+ } + + return vars, nil } func runTerraformApply( awsConfig *shared.AWSConfig, + gcpConfig *shared.GCPConfig, engineIntegration *models.Integration, ) error { - terraformArgs, err := generateTerraformVariables(awsConfig, engineIntegration.Config) + terraformArgs, err := generateTerraformVariables(awsConfig, gcpConfig, engineIntegration.Config) if err != nil { return err } @@ -717,6 +848,39 @@ func GenerateClusterName() (string, error) { return fmt.Sprintf("%s_%s", "aqueduct", string(b)), nil } +func GenerateClusterNameGKE() (string, error) { + const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789" + + b := make([]byte, 16) + for i := range b { + n, err := rand.Int(rand.Reader, big.NewInt(int64(len(letterBytes)))) + if err != nil { + return "", err + } + b[i] = letterBytes[n.Int64()] + } + + return fmt.Sprintf("%s-%s", "aqueduct", string(b)), nil +} + +func fetchGCPCredential( + ctx context.Context, + engineIntegration *models.Integration, + vaultObject vault.Vault, +) (*shared.GCPConfig, error) { + config, err := auth.ReadConfigFromSecret(ctx, engineIntegration.ID, vaultObject) + if err != nil { + return nil, errors.Wrap(err, "Unable to read integration config from vault.") + } + + k8sConfig, err := lib_utils.ParseK8sConfig(config) + if err != nil { + return nil, errors.Wrap(err, "Unable to parse Kubernetes config") + } + + return k8sConfig.GCPConfig, nil +} + func fetchAWSCredential( ctx context.Context, engineIntegration *models.Integration, @@ -742,3 +906,28 @@ func fetchAWSCredential( return awsConfig, nil } + +// SetupTerraformDirectory copies all files and folders in the Terraform template directory to the +// cloud integration's destination directory, which is ~/.aqueduct/server/cloud_integration//eks. +func SetupTerraformDirectory(src, dst string) error { + // Create the destination directory if it doesn't exist. + if err := os.MkdirAll(dst, 0o755); err != nil { + return err + } + + _, stdErr, err := lib_utils.RunCmd( + "cp", + []string{ + "-R", // we could have used -T to not create a directory if the source is a directory, but it's not supported on macOS + fmt.Sprintf("%s%s.", src, string(filepath.Separator)), + dst, + }, + "", + false, + ) + if err != nil { + return errors.New(stdErr) + } + + return nil +} diff --git a/src/golang/lib/engine/authenticate.go b/src/golang/lib/engine/authenticate.go index 4caaf8bb8..a21795793 100644 --- a/src/golang/lib/engine/authenticate.go +++ b/src/golang/lib/engine/authenticate.go @@ -2,23 +2,33 @@ package engine import ( "context" + "encoding/json" + "os" + "path/filepath" + "strconv" databricks_lib "github.com/aqueducthq/aqueduct/lib/databricks" + "github.com/aqueducthq/aqueduct/lib/dynamic" "github.com/aqueducthq/aqueduct/lib/k8s" "github.com/aqueducthq/aqueduct/lib/lib_utils" + "github.com/aqueducthq/aqueduct/lib/models/shared" "github.com/aqueducthq/aqueduct/lib/spark" "github.com/aqueducthq/aqueduct/lib/workflow/operator/connector/auth" "github.com/dropbox/godropbox/errors" ) // Authenticates kubernetes configuration by trying to connect a client. -func AuthenticateK8sConfig(ctx context.Context, authConf auth.Config) error { +// In case of on-demand k8s resource, updates the k8s config with the +// cluster config parameters. +func AuthenticateAndUpdateK8sConfig(ctx context.Context, authConf auth.Config) error { conf, err := lib_utils.ParseK8sConfig(authConf) if err != nil { return errors.Wrap(err, "Unable to parse configuration.") } if conf.Dynamic { + // The following code path is currently reserved for AWS. 
Need to refactor it to be consistent + // with GCP. if conf.CloudIntegrationId == "" { return errors.New("Dynamic K8s integration must have a cloud integration ID attached.") } else { @@ -26,6 +36,75 @@ func AuthenticateK8sConfig(ctx context.Context, authConf auth.Config) error { } } + if conf.CloudProvider == shared.GCPProvider { + // This is an on-demand GKE resource. + k8sConfig := shared.DynamicK8sConfig{ + Keepalive: strconv.Itoa(shared.K8sDefaultKeepalive), + CpuNodeType: shared.GKEDefaultCpuNodeType, + GpuNodeType: shared.GKEDefaultGpuNodeType, + MinCpuNode: strconv.Itoa(shared.K8sDefaultMinCpuNode), + MaxCpuNode: strconv.Itoa(shared.K8sDefaultMaxCpuNode), + MinGpuNode: strconv.Itoa(shared.K8sDefaultMinGpuNode), + MaxGpuNode: strconv.Itoa(shared.K8sDefaultMaxGpuNode), + } + + // Parse authconf to shared.DynamicK8sConfig + data, err := authConf.Marshal() + if err != nil { + return err + } + + var customConfig shared.DynamicK8sConfig + if err := json.Unmarshal(data, &customConfig); err != nil { + return err + } + + k8sConfig.Update(&customConfig) + + clusterName, err := dynamic.GenerateClusterNameGKE() + if err != nil { + return errors.Wrap(err, "Unable to generate k8s cluster name.") + } + + terraformPath := filepath.Join(os.Getenv("HOME"), ".aqueduct", "server", "ondemand_k8s", clusterName) + if err = dynamic.SetupTerraformDirectory(dynamic.GKETerraformTemplateDir, terraformPath); err != nil { + return errors.Wrap(err, "Unable to create Terraform directory.") + } + + if _, _, err := lib_utils.RunCmd("terraform", []string{"init"}, terraformPath, true); err != nil { + return errors.Wrap(err, "Error initializing Terraform") + } + + kubeconfigPath := filepath.Join(terraformPath, "kube_config") + + dynamicK8sConfig := map[string]string{ + shared.K8sTerraformPathKey: terraformPath, + shared.K8sKubeconfigPathKey: kubeconfigPath, + shared.K8sClusterNameKey: clusterName, + shared.K8sDynamicKey: strconv.FormatBool(true), + shared.K8sUseSameClusterKey: strconv.FormatBool(false), + shared.K8sStatusKey: string(shared.K8sClusterTerminatedStatus), + shared.K8sDesiredCpuNodeKey: k8sConfig.MinCpuNode, + shared.K8sDesiredGpuNodeKey: k8sConfig.MinGpuNode, + } + + for k, v := range k8sConfig.ToMap() { + dynamicK8sConfig[k] = v + } + + if err := dynamic.CheckIfValidConfig(dynamic.K8sClusterCreateAction, dynamicK8sConfig); err != nil { + return err + } + + // Update the authConf with the dynamicK8sConfig + castedConf := authConf.(*auth.StaticConfig) + for k, v := range dynamicK8sConfig { + castedConf.Set(k, v) + } + + return nil + } + return k8s.ValidateCluster(ctx, conf.ClusterName, conf.KubeconfigPath, bool(conf.UseSameCluster)) } diff --git a/src/golang/lib/lib_utils/utils.go b/src/golang/lib/lib_utils/utils.go index bd70fae12..59cb684b7 100644 --- a/src/golang/lib/lib_utils/utils.go +++ b/src/golang/lib/lib_utils/utils.go @@ -142,12 +142,37 @@ func ParseK8sConfig(conf auth.Config) (*shared.K8sIntegrationConfig, error) { return nil, err } - var c shared.K8sIntegrationConfig + log.Errorf("logging marshalled config: %s", string(data)) + + var c struct { + KubeconfigPath string `json:"kubeconfig_path"` + ClusterName string `json:"cluster_name"` + UseSameCluster shared.ConfigBool `json:"use_same_cluster"` + Dynamic shared.ConfigBool `json:"dynamic"` + CloudIntegrationId string `json:"cloud_integration_id"` + CloudProvider shared.CloudProviderType `json:"cloud_provider"` + GCPConfigSerialized string `json:"gcp_config_serialized"` + } if err := json.Unmarshal(data, &c); err != nil { return nil, err } - 
return &c, nil + var gcpConfig shared.GCPConfig + if len(c.GCPConfigSerialized) > 0 { + if err := json.Unmarshal([]byte(c.GCPConfigSerialized), &gcpConfig); err != nil { + return nil, err + } + } + + return &shared.K8sIntegrationConfig{ + KubeconfigPath: c.KubeconfigPath, + ClusterName: c.ClusterName, + UseSameCluster: c.UseSameCluster, + Dynamic: c.Dynamic, + CloudIntegrationId: c.CloudIntegrationId, + CloudProvider: c.CloudProvider, + GCPConfig: &gcpConfig, + }, nil } func ParseLambdaConfig(conf auth.Config) (*shared.LambdaIntegrationConfig, error) { diff --git a/src/golang/lib/models/shared/integration_config.go b/src/golang/lib/models/shared/integration_config.go index 81c891e05..b09000dd6 100644 --- a/src/golang/lib/models/shared/integration_config.go +++ b/src/golang/lib/models/shared/integration_config.go @@ -59,6 +59,8 @@ const ( K8sStatusKey string = "status" K8sLastUsedTimestampKey string = "last_used_timestamp" + K8sCloudProviderKey string = "cloud_provider" + // Dynamic k8s cluster config keys K8sKeepaliveKey string = "keepalive" K8sCpuNodeTypeKey string = "cpu_node_type" @@ -77,20 +79,37 @@ const ( // Dynamic k8s cluster config default values K8sMinimumKeepalive int = 600 K8sDefaultKeepalive int = 1200 - K8sDefaultCpuNodeType string = "t3.xlarge" - K8sDefaultGpuNodeType string = "p2.xlarge" + EKSDefaultCpuNodeType string = "t3.xlarge" + EKSDefaultGpuNodeType string = "p2.xlarge" K8sDefaultMinCpuNode int = 1 K8sDefaultMaxCpuNode int = 1 K8sDefaultMinGpuNode int = 0 K8sDefaultMaxGpuNode int = 1 + GKEDefaultCpuNodeType string = "n1-standard-4" + GKEDefaultGpuNodeType string = "nvidia-tesla-t4" ) +type CloudProviderType string + +const ( + AWSProvider CloudProviderType = "AWS" + GCPProvider CloudProviderType = "GCP" +) + +type GCPConfig struct { + Region string `json:"region" yaml:"region"` + Zone string `json:"zone" yaml:"zone"` + ServiceAccountKey string `json:"service_account_key" yaml:"service_account_key"` +} + type K8sIntegrationConfig struct { - KubeconfigPath string `json:"kubeconfig_path" yaml:"kubeconfigPath"` - ClusterName string `json:"cluster_name" yaml:"clusterName"` - UseSameCluster ConfigBool `json:"use_same_cluster" yaml:"useSameCluster"` - Dynamic ConfigBool `json:"dynamic" yaml:"dynamic"` - CloudIntegrationId string `json:"cloud_integration_id" yaml:"cloud_integration_id"` + KubeconfigPath string `json:"kubeconfig_path" yaml:"kubeconfigPath"` + ClusterName string `json:"cluster_name" yaml:"clusterName"` + UseSameCluster ConfigBool `json:"use_same_cluster" yaml:"useSameCluster"` + Dynamic ConfigBool `json:"dynamic" yaml:"dynamic"` + CloudIntegrationId string `json:"cloud_integration_id" yaml:"cloud_integration_id"` + CloudProvider CloudProviderType `json:"cloud_provider" yaml:"cloud_provider"` + GCPConfig *GCPConfig `json:"gcp_config" yaml:"gcp_config"` } type LambdaIntegrationConfig struct { diff --git a/src/golang/lib/workflow/operator/connector/auth/static_config.go b/src/golang/lib/workflow/operator/connector/auth/static_config.go index 52a2c09d5..ec1c7aee5 100644 --- a/src/golang/lib/workflow/operator/connector/auth/static_config.go +++ b/src/golang/lib/workflow/operator/connector/auth/static_config.go @@ -37,6 +37,7 @@ func (sc *StaticConfig) PublicConfig() map[string]string { "access_key_id", // AWS config. "secret_access_key", // AWS config. "expire_at", // ECR config. + "gcp_config_serialized", // GCP config. 
} for key, val := range sc.Conf { @@ -66,3 +67,8 @@ func (sc *StaticConfig) Refresh(ctx context.Context) (bool, error) { func (sc *StaticConfig) Set(key, value string) { sc.Conf[key] = value } + +// GetConfigMap returns the config map. +func (sc *StaticConfig) GetConfigMap() map[string]string { + return sc.Conf +} From dacba79636e487b597324df4f183bc9dbe57cfee Mon Sep 17 00:00:00 2001 From: cw75 Date: Fri, 12 May 2023 20:54:08 +0000 Subject: [PATCH 2/8] remove log --- src/golang/lib/lib_utils/utils.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/golang/lib/lib_utils/utils.go b/src/golang/lib/lib_utils/utils.go index 59cb684b7..a90de6286 100644 --- a/src/golang/lib/lib_utils/utils.go +++ b/src/golang/lib/lib_utils/utils.go @@ -142,8 +142,6 @@ func ParseK8sConfig(conf auth.Config) (*shared.K8sIntegrationConfig, error) { return nil, err } - log.Errorf("logging marshalled config: %s", string(data)) - var c struct { KubeconfigPath string `json:"kubeconfig_path"` ClusterName string `json:"cluster_name"` From 617838192676878ef901ebcfe340415266264c59 Mon Sep 17 00:00:00 2001 From: cw75 Date: Fri, 12 May 2023 20:59:57 +0000 Subject: [PATCH 3/8] add basic gke template --- src/terraform/gke/gke.tf | 81 ++++++++++++++++++++++++++++++++++ src/terraform/gke/outputs.tf | 22 +++++++++ src/terraform/gke/variables.tf | 55 +++++++++++++++++++++++ src/terraform/gke/versions.tf | 14 ++++++ src/terraform/gke/vpc.tf | 23 ++++++++++ 5 files changed, 195 insertions(+) create mode 100644 src/terraform/gke/gke.tf create mode 100644 src/terraform/gke/outputs.tf create mode 100644 src/terraform/gke/variables.tf create mode 100644 src/terraform/gke/versions.tf create mode 100644 src/terraform/gke/vpc.tf diff --git a/src/terraform/gke/gke.tf b/src/terraform/gke/gke.tf new file mode 100644 index 000000000..ac0dfd675 --- /dev/null +++ b/src/terraform/gke/gke.tf @@ -0,0 +1,81 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + +# variable "gke_username" { +# default = "" +# description = "gke username" +# } + +# variable "gke_password" { +# default = "" +# description = "gke password" +# } + +# variable "gke_num_nodes" { +# default = 1 +# description = "number of gke nodes" +# } + +# GKE cluster +resource "google_container_cluster" "primary" { + name = var.cluster_name + location = var.region + + # We can't create a cluster with no node pool defined, but we want to only use + # separately managed node pools. So we create the smallest possible default + # node pool and immediately delete it. + remove_default_node_pool = true + initial_node_count = 1 + + network = google_compute_network.vpc.name + subnetwork = google_compute_subnetwork.subnet.name +} + +# Separately Managed Node Pool +resource "google_container_node_pool" "primary_nodes" { + name = google_container_cluster.primary.name + location = var.region + cluster = google_container_cluster.primary.name + node_count = 1 + + node_locations = [var.zone] + + node_config { + oauth_scopes = [ + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring", + ] + + labels = { + env = var.cluster_name + } + + # preemptible = true + machine_type = "n1-standard-4" + tags = ["gke-node", var.cluster_name] + metadata = { + disable-legacy-endpoints = "true" + } + } +} + + +# # Kubernetes provider +# # The Terraform Kubernetes Provider configuration below is used as a learning reference only. +# # It references the variables and resources provisioned in this file. 
+# # We recommend you put this in another file -- so you can have a more modular configuration. +# # https://learn.hashicorp.com/terraform/kubernetes/provision-gke-cluster#optional-configure-terraform-kubernetes-provider +# # To learn how to schedule deployments and services using the provider, go here: https://learn.hashicorp.com/tutorials/terraform/kubernetes-provider. + +# provider "kubernetes" { +# load_config_file = "false" + +# host = google_container_cluster.primary.endpoint +# username = var.gke_username +# password = var.gke_password + +# client_certificate = google_container_cluster.primary.master_auth.0.client_certificate +# client_key = google_container_cluster.primary.master_auth.0.client_key +# cluster_ca_certificate = google_container_cluster.primary.master_auth.0.cluster_ca_certificate +# } + diff --git a/src/terraform/gke/outputs.tf b/src/terraform/gke/outputs.tf new file mode 100644 index 000000000..3ca5add06 --- /dev/null +++ b/src/terraform/gke/outputs.tf @@ -0,0 +1,22 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + +output "region" { + value = var.region + description = "GCloud Region" +} + +output "project_id" { + value = var.project_id + description = "GCloud Project ID" +} + +output "kubernetes_cluster_name" { + value = google_container_cluster.primary.name + description = "GKE Cluster Name" +} + +output "kubernetes_cluster_host" { + value = google_container_cluster.primary.endpoint + description = "GKE Cluster Host" +} diff --git a/src/terraform/gke/variables.tf b/src/terraform/gke/variables.tf new file mode 100644 index 000000000..7d1997533 --- /dev/null +++ b/src/terraform/gke/variables.tf @@ -0,0 +1,55 @@ +variable "region" { + description = "GCP region" + type = string +} + +variable "zone" { + description = "GCP zone" + type = string +} + +variable "secret_key" { + description = "GCP service account key" + type = string + sensitive = true +} + +variable "cluster_name" { + description = "The name of the GKE cluster" + type = string +} + +variable "project_id" { + description = "The ID of the GCP project" + type = string +} + +variable "cpu_node_type" { + description = "The instance type of the CPU node group" + type = string +} + +variable "gpu_node_type" { + description = "The instance type of the GPU node group." + type = string +} + +variable "min_cpu_node" { + description = "Minimum number of nodes in the CPU node group" + type = number +} + +variable "max_cpu_node" { + description = "Maximum number of nodes in the CPU node group" + type = number +} + +variable "min_gpu_node" { + description = "Minimum number of nodes in the GPU node group" + type = number +} + +variable "max_gpu_node" { + description = "Maximum number of nodes in the GPU node group" + type = number +} \ No newline at end of file diff --git a/src/terraform/gke/versions.tf b/src/terraform/gke/versions.tf new file mode 100644 index 000000000..5486b0cb5 --- /dev/null +++ b/src/terraform/gke/versions.tf @@ -0,0 +1,14 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "4.64.0" + } + } + + required_version = ">= 0.14" +} + diff --git a/src/terraform/gke/vpc.tf b/src/terraform/gke/vpc.tf new file mode 100644 index 000000000..382108efb --- /dev/null +++ b/src/terraform/gke/vpc.tf @@ -0,0 +1,23 @@ +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: MPL-2.0 + +provider "google" { + project = var.project_id + #region = var.region + #credentials = file("./gke_secret.json") + credentials = jsondecode(jsonencode(var.secret_key)) +} + +# VPC +resource "google_compute_network" "vpc" { + name = "${var.cluster_name}-vpc" + auto_create_subnetworks = "false" +} + +# Subnet +resource "google_compute_subnetwork" "subnet" { + name = "${var.cluster_name}-subnet" + region = var.region + network = google_compute_network.vpc.name + ip_cidr_range = "10.10.0.0/24" +} From d8de6bad692f9caf0968314c12fb1e48206ae232 Mon Sep 17 00:00:00 2001 From: cw75 Date: Fri, 12 May 2023 22:19:27 +0000 Subject: [PATCH 4/8] Terraform scripts for on-demand gke cluster provisioning --- sdk/aqueduct/resources/connect_config.py | 6 +- src/golang/lib/job/k8s.go | 4 + src/terraform/gke/.gitignore | 6 ++ src/terraform/gke/gke.tf | 71 ++-------------- src/terraform/gke/main.tf | 22 +++++ src/terraform/gke/nodepools.tf | 102 +++++++++++++++++++++++ src/terraform/gke/terraform.tfvars | 20 +++++ src/terraform/gke/variables.tf | 72 +++++++++++++++- src/terraform/gke/versions.tf | 7 +- src/terraform/gke/vpc.tf | 8 +- 10 files changed, 243 insertions(+), 75 deletions(-) create mode 100644 src/terraform/gke/.gitignore create mode 100644 src/terraform/gke/main.tf create mode 100644 src/terraform/gke/nodepools.tf create mode 100644 src/terraform/gke/terraform.tfvars diff --git a/sdk/aqueduct/resources/connect_config.py b/sdk/aqueduct/resources/connect_config.py index 0a4f6a522..734ce0dae 100644 --- a/sdk/aqueduct/resources/connect_config.py +++ b/sdk/aqueduct/resources/connect_config.py @@ -279,7 +279,7 @@ class K8sConfig(BaseConnectionConfig): # cloud_integration_id: str = "" cloud_provider: Optional[CloudProviderType] gcp_config: Optional[GCPConfig] - # cluster_config: Optional[DynamicK8sConfig] + cluster_config: Optional[DynamicK8sConfig] class _K8sConfigWithSerializedConfig(BaseConnectionConfig): @@ -290,6 +290,7 @@ class _K8sConfigWithSerializedConfig(BaseConnectionConfig): # cloud_integration_id: str = "" cloud_provider: Optional[CloudProviderType] gcp_config_serialized: Optional[str] # this is a json-serialized string of GCPConfig + cluster_config_serialized: Optional[str] # this is a json-serialized string of cluster configuration ResourceConfig = Union[ @@ -433,6 +434,9 @@ def _prepare_k8s_config(config: K8sConfig) -> _K8sConfigWithSerializedConfig: gcp_config_serialized=( None if config.gcp_config is None else config.gcp_config.json(exclude_none=True) ), + cluster_config_serialized=( + None if config.cluster_config is None else config.cluster_config.json(exclude_none=True) + ) ) diff --git a/src/golang/lib/job/k8s.go b/src/golang/lib/job/k8s.go index 164f94984..6e84588bc 100644 --- a/src/golang/lib/job/k8s.go +++ b/src/golang/lib/job/k8s.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strconv" + "time" "github.com/aqueducthq/aqueduct/lib" "github.com/aqueducthq/aqueduct/lib/k8s" @@ -46,6 +47,9 @@ func setupNamespaceAndSecrets(k8sClient *kubernetes.Clientset, conf *K8sJobManag if _, secretExistsErr := k8s.GetSecret(context.Background(), k8s.AwsCredentialsSecretName, k8sClient); secretExistsErr != nil { return errors.Wrap(err, "Error while creating K8s Secrets") } + } else { + log.Error("Successfully created K8s Secrets, waiting 10 seconds before proceeding...") + time.Sleep(10 * time.Second) } return nil diff --git a/src/terraform/gke/.gitignore b/src/terraform/gke/.gitignore new file mode 100644 index 000000000..83754ef00 --- /dev/null +++ 
b/src/terraform/gke/.gitignore @@ -0,0 +1,6 @@ +**./terraform +*.terraform* +gke_secret.json +gke.plan +terraform.tfstate +terraform.tfstate.backup diff --git a/src/terraform/gke/gke.tf b/src/terraform/gke/gke.tf index ac0dfd675..5536a0d9e 100644 --- a/src/terraform/gke/gke.tf +++ b/src/terraform/gke/gke.tf @@ -1,25 +1,13 @@ # Copyright (c) HashiCorp, Inc. # SPDX-License-Identifier: MPL-2.0 -# variable "gke_username" { -# default = "" -# description = "gke username" -# } - -# variable "gke_password" { -# default = "" -# description = "gke password" -# } - -# variable "gke_num_nodes" { -# default = 1 -# description = "number of gke nodes" -# } - # GKE cluster resource "google_container_cluster" "primary" { name = var.cluster_name - location = var.region + description = var.description + location = local.location + node_locations = local.node_locations + resource_labels = var.cluster_resource_labels # We can't create a cluster with no node pool defined, but we want to only use # separately managed node pools. So we create the smallest possible default @@ -29,53 +17,4 @@ resource "google_container_cluster" "primary" { network = google_compute_network.vpc.name subnetwork = google_compute_subnetwork.subnet.name -} - -# Separately Managed Node Pool -resource "google_container_node_pool" "primary_nodes" { - name = google_container_cluster.primary.name - location = var.region - cluster = google_container_cluster.primary.name - node_count = 1 - - node_locations = [var.zone] - - node_config { - oauth_scopes = [ - "https://www.googleapis.com/auth/logging.write", - "https://www.googleapis.com/auth/monitoring", - ] - - labels = { - env = var.cluster_name - } - - # preemptible = true - machine_type = "n1-standard-4" - tags = ["gke-node", var.cluster_name] - metadata = { - disable-legacy-endpoints = "true" - } - } -} - - -# # Kubernetes provider -# # The Terraform Kubernetes Provider configuration below is used as a learning reference only. -# # It references the variables and resources provisioned in this file. -# # We recommend you put this in another file -- so you can have a more modular configuration. -# # https://learn.hashicorp.com/terraform/kubernetes/provision-gke-cluster#optional-configure-terraform-kubernetes-provider -# # To learn how to schedule deployments and services using the provider, go here: https://learn.hashicorp.com/tutorials/terraform/kubernetes-provider. - -# provider "kubernetes" { -# load_config_file = "false" - -# host = google_container_cluster.primary.endpoint -# username = var.gke_username -# password = var.gke_password - -# client_certificate = google_container_cluster.primary.master_auth.0.client_certificate -# client_key = google_container_cluster.primary.master_auth.0.client_key -# cluster_ca_certificate = google_container_cluster.primary.master_auth.0.cluster_ca_certificate -# } - +} \ No newline at end of file diff --git a/src/terraform/gke/main.tf b/src/terraform/gke/main.tf new file mode 100644 index 000000000..be3eae13a --- /dev/null +++ b/src/terraform/gke/main.tf @@ -0,0 +1,22 @@ +data "google_compute_zones" "available" { + provider = google + + project = var.project_id + region = local.region +} + +resource "random_shuffle" "available_zones" { + input = data.google_compute_zones.available.names + result_count = 3 +} + +locals { + location = var.regional ? var.region : var.zones[0] + region = var.regional ? 
var.region : join("-", slice(split("-", var.zones[0]), 0, 2)) + + // For regional cluster - use var.zones if provided, use available otherwise, for zonal cluster use var.zones with first element extracted + node_locations = var.regional ? coalescelist(compact(var.zones), sort(random_shuffle.available_zones.result)) : slice(var.zones, 1, length(var.zones)) + + node_pool_names = [for np in toset(var.node_pools) : np.name] + node_pools = zipmap(local.node_pool_names, tolist(toset(var.node_pools))) +} \ No newline at end of file diff --git a/src/terraform/gke/nodepools.tf b/src/terraform/gke/nodepools.tf new file mode 100644 index 000000000..0b2387301 --- /dev/null +++ b/src/terraform/gke/nodepools.tf @@ -0,0 +1,102 @@ +resource "google_container_node_pool" "primary_nodes" { + name = "${var.cluster_name}-cpu-node-pool" + location = local.location + cluster = google_container_cluster.primary.name + initial_node_count = var.initial_node_count + node_locations = local.node_locations + + autoscaling { + location_policy = "BALANCED" + min_node_count = var.min_cpu_node + max_node_count = var.max_cpu_node + } + + management { + auto_repair = true + auto_upgrade = true + } + node_config { + oauth_scopes = [ + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring", + ] + + labels = { + env = var.project_id + } + + preemptible = true + machine_type = var.cpu_node_type + service_account = var.compute_engine_service_account + tags = ["gke-node", "${var.project_id}-gke"] + + disk_size_gb = var.disk_size_in_gb + disk_type = var.disk_type + metadata = { + disable-legacy-endpoints = "true" + } + reservation_affinity { + consume_reservation_type = "ANY_RESERVATION" + values = [] + } + } + upgrade_settings { + max_surge = 1 + max_unavailable = 0 + strategy = "SURGE" + } +} + +resource "google_container_node_pool" "gpu_nodes" { + count = var.create_gpu_node_pool ? 
1 : 0 + name = "${var.cluster_name}-gpu-node-pool" + location = local.location + cluster = google_container_cluster.primary.name + initial_node_count = var.initial_node_count + node_locations = local.node_locations + + autoscaling { + location_policy = "BALANCED" + min_node_count = var.min_gpu_node + max_node_count = var.max_gpu_node + } + + management { + auto_repair = true + auto_upgrade = true + } + + node_config { + oauth_scopes = [ + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring", + ] + + labels = { + env = var.project_id + accelerator = var.gpu_node_type + } + + preemptible = true + machine_type = var.gpu_node_type + tags = ["gke-node", "${var.project_id}-gke"] + metadata = { + disable-legacy-endpoints = "true" + gke-node-accelerator = var.gpu_node_type + } + + reservation_affinity { + consume_reservation_type = "ANY_RESERVATION" + values = [] + } + + guest_accelerator { + count = 1 + type = var.gpu_node_type + } + } + upgrade_settings { + max_surge = 1 + max_unavailable = 0 + } +} \ No newline at end of file diff --git a/src/terraform/gke/terraform.tfvars b/src/terraform/gke/terraform.tfvars new file mode 100644 index 000000000..fba7dd150 --- /dev/null +++ b/src/terraform/gke/terraform.tfvars @@ -0,0 +1,20 @@ +project_id = "seraphic-music-341401" +description = "Aqueduct GKE Cluster" +region = "us-central1" +regional = true +zones = ["us-central1-a"] +min_cpu_node = 1 +max_cpu_node = 10 +min_gpu_node = 0 +max_gpu_node = 2 +cluster_name = "aqueduct-gke-6" +cpu_node_type = "n1-standard-4" +gpu_node_type = "n1-standard-4" +disk_size_in_gb = 50 +disk_type = "pd-balanced" +initial_node_count = 2 +node_count = 1 +max_pods_per_node = 100 +create_gpu_node_pool = false +secret_key = "" +compute_engine_service_account = "24235262657-compute@developer.gserviceaccount.com" \ No newline at end of file diff --git a/src/terraform/gke/variables.tf b/src/terraform/gke/variables.tf index 7d1997533..4aacae861 100644 --- a/src/terraform/gke/variables.tf +++ b/src/terraform/gke/variables.tf @@ -3,11 +3,22 @@ variable "region" { type = string } +variable "zones" { + description = "The zones to host cluster in" + type = list(string) + default = [] +} + variable "zone" { - description = "GCP zone" + description = "GCP Zone" type = string } +variable "regional" { + description = "Whether is a regional cluster (Zonal cluster if set false)" + type = bool +} + variable "secret_key" { description = "GCP service account key" type = string @@ -24,6 +35,17 @@ variable "project_id" { type = string } +variable "description" { + description = "Description of the cluster" + type = string +} + +variable "cluster_resource_labels" { + type = map(string) + description = "The GCE resource labels (a map of key/value pairs) to be applied to the cluster" + default = {} +} + variable "cpu_node_type" { description = "The instance type of the CPU node group" type = string @@ -52,4 +74,50 @@ variable "min_gpu_node" { variable "max_gpu_node" { description = "Maximum number of nodes in the GPU node group" type = number -} \ No newline at end of file +} + +variable "disk_type" { + description = "Type of disk " + type = string +} + +variable "disk_size_in_gb" { + description = "Disk Capacity" + type = number +} + +variable "max_pods_per_node" { + description = "Maximum number of pods per node" + type = number +} + +variable "initial_node_count" { + description = "Initial number of nodes in this pool" + type = number +} + +variable "node_count" { + description = "Number of nodes in this 
pool" + type = number +} + +variable "create_gpu_node_pool" { + description = "Decide if this resource pool has to be created" + type = bool +} + +variable "node_pools" { + type = list(map(any)) + description = "List of maps containing node pools" + + default = [ + { + name = "default-node-pool" + }, + ] +} + + +variable "compute_engine_service_account" { + description = "Service account to associate to the nodes in the cluster" +} diff --git a/src/terraform/gke/versions.tf b/src/terraform/gke/versions.tf index 5486b0cb5..a5423ea3e 100644 --- a/src/terraform/gke/versions.tf +++ b/src/terraform/gke/versions.tf @@ -7,8 +7,11 @@ terraform { source = "hashicorp/google" version = "4.64.0" } + + kubernetes = { + source = "hashicorp/kubernetes" + } } required_version = ">= 0.14" -} - +} \ No newline at end of file diff --git a/src/terraform/gke/vpc.tf b/src/terraform/gke/vpc.tf index 382108efb..19c350dbc 100644 --- a/src/terraform/gke/vpc.tf +++ b/src/terraform/gke/vpc.tf @@ -3,8 +3,8 @@ provider "google" { project = var.project_id - #region = var.region - #credentials = file("./gke_secret.json") + region = local.region + # credentials = file("./gke_secret.json") credentials = jsondecode(jsonencode(var.secret_key)) } @@ -17,7 +17,7 @@ resource "google_compute_network" "vpc" { # Subnet resource "google_compute_subnetwork" "subnet" { name = "${var.cluster_name}-subnet" - region = var.region + region = local.region network = google_compute_network.vpc.name ip_cidr_range = "10.10.0.0/24" -} +} \ No newline at end of file From c68c2deb69920a2b75fd16cc06c4c502aa28fe1d Mon Sep 17 00:00:00 2001 From: cw75 Date: Tue, 30 May 2023 23:13:24 +0000 Subject: [PATCH 5/8] minor ui fix --- .../resources/cards/kubernetesCard.tsx | 23 +++++++++++++++---- src/ui/common/src/utils/resources.ts | 1 + 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/ui/common/src/components/resources/cards/kubernetesCard.tsx b/src/ui/common/src/components/resources/cards/kubernetesCard.tsx index 87fcf3abb..9462e9b7b 100644 --- a/src/ui/common/src/components/resources/cards/kubernetesCard.tsx +++ b/src/ui/common/src/components/resources/cards/kubernetesCard.tsx @@ -1,3 +1,5 @@ +import { Typography } from '@mui/material'; +import Box from '@mui/material/Box'; import React from 'react'; import { KubernetesConfig, Resource } from '../../../utils/resources'; @@ -10,9 +12,22 @@ type Props = { export const KubernetesCard: React.FC = ({ resource }) => { const config = resource.config as KubernetesConfig; return ( - + + + {config.cloud_provider === 'GCP' && ( + + + Managed by Aqueduct on GCP + + + )} + ); }; diff --git a/src/ui/common/src/utils/resources.ts b/src/ui/common/src/utils/resources.ts index 531fa40d0..cea5a9310 100644 --- a/src/ui/common/src/utils/resources.ts +++ b/src/ui/common/src/utils/resources.ts @@ -214,6 +214,7 @@ export type KubernetesConfig = { kubeconfig_path: string; cluster_name: string; use_same_cluster: string; + cloud_provider?: string; }; export type LambdaConfig = { From 6ef6a423cb210d5fdf9e6fdb3dd370e48b3bb852 Mon Sep 17 00:00:00 2001 From: Jerome Purushotham Date: Tue, 30 May 2023 17:04:44 -0700 Subject: [PATCH 6/8] Updated terraform scripts (#1386) --- src/terraform/gke/gke.tf | 6 +-- src/terraform/gke/main.tf | 18 +-------- src/terraform/gke/nodepools.tf | 33 ++++++---------- src/terraform/gke/terraform.tfvars | 20 ---------- src/terraform/gke/variables.tf | 60 ++++-------------------------- src/terraform/gke/vpc.tf | 4 +- 6 files changed, 25 insertions(+), 116 deletions(-) delete mode 100644 
src/terraform/gke/terraform.tfvars diff --git a/src/terraform/gke/gke.tf b/src/terraform/gke/gke.tf index 5536a0d9e..96bceed37 100644 --- a/src/terraform/gke/gke.tf +++ b/src/terraform/gke/gke.tf @@ -5,15 +5,13 @@ resource "google_container_cluster" "primary" { name = var.cluster_name description = var.description - location = local.location - node_locations = local.node_locations - resource_labels = var.cluster_resource_labels + location = var.region # We can't create a cluster with no node pool defined, but we want to only use # separately managed node pools. So we create the smallest possible default # node pool and immediately delete it. remove_default_node_pool = true - initial_node_count = 1 + initial_node_count = var.initial_node_count network = google_compute_network.vpc.name subnetwork = google_compute_subnetwork.subnet.name diff --git a/src/terraform/gke/main.tf b/src/terraform/gke/main.tf index be3eae13a..7839b081a 100644 --- a/src/terraform/gke/main.tf +++ b/src/terraform/gke/main.tf @@ -2,21 +2,5 @@ data "google_compute_zones" "available" { provider = google project = var.project_id - region = local.region -} - -resource "random_shuffle" "available_zones" { - input = data.google_compute_zones.available.names - result_count = 3 -} - -locals { - location = var.regional ? var.region : var.zones[0] - region = var.regional ? var.region : join("-", slice(split("-", var.zones[0]), 0, 2)) - - // For regional cluster - use var.zones if provided, use available otherwise, for zonal cluster use var.zones with first element extracted - node_locations = var.regional ? coalescelist(compact(var.zones), sort(random_shuffle.available_zones.result)) : slice(var.zones, 1, length(var.zones)) - - node_pool_names = [for np in toset(var.node_pools) : np.name] - node_pools = zipmap(local.node_pool_names, tolist(toset(var.node_pools))) + region = var.region } \ No newline at end of file diff --git a/src/terraform/gke/nodepools.tf b/src/terraform/gke/nodepools.tf index 0b2387301..58f083bfd 100644 --- a/src/terraform/gke/nodepools.tf +++ b/src/terraform/gke/nodepools.tf @@ -1,9 +1,9 @@ resource "google_container_node_pool" "primary_nodes" { name = "${var.cluster_name}-cpu-node-pool" - location = local.location + location = var.region cluster = google_container_cluster.primary.name - initial_node_count = var.initial_node_count - node_locations = local.node_locations + node_locations = [var.zone] + node_count = var.min_cpu_node autoscaling { location_policy = "BALANCED" @@ -25,35 +25,25 @@ resource "google_container_node_pool" "primary_nodes" { env = var.project_id } + disk_size_gb = var.disk_size_in_gb preemptible = true machine_type = var.cpu_node_type - service_account = var.compute_engine_service_account tags = ["gke-node", "${var.project_id}-gke"] - - disk_size_gb = var.disk_size_in_gb - disk_type = var.disk_type - metadata = { - disable-legacy-endpoints = "true" } - reservation_affinity { - consume_reservation_type = "ANY_RESERVATION" - values = [] + upgrade_settings { + max_surge = 1 + max_unavailable = 0 + strategy = "SURGE" } - } - upgrade_settings { - max_surge = 1 - max_unavailable = 0 - strategy = "SURGE" - } } resource "google_container_node_pool" "gpu_nodes" { count = var.create_gpu_node_pool ? 
1 : 0 name = "${var.cluster_name}-gpu-node-pool" - location = local.location + location = var.region cluster = google_container_cluster.primary.name - initial_node_count = var.initial_node_count - node_locations = local.node_locations + node_locations = [var.zone] + node_count = var.min_gpu_node autoscaling { location_policy = "BALANCED" @@ -77,6 +67,7 @@ resource "google_container_node_pool" "gpu_nodes" { accelerator = var.gpu_node_type } + disk_size_gb = var.disk_size_in_gb preemptible = true machine_type = var.gpu_node_type tags = ["gke-node", "${var.project_id}-gke"] diff --git a/src/terraform/gke/terraform.tfvars b/src/terraform/gke/terraform.tfvars deleted file mode 100644 index fba7dd150..000000000 --- a/src/terraform/gke/terraform.tfvars +++ /dev/null @@ -1,20 +0,0 @@ -project_id = "seraphic-music-341401" -description = "Aqueduct GKE Cluster" -region = "us-central1" -regional = true -zones = ["us-central1-a"] -min_cpu_node = 1 -max_cpu_node = 10 -min_gpu_node = 0 -max_gpu_node = 2 -cluster_name = "aqueduct-gke-6" -cpu_node_type = "n1-standard-4" -gpu_node_type = "n1-standard-4" -disk_size_in_gb = 50 -disk_type = "pd-balanced" -initial_node_count = 2 -node_count = 1 -max_pods_per_node = 100 -create_gpu_node_pool = false -secret_key = "" -compute_engine_service_account = "24235262657-compute@developer.gserviceaccount.com" \ No newline at end of file diff --git a/src/terraform/gke/variables.tf b/src/terraform/gke/variables.tf index 4aacae861..9fbcf98f6 100644 --- a/src/terraform/gke/variables.tf +++ b/src/terraform/gke/variables.tf @@ -3,22 +3,11 @@ variable "region" { type = string } -variable "zones" { - description = "The zones to host cluster in" - type = list(string) - default = [] -} - variable "zone" { description = "GCP Zone" type = string } -variable "regional" { - description = "Whether is a regional cluster (Zonal cluster if set false)" - type = bool -} - variable "secret_key" { description = "GCP service account key" type = string @@ -38,12 +27,7 @@ variable "project_id" { variable "description" { description = "Description of the cluster" type = string -} - -variable "cluster_resource_labels" { - type = map(string) - description = "The GCE resource labels (a map of key/value pairs) to be applied to the cluster" - default = {} + default = "" } variable "cpu_node_type" { @@ -76,48 +60,20 @@ variable "max_gpu_node" { type = number } -variable "disk_type" { - description = "Type of disk " - type = string -} - -variable "disk_size_in_gb" { - description = "Disk Capacity" - type = number -} - -variable "max_pods_per_node" { - description = "Maximum number of pods per node" - type = number -} - variable "initial_node_count" { description = "Initial number of nodes in this pool" type = number -} - -variable "node_count" { - description = "Number of nodes in this pool" - type = number + default = 1 } variable "create_gpu_node_pool" { description = "Decide if this resource pool has to be created" type = bool + default = false } -variable "node_pools" { - type = list(map(any)) - description = "List of maps containing node pools" - - default = [ - { - name = "default-node-pool" - }, - ] -} - - -variable "compute_engine_service_account" { - description = "Service account to associate to the nodes in the cluster" -} +variable "disk_size_in_gb" { + description = "Disk Capacity" + type = number + default = 50 +} \ No newline at end of file diff --git a/src/terraform/gke/vpc.tf b/src/terraform/gke/vpc.tf index 19c350dbc..5fbbfdf65 100644 --- a/src/terraform/gke/vpc.tf +++ 
b/src/terraform/gke/vpc.tf @@ -3,7 +3,7 @@ provider "google" { project = var.project_id - region = local.region + region = var.region # credentials = file("./gke_secret.json") credentials = jsondecode(jsonencode(var.secret_key)) } @@ -17,7 +17,7 @@ resource "google_compute_network" "vpc" { # Subnet resource "google_compute_subnetwork" "subnet" { name = "${var.cluster_name}-subnet" - region = local.region + region = var.region network = google_compute_network.vpc.name ip_cidr_range = "10.10.0.0/24" } \ No newline at end of file From 2d42a21496ed5d49d602ad61db86cc24336f5097 Mon Sep 17 00:00:00 2001 From: cw75 Date: Wed, 31 May 2023 05:36:52 +0000 Subject: [PATCH 7/8] remove gpu from UI --- .../resources/dialogs/gcpDialog.tsx | 39 ------------------- src/ui/common/src/utils/resources.ts | 3 -- 2 files changed, 42 deletions(-) diff --git a/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx b/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx index 2f3159cb1..6be630989 100644 --- a/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx +++ b/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx @@ -20,11 +20,8 @@ const Placeholders: OndemandGKEConfig = { gcp_config_serialized: '', keepalive: '1200', cpu_node_type: 'n1-standard-4', - gpu_node_type: 'g2-standard-4', min_cpu_node: '1', max_cpu_node: '1', - min_gpu_node: '0', - max_gpu_node: '1', }; const GCPPlaceholders: GCPConfig = { @@ -84,18 +81,6 @@ export const GCPDialog: React.FC< }} /> - { - setValue('gpu_node_type', event.target.value); - }} - /> - - - { - setValue('min_gpu_node', event.target.value); - }} - /> - - { - setValue('max_gpu_node', event.target.value); - }} - /> ); diff --git a/src/ui/common/src/utils/resources.ts b/src/ui/common/src/utils/resources.ts index ce89333c8..9569f5f3b 100644 --- a/src/ui/common/src/utils/resources.ts +++ b/src/ui/common/src/utils/resources.ts @@ -264,11 +264,8 @@ export type OndemandGKEConfig = { gcp_config_serialized: string; keepalive: string; cpu_node_type: string; - gpu_node_type: string; min_cpu_node: string; max_cpu_node: string; - min_gpu_node: string; - max_gpu_node: string; }; export type GCPConfig = { From df47823a9c764464a0a2fe7cb3c55fe109fc503f Mon Sep 17 00:00:00 2001 From: cw75 Date: Wed, 31 May 2023 08:13:42 +0000 Subject: [PATCH 8/8] cleanup --- sdk/aqueduct/resources/connect_config.py | 6 ------ src/golang/lib/dynamic/engine.go | 3 --- src/python/bin/aqueduct | 10 ++++++++++ src/terraform/gke/vpc.tf | 1 - .../src/components/resources/dialogs/gcpDialog.tsx | 2 +- .../resources/dialogs/onDemandKubernetesDialog.tsx | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sdk/aqueduct/resources/connect_config.py b/sdk/aqueduct/resources/connect_config.py index 12b75785c..64658b08f 100644 --- a/sdk/aqueduct/resources/connect_config.py +++ b/sdk/aqueduct/resources/connect_config.py @@ -280,8 +280,6 @@ class K8sConfig(BaseConnectionConfig): kubeconfig_path: str = "" cluster_name: str = "" use_same_cluster: str = "false" - # dynamic: str = "false" - # cloud_integration_id: str = "" cloud_provider: Optional[CloudProviderType] gcp_config: Optional[GCPConfig] cluster_config: Optional[DynamicK8sConfig] @@ -291,8 +289,6 @@ class _K8sConfigWithSerializedConfig(BaseConnectionConfig): kubeconfig_path: str cluster_name: str use_same_cluster: str = "false" - # dynamic: str = "false" - # cloud_integration_id: str = "" cloud_provider: Optional[CloudProviderType] gcp_config_serialized: Optional[str] # this is a json-serialized string of GCPConfig # add 
fields from DynamicK8sConfig @@ -451,8 +447,6 @@ def _prepare_k8s_config(config: K8sConfig) -> _K8sConfigWithSerializedConfig: kubeconfig_path=config.kubeconfig_path, cluster_name=config.cluster_name, use_same_cluster=config.use_same_cluster, - # dynamic=config.dynamic, - # cloud_integration_id=config.cloud_integration_id, cloud_provider=config.cloud_provider, gcp_config_serialized=( None if config.gcp_config is None else config.gcp_config.json(exclude_none=True) diff --git a/src/golang/lib/dynamic/engine.go b/src/golang/lib/dynamic/engine.go index 267c041a8..6cb61e24a 100644 --- a/src/golang/lib/dynamic/engine.go +++ b/src/golang/lib/dynamic/engine.go @@ -312,9 +312,6 @@ func CreateOrUpdateK8sCluster( if err := os.Remove(serviceAccountKeyPath); err != nil { return errors.Wrap(err, "Failed to remove temporary service account key file") } - - log.Info("Successfully created ondemand GKE cluster, waiting 10 seconds before proceeding...") - time.Sleep(10 * time.Second) } } diff --git a/src/python/bin/aqueduct b/src/python/bin/aqueduct index 558a385c6..0c46a07ea 100755 --- a/src/python/bin/aqueduct +++ b/src/python/bin/aqueduct @@ -225,6 +225,7 @@ def setup_server_binaries(): def download_terraform_template(): + # EKS template terraform_folder = os.path.join(server_directory, "template", "aws", "eks") terraform_zip_path = os.path.join(terraform_folder, "eks_terraform.zip") with open(terraform_zip_path, "wb") as f: @@ -232,6 +233,14 @@ def download_terraform_template(): with zipfile.ZipFile(terraform_zip_path, "r") as zip: zip.extractall(terraform_folder) os.remove(terraform_zip_path) + # GKE template + terraform_folder = os.path.join(server_directory, "template", "gke") + terraform_zip_path = os.path.join(terraform_folder, "gke_terraform.zip") + with open(terraform_zip_path, "wb") as f: + _download_file(os.path.join(s3_server_prefix, "template", "gke", "gke_terraform.zip"), f) + with zipfile.ZipFile(terraform_zip_path, "r") as zip: + zip.extractall(terraform_folder) + os.remove(terraform_zip_path) def update_ui_version(): @@ -345,6 +354,7 @@ def update(): os.path.join(server_directory, "template"), os.path.join(server_directory, "template", "aws"), os.path.join(server_directory, "template", "aws", "eks"), + os.path.join(server_directory, "template", "gke"), ] for directory in template_directories: diff --git a/src/terraform/gke/vpc.tf b/src/terraform/gke/vpc.tf index 5fbbfdf65..d37d352f6 100644 --- a/src/terraform/gke/vpc.tf +++ b/src/terraform/gke/vpc.tf @@ -4,7 +4,6 @@ provider "google" { project = var.project_id region = var.region - # credentials = file("./gke_secret.json") credentials = jsondecode(jsonencode(var.secret_key)) } diff --git a/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx b/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx index 6be630989..0657b27cc 100644 --- a/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx +++ b/src/ui/common/src/components/resources/dialogs/gcpDialog.tsx @@ -74,7 +74,7 @@ export const GCPDialog: React.FC< spellCheck={false} required={false} label="CPU node type" - description="The EC2 instance type of the CPU node group." + description="The instance type of the CPU node group." 
placeholder={Placeholders.cpu_node_type} onChange={(event) => { setValue('cpu_node_type', event.target.value); diff --git a/src/ui/common/src/components/resources/dialogs/onDemandKubernetesDialog.tsx b/src/ui/common/src/components/resources/dialogs/onDemandKubernetesDialog.tsx index 835a1d472..acf665ee8 100644 --- a/src/ui/common/src/components/resources/dialogs/onDemandKubernetesDialog.tsx +++ b/src/ui/common/src/components/resources/dialogs/onDemandKubernetesDialog.tsx @@ -256,7 +256,7 @@ const OndemandK8sAWSDialog: React.FC> = ({ @@ -347,7 +347,7 @@ const OnDemandK8sGCPStep: React.FC> = ({