Skip to content

Commit

Permalink
Improve the way flags/config are set & read.
Browse files Browse the repository at this point in the history
Provides a reasonable fix for #1
  • Loading branch information
cyrildiagne committed Oct 12, 2019
1 parent 23f9681 commit ec01ad6
Show file tree
Hide file tree
Showing 15 changed files with 63 additions and 55 deletions.
2 changes: 1 addition & 1 deletion cmd/app-delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func delete(app string) error {
volumes := []string{dir + ":/app_home"}

// Run the command.
dockerErr := RunDockerWithProviderEnvs(docker.CommandOption{
dockerErr := RunDockerWithEnvs(docker.CommandOption{
Image: image,
Command: command,
AppendVolumes: volumes,
Expand Down
2 changes: 1 addition & 1 deletion cmd/app-deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func deploy(app string) error {
volumes := []string{dir + ":/app_home"}

// Run the command.
dockerErr := RunDockerWithProviderEnvs(docker.CommandOption{
dockerErr := RunDockerWithEnvs(docker.CommandOption{
Image: image,
Command: command,
AppendVolumes: volumes,
Expand Down
2 changes: 1 addition & 1 deletion cmd/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@ func Delete() error {
// Command to run.
command := []string{"kuda_delete"}
// Run.
err := RunDockerWithProviderEnvs(docker.CommandOption{Image: image, Command: command})
err := RunDockerWithEnvs(docker.CommandOption{Image: image, Command: command})
return err
}
2 changes: 1 addition & 1 deletion cmd/dev-start.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func start(devImage string) error {
volumes := []string{dir + ":/app_home"}

// Run the command.
dockerErr := RunDockerWithProviderEnvs(docker.CommandOption{
dockerErr := RunDockerWithEnvs(docker.CommandOption{
Image: image,
Command: command,
AppendVolumes: volumes,
Expand Down
2 changes: 1 addition & 1 deletion cmd/dev-stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ func Stop() error {
// Command to run.
command := []string{"kuda_dev_stop"}
// Run
err := RunDockerWithProviderEnvs(docker.CommandOption{Image: image, Command: command})
err := RunDockerWithEnvs(docker.CommandOption{Image: image, Command: command})
return err
}
2 changes: 1 addition & 1 deletion cmd/get.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func get(property string) error {
command := []string{"kuda_get", property}

// Run the command.
dockerErr := RunDockerWithProviderEnvs(docker.CommandOption{
dockerErr := RunDockerWithEnvs(docker.CommandOption{
Image: image,
Command: command,
})
Expand Down
26 changes: 15 additions & 11 deletions cmd/setup-gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ import (
)

var provider = "gcp"
var project string
var credentials string
var providerVersion = "1.2.0"

// gcpCmd represents the `setup gcp` command
var gcpCmd = &cobra.Command{
Expand All @@ -45,27 +44,32 @@ var gcpCmd = &cobra.Command{
func init() {
setupCmd.AddCommand(gcpCmd)

gcpCmd.PersistentFlags().StringVarP(&project, "project", "p", "",
"GCP Project ID")
gcpCmd.MarkPersistentFlagRequired("project")
viper.BindPFlag("gcp_project_id", gcpCmd.PersistentFlags().Lookup("project"))
gcpCmd.PersistentFlags().StringP("gcp_project_id", "p", "", "GCP Project ID")
gcpCmd.MarkPersistentFlagRequired("gcp_project_id")

gcpCmd.PersistentFlags().StringVarP(&credentials, "credentials", "c", "",
"Path to GCP credentials JSON")
gcpCmd.MarkPersistentFlagRequired("credentials")
viper.BindPFlag("gcp_credentials", gcpCmd.PersistentFlags().Lookup("credentials"))
gcpCmd.PersistentFlags().StringP("gcp_credentials", "c", "", "Path to GCP credentials JSON")
gcpCmd.MarkPersistentFlagRequired("gcp_credentials")
viper.BindPFlags(gcpCmd.PersistentFlags())

gcpCmd.Flags().String("gcp_cluster_name", "kuda", "Name of the cluster.")
gcpCmd.Flags().String("gcp_compute_zone", "us-central1-a", "Compute Zone for the cluster.")
gcpCmd.Flags().String("gcp_machine_type", "n1-standard-4", "Machine type.")
gcpCmd.Flags().Int("gcp_pool_num_nodes", 1, "Default number of nodes on the system pool. ")
gcpCmd.Flags().String("gcp_gpu", "k80", "Default GPU to use")
gcpCmd.Flags().Bool("gcp_use_preemptible", false, "Wether or not to use pre-emptible instances")
viper.BindPFlags(gcpCmd.Flags())
}

func setup() error {
// Set provider config.
viper.Set("provider", provider)

// Setup the provider's image.
providerVersion := "1.2.0"
image := "gcr.io/kuda-project/provider-" + provider + ":" + providerVersion
viper.Set("image", image)

// Setup the volume mounting for the credentials.
credentials := viper.GetString("gcp_credentials")
volumeSecret := docker.VolumeMapping{
From: filepath.Dir(credentials),
To: "/secret",
Expand Down
2 changes: 1 addition & 1 deletion cmd/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func Setup() error {
// Command to run.
command := []string{"kuda_setup"}
// Run
err := RunDockerWithProviderEnvs(docker.CommandOption{Image: image, Command: command})
err := RunDockerWithEnvs(docker.CommandOption{Image: image, Command: command})
if err != nil {
// Ask if we should delete the cluster.
fmt.Print("There was an error setting up the cluster. Do you want to delete it? (y/n) ")
Expand Down
18 changes: 7 additions & 11 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,16 @@ import (
"github.com/spf13/viper"
)

// RunDockerWithProviderEnvs retrieves local environment variables
// RunDockerWithEnvs retrieves local environment variables
// that match a provider id and runs a docker image.
func RunDockerWithProviderEnvs(opts docker.CommandOption) error {
func RunDockerWithEnvs(opts docker.CommandOption) error {
// Environment variables for the Docker image.
// We look for all the configs that start with the provider name
// "gcp" and convert them in the environment variable
// format KUDA_GCP_*
provider := viper.GetString("provider")
// Convert all the viper configs to environment variable
// in the format KUDA_* where * is the config uppercased.
for k, e := range viper.AllSettings() {
if strings.HasPrefix(k, provider) {
key := "KUDA_" + strings.ToUpper(k)
value := fmt.Sprintf("%v", e)
opts.AppendEnv = append(opts.AppendEnv, key+"="+value)
}
key := "KUDA_" + strings.ToUpper(k)
value := fmt.Sprintf("%v", e)
opts.AppendEnv = append(opts.AppendEnv, key+"="+value)
}
return docker.RunDockerCommand(opts)
}
4 changes: 2 additions & 2 deletions docs/kuda/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ This command also allocates an empty GPU node that will be provisioned only when

**Flags for `gcp`:**

- **`[-p | --project]`**: A GCP project ID.
- **`[-c | --credentials]`**: Path to a GCP credentials file.
- **`[-p | --gcp_project_id]`**: An existing GCP project ID.
- **`[-c | --gcp_credentials]`**: Path to a GCP credentials file.

### → Delete

Expand Down
21 changes: 8 additions & 13 deletions images/providers/gcp/.config.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,25 @@
set -e

echo
echo -e "\e[1m \e[34mKuda GCP provider \e[36mv$(cat /kuda_cmd/VERSION) \e[0m"
echo

# Make sure gcp_project_id is set.
if [ -z "$KUDA_GCP_PROJECT_ID" ]; then
echo "\$KUDA_GCP_PROJECT_ID is undefined."
exit 1
fi

# Make sure gcp_credentials is set.
if [ -z "$KUDA_GCP_CREDENTIALS" ]; then
echo "\$KUDA_GCP_CREDENTIALS is undefined."
exit 1
fi

echo
echo -e "\e[1m \e[34mKuda GCP provider \e[36mv$(cat /kuda_cmd/VERSION) \e[0m"
echo

# Setup credential path to mounted volume in the docker image.
export KUDA_GCP_CREDENTIALS=/secret/$(basename $KUDA_GCP_CREDENTIALS)

# Set default config.
export KUDA_GCP_CLUSTER_NAME="${KUDA_GCP_CLUSTER_NAME:-kuda}"
export KUDA_GCP_COMPUTE_ZONE="${KUDA_GCP_COMPUTE_ZONE:-us-central1-a}"
export KUDA_GCP_MACHINE_TYPE="${KUDA_GCP_MACHINE_TYPE:-n1-standard-2}"

export KUDA_DEFAULT_POOL_NUM_NODES="${KUDA_DEFAULT_POOL_NUM_NODES:-1}"
export KUDA_DEFAULT_GPU="${KUDA_DEFAULT_GPU:-k80}"
export KUDA_DEFAULT_USE_PREEMPTIBLE="${KUDA_DEFAULT_USE_PREEMPTIBLE:-false}"

# Set default Kuda Dev config.
export KUDA_DEV_APP_NAME="${KUDA_DEV_APP_NAME:-kuda-dev}"
export KUDA_DEV_SYNC_PATH="${KUDA_DEV_SYNC_PATH:-/app_home}"

Expand Down
19 changes: 16 additions & 3 deletions images/providers/gcp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,25 @@ gcloud projects add-iam-policy-binding <project> \
--role=roles/container.admin
```

# Configuration

You can override the following settings by adding them as flags of the `kuda setup` command (ex: `kuda setup gcp ... --gcp_cluster_name=mycluster`).

| Parameter | Default | Description |
| - | - | - |
| `gcp_project_id` | None (Required) | The GCP Project ID |
| `gcp_credentials` | None (Required) | Path to the GCP Credential JSON file |
| `gcp_cluster_name` | kuda | The new or existing cluster name |
| `gcp_compute_zone` | us-central1-a | The GCP compute zone |
| `gcp_machine_type` | n1-standard-4 | Default machine type for the nodes (Only evaluated during `setup`)|
| `gcp_pool_num_nodes` | 1 | Default number of nodes of the system pool (Only evaluated during `setup`) |
| `gcp_gpu` | k80 | The default GPU to use. (Only evaluated during `setup`) |
| `gcp_use_preemptible` | false | Whether or not the GPU nodes should be preemptible. (Only evaluated during `setup`) |


# Limitations

- The "Compute Engine API - GPUs (all regions)" quota must be requested manually [here](<https://console.cloud.google.com/iam-admin/quotas?metric=GPUs%20(all%20regions)>)
- By default, the system node and the load balancer will be kept on, incurring charges of about 45€ per month. You can manually scale down the system node to 0 to temporarily stop its associated charges or run `kuda delete` to completely delete the cluster.
- Currently the load balancer doesn't get deleted when you delete the cluster. Make sure to delete it manually [here](https://console.cloud.google.com/net-services/loadbalancing/loadBalancers/list) after deleting a cluster to avoid extra costs.
- You can find a list of parameters that you can override in `.config.sh`

# Development

2 changes: 1 addition & 1 deletion images/providers/gcp/dev_start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ gcloud container clusters get-credentials $KUDA_GCP_CLUSTER_NAME

# TODO: Increase the number of GPU nodes by 1 to speed up initialization.
# gcloud container clusters resize $KUDA_GCP_CLUSTER_NAME \
# --node-pool $KUDA_DEFAULT_GPU \
# --node-pool $KUDA_GCP_GPU \
# --num-nodes 1 \
# --quiet

Expand Down
2 changes: 1 addition & 1 deletion images/providers/gcp/dev_stop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ kubectl delete virtualservice kuda-dev
# Resize the GPU cluster to 0. > Not mandatory
# since the autoscaler will automatically scale down to 0 after a while.
# gcloud container clusters resize $KUDA_GCP_CLUSTER_NAME \
# --node-pool $KUDA_DEFAULT_GPU \
# --node-pool $KUDA_GCP_GPU \
# --num-nodes 0 \
# --quiet
12 changes: 6 additions & 6 deletions images/providers/gcp/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ function create_main_cluster() {
--cluster-version=latest \
--zone=$KUDA_GCP_COMPUTE_ZONE \
--scopes cloud-platform \
--num-nodes $KUDA_DEFAULT_POOL_NUM_NODES \
--num-nodes $KUDA_GCP_POOL_NUM_NODES \
--enable-stackdriver-kubernetes \
--issue-client-certificate \
--enable-basic-auth \
Expand All @@ -31,13 +31,13 @@ function create_main_cluster() {

function create_gpu_nodepools() {
preemptible_mode=""
if [ $KUDA_DEFAULT_USE_PREEMPTIBLE = true ]; then
if [ $KUDA_GCP_USE_PREEMPTIBLE = true ]; then
preemptible_mode='--preemptible'
fi
# Create the default GPU Node pool.
gcloud container node-pools create $KUDA_DEFAULT_GPU \
gcloud container node-pools create $KUDA_GCP_GPU \
--machine-type=$KUDA_GCP_MACHINE_TYPE \
--accelerator type=nvidia-tesla-$KUDA_DEFAULT_GPU,count=1 \
--accelerator type=nvidia-tesla-$KUDA_GCP_GPU,count=1 \
--zone $KUDA_GCP_COMPUTE_ZONE \
--cluster $KUDA_GCP_CLUSTER_NAME \
--num-nodes 1 \
Expand Down Expand Up @@ -109,10 +109,10 @@ gcloud container clusters get-credentials $KUDA_GCP_CLUSTER_NAME
# Check if GPU cluster exists otherwise create one.
if gcloud container node-pools list \
--zone $KUDA_GCP_COMPUTE_ZONE \
--cluster $KUDA_GCP_CLUSTER_NAME | grep -q $KUDA_DEFAULT_GPU; then
--cluster $KUDA_GCP_CLUSTER_NAME | grep -q $KUDA_GCP_GPU; then
echo "GPU node pool already exists."
else
echo "Creating new GPU node pool with default GPU $KUDA_DEFAULT_GPU"
echo "Creating new GPU node pool with default GPU $KUDA_GCP_GPU"
create_gpu_nodepools
fi

Expand Down

0 comments on commit ec01ad6

Please sign in to comment.