Skip to content

Commit

Permalink
Add e2e test for FromVolume ResumePolicy (#1284)
Browse files Browse the repository at this point in the history
* Add e2e test for from volume resume

* Resume experiment after completion

* Print controller logs

* Remove test prints

* Remove controller logs
  • Loading branch information
andreyvelich authored Jul 31, 2020
1 parent a42d8a9 commit ac1dc24
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 23 deletions.
48 changes: 34 additions & 14 deletions test/e2e/v1beta1/resume-e2e-experiment.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,13 @@ func main() {
if err != nil {
log.Fatal("Get Experiment error. Experiment not created yet ", err)
}
if exp.Spec.Algorithm.AlgorithmName != "hyperband" {

var maxtrials int32 = 7
var paralleltrials int32 = 3
if exp.Spec.Algorithm.AlgorithmName != "hyperband" && exp.Spec.Algorithm.AlgorithmName != "darts" {
// Hyperband will validate the parallel trial count,
// thus we should not change it.
var maxtrials int32 = 7
var paralleltrials int32 = 3
// Not necessary to test parallel Trials for Darts
exp.Spec.MaxTrialCount = &maxtrials
exp.Spec.ParallelTrialCount = &paralleltrials
}
Expand All @@ -88,10 +90,11 @@ func main() {
if err != nil {
log.Fatal("Get Experiment error ", err)
}
if exp.IsRunning() {
log.Printf("Experiment %v started running", exp.Name)
if exp.IsRunning() && exp.Status.Trials == maxtrials {
log.Printf("Experiment %v started running with %v MaxTrialCount", exp.Name, maxtrials)
break
}

time.Sleep(5 * time.Second)
}

Expand Down Expand Up @@ -123,6 +126,7 @@ func main() {
log.Printf("Experiment %v finished", exp.Name)
break
}

time.Sleep(20 * time.Second)
}

Expand Down Expand Up @@ -163,24 +167,40 @@ func main() {
sug, err := kclient.GetSuggestion(exp.Name, exp.Namespace)
if exp.Spec.ResumePolicy == experimentsv1beta1.LongRunning {
if sug.IsSucceeded() {
log.Fatal("Suggestion is terminated while ResumePolicy = LongRunning")
log.Fatal("Suggestion is succeeded while ResumePolicy = LongRunning")
}
}
if exp.Spec.ResumePolicy == experimentsv1beta1.NeverResume {
if exp.Spec.ResumePolicy == experimentsv1beta1.NeverResume || exp.Spec.ResumePolicy == experimentsv1beta1.FromVolume {
if sug.IsRunning() {
log.Fatal("Suggestion is still running while ResumePolicy = NeverResume")
log.Fatalf("Suggestion is still running while ResumePolicy = %v", exp.Spec.ResumePolicy)
}

namespacedName := types.NamespacedName{Name: controllerUtil.GetAlgorithmServiceName(sug), Namespace: sug.Namespace}
service := &corev1.Service{}
err := kclient.GetClient().Get(context.TODO(), namespacedName, service)
err := kclient.GetClient().Get(context.TODO(), namespacedName, &corev1.Service{})
if err == nil || !errors.IsNotFound(err) {
log.Fatal("Suggestion service is still alive while ResumePolicy = NeverResume")
log.Fatalf("Suggestion service is still alive while ResumePolicy = %v", exp.Spec.ResumePolicy)
}
log.Printf("Suggestion service %v has been deleted", controllerUtil.GetAlgorithmServiceName(sug))

namespacedName = types.NamespacedName{Name: controllerUtil.GetAlgorithmDeploymentName(sug), Namespace: sug.Namespace}
deployment := &appsv1.Deployment{}
err = kclient.GetClient().Get(context.TODO(), namespacedName, deployment)
err = kclient.GetClient().Get(context.TODO(), namespacedName, &appsv1.Deployment{})
if err == nil || !errors.IsNotFound(err) {
log.Fatal("Suggestion deployment is still alive while ResumePolicy = NeverResume")
log.Fatalf("Suggestion deployment is still alive while ResumePolicy = %v", exp.Spec.ResumePolicy)
}
log.Printf("Suggestion deployment %v has been deleted", controllerUtil.GetAlgorithmDeploymentName(sug))

if exp.Spec.ResumePolicy == experimentsv1beta1.FromVolume {
namespacedName = types.NamespacedName{Name: controllerUtil.GetAlgorithmPersistentVolumeClaimName(sug), Namespace: sug.Namespace}
err = kclient.GetClient().Get(context.TODO(), namespacedName, &corev1.PersistentVolumeClaim{})
if err != nil {
log.Fatalf("Suggestion persistent volume claim is not alive while ResumePolicy = %v, error: %v", experimentsv1beta1.FromVolume, err)
}

namespacedName = types.NamespacedName{Name: controllerUtil.GetAlgorithmPersistentVolumeName(sug)}
err = kclient.GetClient().Get(context.TODO(), namespacedName, &corev1.PersistentVolume{})
if err != nil {
log.Fatalf("Suggestion persistent volume is not alive while ResumePolicy = %v, error: %v", experimentsv1beta1.FromVolume, err)
}
}
}

Expand Down
34 changes: 25 additions & 9 deletions test/e2e/v1beta1/run-e2e-experiment.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,24 +148,40 @@ func main() {
sug, err := kclient.GetSuggestion(exp.Name, exp.Namespace)
if exp.Spec.ResumePolicy == experimentsv1beta1.LongRunning {
if sug.IsSucceeded() {
log.Fatal("Suggestion is terminated while ResumePolicy = LongRunning")
log.Fatal("Suggestion is succeeded while ResumePolicy = LongRunning")
}
}
if exp.Spec.ResumePolicy == experimentsv1beta1.NeverResume {
if exp.Spec.ResumePolicy == experimentsv1beta1.NeverResume || exp.Spec.ResumePolicy == experimentsv1beta1.FromVolume {
if sug.IsRunning() {
log.Fatal("Suggestion is still running while ResumePolicy = NeverResume")
log.Fatalf("Suggestion is still running while ResumePolicy = %v", exp.Spec.ResumePolicy)
}

namespacedName := types.NamespacedName{Name: controllerUtil.GetAlgorithmServiceName(sug), Namespace: sug.Namespace}
service := &corev1.Service{}
err := kclient.GetClient().Get(context.TODO(), namespacedName, service)
err := kclient.GetClient().Get(context.TODO(), namespacedName, &corev1.Service{})
if err == nil || !errors.IsNotFound(err) {
log.Fatal("Suggestion service is still alive while ResumePolicy = NeverResume")
log.Fatalf("Suggestion service is still alive while ResumePolicy = %v", exp.Spec.ResumePolicy)
}
log.Printf("Suggestion service %v has been deleted", controllerUtil.GetAlgorithmServiceName(sug))

namespacedName = types.NamespacedName{Name: controllerUtil.GetAlgorithmDeploymentName(sug), Namespace: sug.Namespace}
deployment := &appsv1.Deployment{}
err = kclient.GetClient().Get(context.TODO(), namespacedName, deployment)
err = kclient.GetClient().Get(context.TODO(), namespacedName, &appsv1.Deployment{})
if err == nil || !errors.IsNotFound(err) {
log.Fatal("Suggestion deployment is still alive while ResumePolicy = NeverResume")
log.Fatalf("Suggestion deployment is still alive while ResumePolicy = %v", exp.Spec.ResumePolicy)
}
log.Printf("Suggestion deployment %v has been deleted", controllerUtil.GetAlgorithmDeploymentName(sug))

if exp.Spec.ResumePolicy == experimentsv1beta1.FromVolume {
namespacedName = types.NamespacedName{Name: controllerUtil.GetAlgorithmPersistentVolumeClaimName(sug), Namespace: sug.Namespace}
err = kclient.GetClient().Get(context.TODO(), namespacedName, &corev1.PersistentVolumeClaim{})
if err != nil {
log.Fatalf("Suggestion persistent volume claim is not alive while ResumePolicy = %v, error: %v", experimentsv1beta1.FromVolume, err)
}

namespacedName = types.NamespacedName{Name: controllerUtil.GetAlgorithmPersistentVolumeName(sug)}
err = kclient.GetClient().Get(context.TODO(), namespacedName, &corev1.PersistentVolume{})
if err != nil {
log.Fatalf("Suggestion persistent volume is not alive while ResumePolicy = %v, error: %v", experimentsv1beta1.FromVolume, err)
}
}
}

Expand Down
70 changes: 70 additions & 0 deletions test/scripts/v1beta1/run-from-volume.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script runs the e2e test for the FromVolume resume policy from our
# argo workflow. It configures kubectl for the test cluster, runs the
# from-volume-resume experiment, resumes it after completion and finally
# deletes the experiment.
#
# Environment variables read by this script (with `nounset` enabled the script
# aborts as soon as an unset one is expanded): CLUSTER_NAME, GCP_ZONE,
# GCP_PROJECT, GOPATH, REPO_OWNER, REPO_NAME, GOOGLE_APPLICATION_CREDENTIALS,
# HOME.

set -o errexit
set -o nounset
set -o pipefail

CLUSTER_NAME="${CLUSTER_NAME}"
ZONE="${GCP_ZONE}"
PROJECT="${GCP_PROJECT}"
GO_DIR="${GOPATH}/src/github.com/${REPO_OWNER}/${REPO_NAME}"

# Name of the experiment under test and its manifest, relative to the e2e
# test directory the script changes into below.
EXPERIMENT_NAME="from-volume-resume"
EXPERIMENT_FILE="../../../examples/v1beta1/resume-experiment/${EXPERIMENT_NAME}.yaml"

# Print the current state of the suggestion and the experiment under test.
describe_resources() {
  kubectl -n kubeflow describe suggestion "${EXPERIMENT_NAME}"
  kubectl -n kubeflow describe experiment "${EXPERIMENT_NAME}"
}

echo "Activating service-account"
gcloud auth activate-service-account --key-file="${GOOGLE_APPLICATION_CREDENTIALS}"

echo "Configuring kubectl"

echo "CLUSTER_NAME: ${CLUSTER_NAME}"
echo "ZONE: ${GCP_ZONE}"
echo "PROJECT: ${GCP_PROJECT}"

gcloud --project "${PROJECT}" container clusters get-credentials "${CLUSTER_NAME}" \
  --zone "${ZONE}"
kubectl config set-context "$(kubectl config current-context)" --namespace=default
# NOTE(review): USER is not referenced again in this script; confirm whether a
# child process relies on it before removing this assignment.
USER=$(gcloud config get-value account)

echo "All Katib components are running."
kubectl version
kubectl cluster-info
echo "Katib deployments"
kubectl -n kubeflow get deploy
echo "Katib services"
kubectl -n kubeflow get svc
echo "Katib pods"
kubectl -n kubeflow get pod

cd "${GO_DIR}/test/e2e/v1beta1"

# Because errexit is set, a failing test binary aborts the script right away;
# the describe and delete steps that follow only run after a successful test.
echo "Running e2e test for resume from volume experiment"
export KUBECONFIG="${HOME}/.kube/config"
./run-e2e-experiment "${EXPERIMENT_FILE}"

describe_resources

echo "Resuming the completed experiment with resume from volume"
./resume-e2e-experiment "${EXPERIMENT_FILE}"

describe_resources

kubectl -n kubeflow delete experiment "${EXPERIMENT_NAME}"

exit 0
7 changes: 7 additions & 0 deletions test/workflows/components/workflows-v1beta1.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,10 @@
name: "run-darts-e2e-tests",
template: "run-darts-e2e-tests",
},
{
name: "run-from-volume-e2e-tests",
template: "run-from-volume-e2e-tests",
},
],
],
},
Expand Down Expand Up @@ -410,6 +414,9 @@
$.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("run-darts-e2e-tests", testWorkerImage, [
"test/scripts/v1beta1/run-suggestion-darts.sh",
]), // run darts algorithm
$.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("run-from-volume-e2e-tests", testWorkerImage, [
"test/scripts/v1beta1/run-from-volume.sh",
]), // run resume from volume suggestion test
$.parts(namespace, name, overrides).e2e(prow_env, bucket).buildTemplate("create-pr-symlink", testWorkerImage, [
"python",
"-m",
Expand Down

0 comments on commit ac1dc24

Please sign in to comment.