Skip to content

Commit a93ea43

Browse files
committed
Fix to handle hash collisions correctly for DaemonSet
1 parent e626d35 commit a93ea43

File tree

3 files changed

+164
-5
lines changed

3 files changed

+164
-5
lines changed

pkg/controller/daemon/update.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -330,16 +330,16 @@ func (dsc *DaemonSetsController) snapshot(ds *apps.DaemonSet, revision int64) (*
330330
}
331331

332332
history, err = dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Create(history)
333-
if errors.IsAlreadyExists(err) {
333+
if outerErr := err; errors.IsAlreadyExists(outerErr) {
334334
// TODO: Is it okay to get from historyLister?
335335
existedHistory, getErr := dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Get(name, metav1.GetOptions{})
336336
if getErr != nil {
337337
return nil, getErr
338338
}
339339
// Check if we already created it
340-
done, err := Match(ds, existedHistory)
341-
if err != nil {
342-
return nil, err
340+
done, matchErr := Match(ds, existedHistory)
341+
if matchErr != nil {
342+
return nil, matchErr
343343
}
344344
if done {
345345
return existedHistory, nil
@@ -360,7 +360,7 @@ func (dsc *DaemonSetsController) snapshot(ds *apps.DaemonSet, revision int64) (*
360360
return nil, updateErr
361361
}
362362
glog.V(2).Infof("Found a hash collision for DaemonSet %q - bumping collisionCount to %d to resolve it", ds.Name, *currDS.Status.CollisionCount)
363-
return nil, err
363+
return nil, outerErr
364364
}
365365
return history, err
366366
}

test/integration/daemonset/BUILD

+2
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ go_test(
1616
deps = [
1717
"//pkg/api/legacyscheme:go_default_library",
1818
"//pkg/api/v1/pod:go_default_library",
19+
"//pkg/controller:go_default_library",
1920
"//pkg/controller/daemon:go_default_library",
2021
"//pkg/features:go_default_library",
2122
"//pkg/scheduler:go_default_library",
2223
"//pkg/scheduler/algorithm:go_default_library",
2324
"//pkg/scheduler/algorithmprovider:go_default_library",
2425
"//pkg/scheduler/factory:go_default_library",
26+
"//pkg/util/labels:go_default_library",
2527
"//pkg/util/metrics:go_default_library",
2628
"//staging/src/k8s.io/api/apps/v1:go_default_library",
2729
"//staging/src/k8s.io/api/core/v1:go_default_library",

test/integration/daemonset/daemonset_test.go

+157
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,15 @@ import (
4141
"k8s.io/client-go/tools/record"
4242
"k8s.io/kubernetes/pkg/api/legacyscheme"
4343
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
44+
"k8s.io/kubernetes/pkg/controller"
4445
"k8s.io/kubernetes/pkg/controller/daemon"
4546
"k8s.io/kubernetes/pkg/features"
4647
"k8s.io/kubernetes/pkg/scheduler"
4748
"k8s.io/kubernetes/pkg/scheduler/algorithm"
4849
"k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
4950
_ "k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
5051
"k8s.io/kubernetes/pkg/scheduler/factory"
52+
labelsutil "k8s.io/kubernetes/pkg/util/labels"
5153
"k8s.io/kubernetes/pkg/util/metrics"
5254
"k8s.io/kubernetes/test/integration/framework"
5355
)
@@ -372,6 +374,52 @@ func waitForPodsCreated(podInformer cache.SharedIndexInformer, num int) error {
372374
})
373375
}
374376

377+
func waitForDaemonSetAndControllerRevisionCreated(c clientset.Interface, name string, namespace string) error {
378+
return wait.PollImmediate(100*time.Millisecond, 10*time.Second, func() (bool, error) {
379+
ds, err := c.AppsV1().DaemonSets(namespace).Get(name, metav1.GetOptions{})
380+
if err != nil {
381+
return false, err
382+
}
383+
if ds == nil {
384+
return false, nil
385+
}
386+
387+
revs, err := c.AppsV1().ControllerRevisions(namespace).List(metav1.ListOptions{})
388+
if err != nil {
389+
return false, err
390+
}
391+
if revs.Size() == 0 {
392+
return false, nil
393+
}
394+
395+
for _, rev := range revs.Items {
396+
for _, oref := range rev.OwnerReferences {
397+
if oref.Kind == "DaemonSet" && oref.UID == ds.UID {
398+
return true, nil
399+
}
400+
}
401+
}
402+
return false, nil
403+
})
404+
}
405+
406+
func hashAndNameForDaemonSet(ds *apps.DaemonSet) (string, string) {
407+
hash := fmt.Sprint(controller.ComputeHash(&ds.Spec.Template, ds.Status.CollisionCount))
408+
name := ds.Name + "-" + hash
409+
return hash, name
410+
}
411+
412+
func validateDaemonSetCollisionCount(dsClient appstyped.DaemonSetInterface, dsName string, expCount int32, t *testing.T) {
413+
ds, err := dsClient.Get(dsName, metav1.GetOptions{})
414+
if err != nil {
415+
t.Fatalf("Failed to look up DaemonSet: %v", err)
416+
}
417+
collisionCount := ds.Status.CollisionCount
418+
if *collisionCount != expCount {
419+
t.Fatalf("Expected collisionCount to be %d, but found %d", expCount, *collisionCount)
420+
}
421+
}
422+
375423
func validateDaemonSetStatus(
376424
dsClient appstyped.DaemonSetInterface,
377425
dsName string,
@@ -740,3 +788,112 @@ func TestInsufficientCapacityNodeWhenScheduleDaemonSetPodsEnabled(t *testing.T)
740788
validateDaemonSetStatus(dsClient, ds.Name, 1, t)
741789
})
742790
}
791+
792+
// TestLaunchWithHashCollision tests that a DaemonSet can be updated even if there is a
793+
// hash collision with an existing ControllerRevision
794+
func TestLaunchWithHashCollision(t *testing.T) {
795+
server, closeFn, dc, informers, clientset := setup(t)
796+
defer closeFn()
797+
ns := framework.CreateTestingNamespace("one-node-daemonset-test", server, t)
798+
defer framework.DeleteTestingNamespace(ns, server, t)
799+
800+
dsClient := clientset.AppsV1().DaemonSets(ns.Name)
801+
podInformer := informers.Core().V1().Pods().Informer()
802+
nodeClient := clientset.CoreV1().Nodes()
803+
804+
stopCh := make(chan struct{})
805+
defer close(stopCh)
806+
807+
informers.Start(stopCh)
808+
go dc.Run(1, stopCh)
809+
810+
setupScheduler(t, clientset, informers, stopCh)
811+
812+
// Create single node
813+
_, err := nodeClient.Create(newNode("single-node", nil))
814+
if err != nil {
815+
t.Fatalf("Failed to create node: %v", err)
816+
}
817+
818+
// Create new DaemonSet with RollingUpdate strategy
819+
orgDs := newDaemonSet("foo", ns.Name)
820+
oneIntString := intstr.FromInt(1)
821+
orgDs.Spec.UpdateStrategy = apps.DaemonSetUpdateStrategy{
822+
Type: apps.RollingUpdateDaemonSetStrategyType,
823+
RollingUpdate: &apps.RollingUpdateDaemonSet{
824+
MaxUnavailable: &oneIntString,
825+
},
826+
}
827+
ds, err := dsClient.Create(orgDs)
828+
if err != nil {
829+
t.Fatalf("Failed to create DaemonSet: %v", err)
830+
}
831+
832+
// Wait for the DaemonSet to be created before proceeding
833+
err = waitForDaemonSetAndControllerRevisionCreated(clientset, ds.Name, ds.Namespace)
834+
if err != nil {
835+
t.Fatalf("Failed to create DeamonSet: %v", err)
836+
}
837+
838+
ds, err = dsClient.Get(ds.Name, metav1.GetOptions{})
839+
if err != nil {
840+
t.Fatalf("Failed to get DaemonSet: %v", err)
841+
}
842+
var orgCollisionCount int32
843+
if ds.Status.CollisionCount != nil {
844+
orgCollisionCount = *ds.Status.CollisionCount
845+
}
846+
847+
// Look up the ControllerRevision for the DaemonSet
848+
_, name := hashAndNameForDaemonSet(ds)
849+
revision, err := clientset.AppsV1().ControllerRevisions(ds.Namespace).Get(name, metav1.GetOptions{})
850+
if err != nil || revision == nil {
851+
t.Fatalf("Failed to look up ControllerRevision: %v", err)
852+
}
853+
854+
// Create a "fake" ControllerRevision that we know will create a hash collision when we make
855+
// the next update
856+
one := int64(1)
857+
ds.Spec.Template.Spec.TerminationGracePeriodSeconds = &one
858+
859+
newHash, newName := hashAndNameForDaemonSet(ds)
860+
newRevision := &apps.ControllerRevision{
861+
ObjectMeta: metav1.ObjectMeta{
862+
Name: newName,
863+
Namespace: ds.Namespace,
864+
Labels: labelsutil.CloneAndAddLabel(ds.Spec.Template.Labels, apps.DefaultDaemonSetUniqueLabelKey, newHash),
865+
Annotations: ds.Annotations,
866+
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(ds, apps.SchemeGroupVersion.WithKind("DaemonSet"))},
867+
},
868+
Data: revision.Data,
869+
Revision: revision.Revision + 1,
870+
}
871+
_, err = clientset.AppsV1().ControllerRevisions(ds.Namespace).Create(newRevision)
872+
if err != nil {
873+
t.Fatalf("Failed to create ControllerRevision: %v", err)
874+
}
875+
876+
// Make an update of the DaemonSet which we know will create a hash collision when
877+
// the next ControllerRevision is created.
878+
_, err = dsClient.Update(ds)
879+
if err != nil {
880+
t.Fatalf("Failed to update DaemonSet: %v", err)
881+
}
882+
883+
// Wait for any pod with the latest Spec to exist
884+
err = wait.PollImmediate(100*time.Millisecond, 10*time.Second, func() (bool, error) {
885+
objects := podInformer.GetIndexer().List()
886+
for _, object := range objects {
887+
pod := object.(*v1.Pod)
888+
if *pod.Spec.TerminationGracePeriodSeconds == *ds.Spec.Template.Spec.TerminationGracePeriodSeconds {
889+
return true, nil
890+
}
891+
}
892+
return false, nil
893+
})
894+
if err != nil {
895+
t.Fatalf("Failed to wait for Pods with the latest Spec to be created: %v", err)
896+
}
897+
898+
validateDaemonSetCollisionCount(dsClient, ds.Name, orgCollisionCount+1, t)
899+
}

0 commit comments

Comments
 (0)