making crashing build test fail faster (#352)
dgkanatsios committed Jul 29, 2022
1 parent 5ab9198 commit 70710ea
Showing 9 changed files with 75 additions and 61 deletions.
17 changes: 9 additions & 8 deletions cmd/e2e/utilities_test.go
@@ -461,16 +461,12 @@ func verifyGameServerPodEvictionAnnotation(ctx context.Context, kubeClient clien
// verifyPods checks if the Pod state is equal to the expected
func verifyPods(ctx context.Context, kubeClient client.Client, state buildState) error {
pods := corev1.PodList{}

if err := kubeClient.List(ctx, &pods, client.InNamespace(testNamespace)); err != nil {
if err := kubeClient.List(ctx, &pods, client.InNamespace(testNamespace), client.MatchingLabels{LabelBuildName: state.buildName}); err != nil {
return err
}

var observedCount int
for _, pod := range pods.Items {
if pod.Labels[LabelBuildID] != state.buildID {
continue
}
if pod.Status.Phase != corev1.PodRunning {
continue
}
@@ -486,15 +482,20 @@ func verifyPods(ctx context.Context, kubeClient client.Client, state buildState)
if observedCount == state.podRunningCount {
return nil
}
// get a []string with all Pod states

// if observed count is not equal to running count, this means that at least one Pod has an error, so get a []string with all Pod states
var observedStates []string
for _, pod := range pods.Items {
observedStates = append(observedStates, fmt.Sprintf("%s:%s", pod.Name, pod.Status.Phase))
s := fmt.Sprintf("%s:%s", pod.Name, pod.Status.Phase)
if pod.Status.Phase != corev1.PodRunning && len(pod.Status.ContainerStatuses) > 0 && pod.Status.ContainerStatuses[0].LastTerminationState.Terminated != nil {
s += fmt.Sprintf("-(Terminated.Reason: %s)", pod.Status.ContainerStatuses[0].LastTerminationState.Terminated.Reason)
}
observedStates = append(observedStates, s)
}
return fmt.Errorf("Expecting %d Pods in state Running, got %d. Pod states: %v", state.podRunningCount, observedCount, observedStates)
}

// verifyGameServerDetail checks if the GameServerDetail CR is valid and has the appropriate state
// verifyGameServerDetail checks if the GameServerDetail CR is valid and has the expected state
func verifyGameServerDetail(ctx context.Context, kubeClient client.Client, gameServerDetailName string, expectedConnectedPlayersCount int, expectedConnectedPlayers []string) error {
gameServerDetail := mpsv1alpha1.GameServerDetail{}
if err := kubeClient.Get(ctx, types.NamespacedName{Name: gameServerDetailName, Namespace: testNamespace}, &gameServerDetail); err != nil {
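Because the List call in verifyPods above is now filtered by the Build's label and the failure message includes each crashed container's last termination reason, a test that polls verifyPods can surface a crashing Build quickly with a readable error. A minimal sketch of such a polling wrapper follows; it is not part of this commit, the waitForRunningPods name and the 30-second/1-second values are illustrative assumptions, and it relies on the file's existing context, time and client imports.

func waitForRunningPods(ctx context.Context, kubeClient client.Client, state buildState) error {
	deadline := time.Now().Add(30 * time.Second) // assumed timeout, not taken from the source
	for {
		// verifyPods returns nil once state.podRunningCount Pods of this Build are Running
		err := verifyPods(ctx, kubeClient, state)
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			// the final error now lists "name:Phase" plus "(Terminated.Reason: ...)" for crashed containers
			return err
		}
		time.Sleep(time.Second)
	}
}
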
2 changes: 1 addition & 1 deletion pkg/operator/Makefile
@@ -62,7 +62,7 @@ vet: ## Run go vet against code.
ENVTEST_ASSETS_DIR=$(shell pwd)/testbin
.PHONY: test
test: manifests generate fmt vet envtest ## Run tests.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" THUNDERNETES_SAMPLE_IMAGE=$(IMAGE_NAME_SAMPLE):$(NETCORE_SAMPLE_TAG) THUNDERNETES_INIT_CONTAINER_IMAGE=$(IMAGE_NAME_INIT_CONTAINER):$(IMAGE_TAG) THUNDERNETES_INIT_CONTAINER_IMAGE_WIN=$(IMAGE_NAME_INIT_CONTAINER_WIN):$(IMAGE_TAG) go test -race ./... -v -coverprofile cover.out
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test -race ./... -v -coverprofile cover.out

##@ Build

4 changes: 1 addition & 3 deletions pkg/operator/api/v1alpha1/gameserver_webhook_test.go
@@ -1,8 +1,6 @@
package v1alpha1

import (
"os"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
@@ -62,7 +60,7 @@ func createTestGameServer(name, buildID string, hostNetwork bool) GameServer {
Containers: []corev1.Container{
{
Name: "testcontainer",
Image: os.Getenv("THUNDERNETES_SAMPLE_IMAGE"),
Image: "testimage",
Ports: []corev1.ContainerPort{
{
ContainerPort: 80,
9 changes: 1 addition & 8 deletions pkg/operator/api/v1alpha1/gameserverbuild_webhook_test.go
@@ -2,8 +2,6 @@ package v1alpha1

import (
"math/rand"
"os"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@@ -12,11 +10,6 @@ import (
"k8s.io/apimachinery/pkg/util/uuid"
)

const (
timeout = time.Second * 5
interval = time.Millisecond * 250
)

var _ = Describe("GameServerBuild webhook tests", func() {
Context("testing validation webhooks for gameserverbuild", func() {

@@ -105,7 +98,7 @@ func createTestGameServerBuild(buildName, buildID string, standingBy, max int, h
Containers: []corev1.Container{
{
Name: "testcontainer",
Image: os.Getenv("THUNDERNETES_SAMPLE_IMAGE"),
Image: "testimage",
Ports: []corev1.ContainerPort{
{
ContainerPort: 80,
16 changes: 9 additions & 7 deletions pkg/operator/controllers/controller_utils.go
@@ -3,6 +3,7 @@ package controllers
import (
"bytes"
"context"
"errors"
"fmt"
"math/rand"
"os"
@@ -58,11 +59,13 @@ func init() {

InitContainerImage = os.Getenv("THUNDERNETES_INIT_CONTAINER_IMAGE")
if InitContainerImage == "" {
panic("THUNDERNETES_INIT_CONTAINER_IMAGE cannot be empty")
log.Log.Error(errors.New("THUNDERNETES_INIT_CONTAINER_IMAGE is not set, setting to a mock value"), "")
InitContainerImage = "testInitContainerImage"
}
InitContainerImageWin = os.Getenv("THUNDERNETES_INIT_CONTAINER_IMAGE_WIN")
if InitContainerImageWin == "" {
panic("THUNDERNETES_INIT_CONTAINER_IMAGE_WIN cannot be empty")
log.Log.Error(errors.New("THUNDERNETES_INIT_CONTAINER_IMAGE_WIN is not set, setting to a mock value"), "")
InitContainerImageWin = "testInitContainerImageWin"
}
}
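The init() change above replaces a hard panic with a logged error and a mock value, so the operator's unit tests no longer need the init-container image variables to be set. A hedged sketch of the same fallback expressed as a helper; getEnvOrMock is a hypothetical name, not something added by this commit, and it assumes the file's os, errors and log imports.

// getEnvOrMock reads an environment variable; if it is empty, it logs an error
// and returns the supplied mock value instead of panicking, mirroring init() above.
func getEnvOrMock(key, mock string) string {
	v := os.Getenv(key)
	if v == "" {
		log.Log.Error(errors.New(key+" is not set, setting to a mock value"), "")
		return mock
	}
	return v
}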

@@ -426,11 +429,10 @@ func IsNodeReadyAndSchedulable(node *corev1.Node) bool {
return false
}

// useSpecificNodePoolAndNodeNotGameServer returns true if
// 1. the cluster contains a specific Node Pool/Group for GameServers (designated by the mps.playfab.com/gameservernode=true Label)
// 2. and the current Node does *not* have this Label
func useSpecificNodePoolAndNodeNotGameServer(useSpecificNodePool bool, node *corev1.Node) bool {
return useSpecificNodePool && node.Labels[LabelGameServerNode] != "true"
// isNodeGameServerNode returns true if Node has the Label mps.playfab.com/gameservernode=true set
// this Label should be set when the cluster contains a specific Node Pool/Group for GameServers
func isNodeGameServerNode(node *corev1.Node) bool {
return node.Labels != nil && node.Labels[LabelGameServerNode] == "true"
}

// ByState is a slice of GameServers
12 changes: 12 additions & 0 deletions pkg/operator/controllers/controller_utils_test.go
@@ -177,5 +177,17 @@ var _ = Describe("Utilities tests", func() {
Expect(s).To(HavePrefix(prefix))
Expect(len(s)).To(BeNumerically(">", len(prefix)))
})
It("should check if a Node is a GameServer Node", func() {
node := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
LabelGameServerNode: "true",
},
},
}
Expect(isNodeGameServerNode(node)).To(BeTrue())
node.Labels[LabelGameServerNode] = "nottrue"
Expect(isNodeGameServerNode(node)).To(BeFalse())
})
})
})
29 changes: 19 additions & 10 deletions pkg/operator/controllers/gameserverbuild_controller.go
@@ -37,32 +37,40 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
)

// We have observed cases in which we'll create more than one GameServer for a GameServerBuild
// This is because on the first reconcile we'll see that we have 0 GameServers and we'll create some
// We have observed cases in which the controller creates more GameServers than necessary for a GameServerBuild
// This is because e.g. on the first reconcile call, the controller sees that we have 0 GameServers and it starts to create some
// On the subsequent reconcile, the cache might have not been updated yet, so we'll still see 0 GameServers (or less than asked) and create more,
// so eventually we'll end up with more GameServers than requested
// The code will handle and delete the extra GameServers eventually, but it's a waste of resources unfortunately.
// The solution is to create a synchonized map (since it will be accessed by multiple reconciliations - one for each GameServerBuild)
// In this map, the key is GameServerBuild.Name whereas the value is map[string]interface{} and contains the GameServer.Name for all the GameServers under creation
// We use map[string]interface{} instead a []string to facilitate constant time lookups for GameServer names.
// On every reconcile loop, we check if all the GameServers for this GameServerBuild are present in cache)
// The controller code will eventually delete the extra GameServers, but we can improve this process.
// The solution is to create a synchronized map to track which objects were just created
// (synchronized since it might be accessed by multiple reconciliation goroutines - one for each GameServerBuild)
// In this map, the key is GameServerBuild.Name
// The value is a struct with map[string]interface{} and a mutex
// The map acts like a set which contains the GameServer.Name for all the GameServers under creation
// (We use map[string]interface{} instead of a []string to facilitate constant time lookups for GameServer names)
// On every reconcile loop, we check if all the GameServers for this GameServerBuild are present in cache
// If they are, we remove the GameServerBuild entry from the gameServersUnderCreation map
// If at least one of them is not in the cache, this means that the cache has not been fully updated yet
// so we will exit the current reconcile loop, cache will be updated in a subsequent loop
// so we will exit the current reconcile loop; as GameServers are created, the controller will reconcile again
var gameServersUnderCreation = sync.Map{}

// Similar logic to gameServersUnderCreation, but this time for deletion of game servers
// Similar logic to gameServersUnderCreation, but this time regarding deletion of game servers
// On every reconcile loop, we check if all the GameServers under deletion for this GameServerBuild have been removed from cache
// If even one of them exists in cache, we exit the reconcile loop
// In a subsequent loop, cache will be updated
var gameServersUnderDeletion = sync.Map{}

// a map to hold the number of crashes per Build
// concurrent since the reconcile loop can be called multiple times for different GameServerBuilds
// key is namespace/name of the GameServerBuild
// value is the number of crashes
var crashesPerBuild = sync.Map{}

const (
maxNumberOfGameServersToAdd = 20
// maximum number of GameServers to create per reconcile loop
// we have this in place since each create call is synchronous and we want to minimize the time for each reconcile loop
maxNumberOfGameServersToAdd = 20
// maximum number of GameServers to delete per reconcile loop
maxNumberOfGameServersToDelete = 20
)
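To make the comments above concrete, here is a short sketch of the pattern: a sync.Map keyed by Build name that acts as a set of just-created GameServer names, plus a per-Build crash counter. It is not the controller's actual implementation (the real value is a struct holding a map and a mutex; this sketch uses a nested sync.Map instead), all names are hypothetical, and it assumes the file's existing sync import.

// set of GameServer names under creation, keyed by GameServerBuild name
var underCreation = sync.Map{}

// markUnderCreation records that a GameServer was just created for a Build.
func markUnderCreation(buildName, gsName string) {
	v, _ := underCreation.LoadOrStore(buildName, &sync.Map{})
	v.(*sync.Map).Store(gsName, struct{}{}) // empty struct value: a constant-time set
}

// cacheCaughtUp reports whether every GameServer recorded for this Build is
// visible in the informer cache; if so, the Build's entry is removed so the
// next reconcile can create or delete GameServers again.
func cacheCaughtUp(buildName string, inCache map[string]bool) bool {
	v, ok := underCreation.Load(buildName)
	if !ok {
		return true // nothing pending for this Build
	}
	all := true
	v.(*sync.Map).Range(func(name, _ interface{}) bool {
		if !inCache[name.(string)] {
			all = false
			return false // stop iterating on the first miss
		}
		return true
	})
	if all {
		underCreation.Delete(buildName)
	}
	return all
}

// recordCrash bumps a per-Build crash counter, following the crashesPerBuild pattern.
// The Load/Store pair is not atomic, which is acceptable when a given Build is not
// reconciled concurrently (controller-runtime's behavior for a single object).
func recordCrash(m *sync.Map, buildKey string) int {
	v, _ := m.LoadOrStore(buildKey, 0)
	c := v.(int) + 1
	m.Store(buildKey, c)
	return c
}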

@@ -400,6 +408,7 @@ func getKeyForCrashesPerBuildMap(gsb *mpsv1alpha1.GameServerBuild) string {
}

// deleteNonActiveGameServers loops through all the GameServers CRs and deletes non-Active ones
// after it sorts all of them by state
func (r *GameServerBuildReconciler) deleteNonActiveGameServers(ctx context.Context,
gsb *mpsv1alpha1.GameServerBuild,
gameServers *mpsv1alpha1.GameServerList,
42 changes: 21 additions & 21 deletions pkg/operator/controllers/port_registry.go
@@ -18,15 +18,15 @@ import (

// PortRegistry implements a custom map for the port registry
type PortRegistry struct {
client client.Client // used to get the list of nodes
NodeCount int // the number of Ready and Schedulable nodes in the cluster
HostPortsPerNode map[int32]int // a slice for the entire port range. increases by 1 for each registered port
Min int32 // Minimum Port
Max int32 // Maximum Port
lockMutex sync.Mutex // lock for the map
useSpecificNodePool bool // if true, we only take into account Nodes that have the Label "mps.playfab.com/gameservernode"=true
nextPortNumber int32 // the next port to be assigned
logger logr.Logger
client client.Client // used to get the list of nodes
NodeCount int // the number of Ready and Schedulable nodes in the cluster
HostPortsPerNode map[int32]int // a map covering the entire port range. increases by 1 for each registered port
Min int32 // Minimum Port
Max int32 // Maximum Port
lockMutex sync.Mutex // lock for the map
useSpecificNodePoolForGameServers bool // if true, we only take into account Nodes that have the Label "mps.playfab.com/gameservernode"=true
nextPortNumber int32 // the next port to be assigned
logger logr.Logger
}

// NewPortRegistry initializes the map[port]counter that holds the port registry
Expand All @@ -45,14 +45,14 @@ func NewPortRegistry(client client.Client, gameServers *mpsv1alpha1.GameServerLi
}

pr := &PortRegistry{
client: client,
Min: min,
Max: max,
lockMutex: sync.Mutex{},
useSpecificNodePool: useSpecificNodePool,
nextPortNumber: min,
NodeCount: nodeCount,
logger: log.Log.WithName("portregistry"),
client: client,
Min: min,
Max: max,
lockMutex: sync.Mutex{},
useSpecificNodePoolForGameServers: useSpecificNodePool,
nextPortNumber: min,
NodeCount: nodeCount,
logger: log.Log.WithName("portregistry"),
}

// initialize the ports
@@ -96,7 +96,7 @@ func (pr *PortRegistry) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
var err error

// if we have a specific node pool/group for game servers (with mps.playfab.com/gameservernode=true Label)
if pr.useSpecificNodePool {
if pr.useSpecificNodePoolForGameServers {
err = pr.client.List(ctx, &nodeList, client.MatchingLabels{LabelGameServerNode: "true"})
} else { // get all the Nodes
err = pr.client.List(ctx, &nodeList)
@@ -205,7 +205,7 @@ func (pr *PortRegistry) SetupWithManager(mgr ctrl.Manager) error {
WithEventFilter(predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
node := e.Object.(*v1.Node)
if useSpecificNodePoolAndNodeNotGameServer(pr.useSpecificNodePool, node) {
if pr.useSpecificNodePoolForGameServers && !isNodeGameServerNode(node) {
return false
}
return IsNodeReadyAndSchedulable(node)
@@ -214,15 +214,15 @@ func (pr *PortRegistry) SetupWithManager(mgr ctrl.Manager) error {
node := e.Object.(*v1.Node)
// ignore this Node if we have a specific node pool for game servers (with mps.playfab.com/gameservernode=true Label)
// and the current Node does not have this Label
if useSpecificNodePoolAndNodeNotGameServer(pr.useSpecificNodePool, node) {
if pr.useSpecificNodePoolForGameServers && !isNodeGameServerNode(node) {
return false
}
return true
},
UpdateFunc: func(e event.UpdateEvent) bool {
oldNode := e.ObjectOld.(*v1.Node)
newNode := e.ObjectNew.(*v1.Node)
if useSpecificNodePoolAndNodeNotGameServer(pr.useSpecificNodePool, newNode) {
if pr.useSpecificNodePoolForGameServers && !isNodeGameServerNode(newNode) {
return false
}
return IsNodeReadyAndSchedulable(oldNode) != IsNodeReadyAndSchedulable(newNode)
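The same guard now appears in all three predicate funcs above: skip the Node event when a dedicated game-server node pool is configured but the Node lacks the game-server Label. A hypothetical consolidation, not part of this commit, just to spell out the shared condition (it assumes the file's existing v1 import for the core Node type):

// shouldWatchNode reports whether the port registry should react to events for this Node:
// when a dedicated node pool for game servers is configured, only Nodes carrying the
// mps.playfab.com/gameservernode=true Label count; otherwise every Node does.
func (pr *PortRegistry) shouldWatchNode(node *v1.Node) bool {
	return !pr.useSpecificNodePoolForGameServers || isNodeGameServerNode(node)
}

Each predicate would then begin with a single check: if !pr.shouldWatchNode(node) { return false }.
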
5 changes: 2 additions & 3 deletions pkg/operator/controllers/test_utils_test.go
@@ -2,7 +2,6 @@ package controllers

import (
"context"
"os"

. "github.com/onsi/gomega"
mpsv1alpha1 "github.com/playfab/thundernetes/pkg/operator/api/v1alpha1"
@@ -195,7 +194,7 @@ func testGenerateGameServerBuild(buildName, buildNamespace, buildID string, stan
Containers: []corev1.Container{
{
Name: "testcontainer",
Image: os.Getenv("THUNDERNETES_SAMPLE_IMAGE"),
Image: "testimage",
Ports: []corev1.ContainerPort{
{
ContainerPort: 80,
@@ -230,7 +229,7 @@ func testGenerateGameServer(buildName, buildID, gsNamespace, gsName string) *mps
Containers: []corev1.Container{
{
Name: "testcontainer",
Image: os.Getenv("THUNDERNETES_SAMPLE_IMAGE"),
Image: "testimage",
Ports: []corev1.ContainerPort{
{
ContainerPort: 80,
