Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixing NodeAgent crash #396

Merged
merged 2 commits into from
Sep 18, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions cmd/nodeagent/nodeagentmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ const (
// The game server process tells the NodeAgent about its state (if it's Initializing or StandingBy)
// and NodeAgent tells the game server if it has been allocated (its state having been converted to Active)
type NodeAgentManager struct {
gameServerMap *sync.Map // we use a sync map instead of a regular map since this will be updated by multiple goroutines
gameServerMap *sync.Map // map[GameServerName]GameServerInfo, to be updated concurrently
dynamicClient dynamic.Interface
watchStopper chan struct{}
nodeName string
Expand Down Expand Up @@ -111,10 +111,10 @@ func (n *NodeAgentManager) runHeartbeatTimeCheckerLoop() {
// HeartbeatTimeChecker checks that heartbeats are still being sent for each GameServerInfo
// in the local gameServerMap. If they are not, it sends a patch to mark those GameServers as unhealthy.
// It follows these two rules:
// 1. if the server hasn't sent its first heartbeat, it has FirstHeartbeatTimeout
// milliseconds since its creation before being marked as unhealthy
// 2. if the server has sent its first heartbeat, it has HeartbeatTimeout milliseconds
// since its last heartbeat before being marked as unhealthy
// 1. if the server hasn't sent its first heartbeat, it has FirstHeartbeatTimeout
// milliseconds since its creation before being marked as unhealthy
// 2. if the server has sent its first heartbeat, it has HeartbeatTimeout milliseconds
// since its last heartbeat before being marked as unhealthy
func (n *NodeAgentManager) HeartbeatTimeChecker() {
n.gameServerMap.Range(func(key interface{}, value interface{}) bool {
currentTime := n.nowFunc().UnixMilli()
Expand Down Expand Up @@ -282,7 +282,15 @@ func (n *NodeAgentManager) gameServerCreatedOrUpdated(obj *unstructured.Unstruct

// gameServerDeleted is called when a GameServer CR is deleted
func (n *NodeAgentManager) gameServerDeleted(objUnstructured interface{}) {
obj := objUnstructured.(*unstructured.Unstructured)
// https://github.com/PlayFab/thundernetes/issues/395
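// objUnstructured can be a cache.DeletedFinalStateUnknown instead of an *unstructured.Unstructured.
// This happens when the delete was not observed by the watcher but the object has already been removed from kube-apiserver. Its Obj field holds the last known state of the object, which no longer exists.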
// https://pkg.go.dev/k8s.io/client-go/tools/cache#DeletedFinalStateUnknown
var obj *unstructured.Unstructured
obj, ok := objUnstructured.(*unstructured.Unstructured)
if !ok {
obj = objUnstructured.(cache.DeletedFinalStateUnknown).Obj.(*unstructured.Unstructured)
}

gameServerName := obj.GetName()
gameServerNamespace := obj.GetNamespace()
Expand Down