nrt: log: enhance logging in container scope
Before kubernetes-sigs#710 and kubernetes-sigs#725, we logged the container
being processed alongside the pod (identified by its namespace/name pair).
The container information was dropped by mistake, not deliberately.
It is useful when troubleshooting, so let's add it back.

Signed-off-by: Francesco Romani <[email protected]>
ffromani committed May 29, 2024
1 parent 004e0d9 commit 7a8afdf
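
For context, a minimal, self-contained sketch (not part of the commit) of the logging pattern being restored: a container-scoped logger is derived from the pod-scoped one with WithValues, so every later message automatically carries the container name and kind. The key names mirror those used in the plugin ("pod", "container", "kind"); the funcr backend is just an assumption for the sake of a runnable example.

package main

import (
	"fmt"

	"github.com/go-logr/logr/funcr"
)

func main() {
	// a logr.Logger that simply prints the structured key/value pairs;
	// any backend (klog, zap, ...) would behave the same way.
	lh := funcr.New(func(prefix, args string) {
		fmt.Println(args)
	}, funcr.Options{Verbosity: 6})

	// pod-scoped logger, as set up by the plugin handlers.
	lh = lh.WithValues("pod", "default/my-pod")

	// container-scoped logger: this is what the commit re-introduces.
	clh := lh.WithValues("container", "my-app", "kind", "app")

	// the container shows up in every message without repeating it by hand.
	clh.V(6).Info("desired resources", "cpu", "2", "memory", "4.0 GiB")
	clh.V(2).Info("cannot align container")
}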
Showing 4 changed files with 57 additions and 19 deletions.
27 changes: 15 additions & 12 deletions pkg/noderesourcetopology/filter.go
@@ -55,32 +55,34 @@ func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, zones topology
 	// https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#understanding-init-containers
 	// therefore, we don't need to accumulate their resources together
 	for _, initContainer := range pod.Spec.InitContainers {
-		lh.V(6).Info("init container desired resources", stringify.ResourceListToLoggable(initContainer.Resources.Requests)...)
+		// TODO: handle sidecar explicitely (new kind)
+		clh := lh.WithValues(logging.KeyContainer, initContainer.Name, logging.KeyContainerKind, logging.KindContainerInit)
+		clh.V(6).Info("desired resources", stringify.ResourceListToLoggable(initContainer.Resources.Requests)...)

-		_, match := resourcesAvailableInAnyNUMANodes(lh, nodes, initContainer.Resources.Requests, qos, nodeInfo)
+		_, match := resourcesAvailableInAnyNUMANodes(clh, nodes, initContainer.Resources.Requests, qos, nodeInfo)
 		if !match {
 			// we can't align init container, so definitely we can't align a pod
-			lh.V(2).Info("cannot align container", "name", initContainer.Name, "kind", "init")
+			clh.V(2).Info("cannot align container")
 			return framework.NewStatus(framework.Unschedulable, "cannot align init container")
 		}
 	}

 	for _, container := range pod.Spec.Containers {
-		// TODO: add containerName
-		lh.V(6).Info("app container resources", stringify.ResourceListToLoggable(container.Resources.Requests)...)
+		clh := lh.WithValues(logging.KeyContainer, container.Name, logging.KeyContainerKind, logging.KindContainerApp)
+		clh.V(6).Info("app container resources", stringify.ResourceListToLoggable(container.Resources.Requests)...)

-		numaID, match := resourcesAvailableInAnyNUMANodes(lh, nodes, container.Resources.Requests, qos, nodeInfo)
+		numaID, match := resourcesAvailableInAnyNUMANodes(clh, nodes, container.Resources.Requests, qos, nodeInfo)
 		if !match {
 			// we can't align container, so definitely we can't align a pod
-			lh.V(2).Info("cannot align container", "name", container.Name, "kind", "app")
+			clh.V(2).Info("cannot align container")
 			return framework.NewStatus(framework.Unschedulable, "cannot align container")
 		}

 		// subtract the resources requested by the container from the given NUMA.
 		// this is necessary, so we won't allocate the same resources for the upcoming containers
-		subtractFromNUMA(lh, nodes, numaID, container)
+		subtractFromNUMA(clh, nodes, numaID, container)
+		clh.V(4).Info("app container placed", "numaCell", numaID)
 	}
-	lh.V(2).Info("can align all containers")
 	return nil
 }

@@ -150,7 +152,7 @@ func resourcesAvailableInAnyNUMANodes(lh logr.Logger, numaNodes NUMANodeList, re

 	// at least one NUMA node is available
 	ret := !bitmask.IsEmpty()
-	lh.V(2).Info("final verdict", "suitable", ret)
+	lh.V(2).Info("final verdict", "suitable", ret, "numaCell", numaID)
 	return numaID, ret
 }

@@ -172,11 +174,12 @@ func singleNUMAPodLevelHandler(lh logr.Logger, pod *v1.Pod, zones topologyv1alph
 	logNumaNodes(lh, "pod handler NUMA resources", nodeInfo.Node().Name, nodes)
 	lh.V(6).Info("pod desired resources", stringify.ResourceListToLoggable(resources)...)

-	if _, match := resourcesAvailableInAnyNUMANodes(lh, createNUMANodeList(lh, zones), resources, v1qos.GetPodQOS(pod), nodeInfo); !match {
+	numaID, match := resourcesAvailableInAnyNUMANodes(lh, createNUMANodeList(lh, zones), resources, v1qos.GetPodQOS(pod), nodeInfo)
+	if !match {
 		lh.V(2).Info("cannot align pod", "name", pod.Name)
 		return framework.NewStatus(framework.Unschedulable, "cannot align pod")
 	}
-	lh.V(2).Info("can align pod")
+	lh.V(4).Info("all container placed", "numaCell", numaID)
 	return nil
 }

17 changes: 12 additions & 5 deletions pkg/noderesourcetopology/logging/logging.go
@@ -26,11 +26,13 @@ import (

 // well-known structured log keys
 const (
-	KeyLogID  string = "logID"
-	KeyPod    string = "pod"
-	KeyPodUID string = "podUID"
-	KeyNode   string = "node"
-	KeyFlow   string = "flow"
+	KeyLogID         string = "logID"
+	KeyPod           string = "pod"
+	KeyPodUID        string = "podUID"
+	KeyNode          string = "node"
+	KeyFlow          string = "flow"
+	KeyContainer     string = "container"
+	KeyContainerKind string = "kind"
 )

 const (
@@ -42,6 +44,11 @@ const (
 	FlowCacheSync string = "resync"
 )

+const (
+	KindContainerInit string = "init"
+	KindContainerApp  string = "app"
+)
+
 const (
 	SubsystemForeignPods string = "foreignpods"
 	SubsystemNRTCache    string = "nrtcache"
11 changes: 9 additions & 2 deletions pkg/noderesourcetopology/stringify/stringify.go
@@ -106,9 +106,16 @@ func nrtResourceInfo(resInfo topologyv1alpha2.ResourceInfo) string {
 	return resInfo.Name + "=" + humanize.IBytes(uint64(capVal)) + "/" + humanize.IBytes(uint64(allocVal)) + "/" + humanize.IBytes(uint64(availVal))
 }

-func needsHumanization(resName string) bool {
+func needsHumanization(rn string) bool {
+	resName := corev1.ResourceName(rn)
 	// memory-related resources may be expressed in KiB/Bytes, which makes
 	// for long numbers, harder to read and compare. To make it easier for
 	// the reader, we express them in a more compact form using go-humanize.
-	return resName == string(corev1.ResourceMemory) || v1helper.IsHugePageResourceName(corev1.ResourceName(resName))
+	if resName == corev1.ResourceMemory {
+		return true
+	}
+	if resName == corev1.ResourceStorage || resName == corev1.ResourceEphemeralStorage {
+		return true
+	}
+	return v1helper.IsHugePageResourceName(resName)
 }
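
As a rough illustration (not part of the commit), the sketch below shows how the humanization decision combined with humanize.IBytes yields the compact values seen in the tests that follow. It assumes the go-humanize and k8s.io/api modules, and it replaces the v1helper hugepages check with a simple prefix match to stay self-contained.

package main

import (
	"fmt"
	"strings"

	"github.com/dustin/go-humanize"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// simplified stand-in for needsHumanization: memory, storage and
// hugepages-* quantities are byte counts worth compacting.
func needsHumanization(rn string) bool {
	resName := corev1.ResourceName(rn)
	if resName == corev1.ResourceMemory || resName == corev1.ResourceStorage || resName == corev1.ResourceEphemeralStorage {
		return true
	}
	return strings.HasPrefix(rn, corev1.ResourceHugePagesPrefix)
}

func main() {
	for _, name := range []string{"cpu", "memory", "ephemeral-storage", "hugepages-1Gi"} {
		qty := resource.MustParse("4Gi")
		if name == "cpu" {
			qty = resource.MustParse("24")
		}
		val := qty.String()
		if needsHumanization(name) {
			val = humanize.IBytes(uint64(qty.Value()))
		}
		fmt.Printf("%s=%q\n", name, val) // e.g. memory="4.0 GiB", cpu="24"
	}
}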
21 changes: 21 additions & 0 deletions pkg/noderesourcetopology/stringify/stringify_test.go
@@ -111,6 +111,27 @@ func TestResourceListToLoggable(t *testing.T) {
 			},
 			expected: ` logID="TEST4" awesome.net/gpu="4" cpu="24" example.com/netdevice="16" hugepages-2Mi="1.0 GiB" memory="16 GiB"`,
 		},
+		{
+			name:  "CPUs, Memory, EphemeralStorage",
+			logID: "TEST5",
+			resources: corev1.ResourceList{
+				corev1.ResourceCPU:              resource.MustParse("24"),
+				corev1.ResourceMemory:           resource.MustParse("16Gi"),
+				corev1.ResourceEphemeralStorage: resource.MustParse("4Gi"),
+			},
+			expected: ` logID="TEST5" cpu="24" ephemeral-storage="4.0 GiB" memory="16 GiB"`,
+		},
+		{
+			name:  "CPUs, Memory, EphemeralStorage, hugepages-1Gi",
+			logID: "TEST6",
+			resources: corev1.ResourceList{
+				corev1.ResourceCPU:                   resource.MustParse("24"),
+				corev1.ResourceMemory:                resource.MustParse("16Gi"),
+				corev1.ResourceName("hugepages-1Gi"): resource.MustParse("4Gi"),
+				corev1.ResourceEphemeralStorage:      resource.MustParse("6Gi"),
+			},
+			expected: ` logID="TEST6" cpu="24" ephemeral-storage="6.0 GiB" hugepages-1Gi="4.0 GiB" memory="16 GiB"`,
+		},
 	}

 	for _, tt := range tests {
