-
Notifications
You must be signed in to change notification settings - Fork 222
Add Performance Profile Controller must-gather #341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
dbb0d6f
68c6a82
c574111
6cbcd89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| build_root_image: | ||
| name: release | ||
| namespace: openshift | ||
| tag: rhel-8-release-golang-1.18-openshift-4.12 | ||
| tag: rhel-8-release-golang-1.19-openshift-4.12 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,16 @@ | ||
| FROM registry.ci.openshift.org/ocp/builder:rhel-8-golang-1.18-openshift-4.12 AS builder | ||
| FROM registry.ci.openshift.org/ocp/builder:rhel-8-golang-1.19-openshift-4.12 AS builder | ||
| WORKDIR /go/src/github.com/openshift/must-gather | ||
| COPY . . | ||
| ENV GO_PACKAGE github.com/openshift/must-gather | ||
|
|
||
| FROM registry.ci.openshift.org/ocp/4.12:cli | ||
| COPY --from=builder /go/src/github.com/openshift/must-gather/collection-scripts/* /usr/bin/ | ||
| RUN yum install --setopt=tsflags=nodocs -y jq && yum clean all && rm -rf /var/cache/yum/* | ||
| RUN yum install -y pciutils util-linux hostname rsync tar ethtool | ||
marioferh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| ARG BIN_DIR=build/_output/bin/ | ||
| ARG NODE_GATHER_MANIFESTS_DIR=performance-profile-node-gather/ | ||
|
|
||
| COPY ${NODE_GATHER_MANIFESTS_DIR} /etc/performance-profile-node-gather | ||
| # rename to be consistent with all other must-gather helper | ||
| COPY ${BIN_DIR}gather-sysinfo /usr/bin/gather_sysinfo | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,162 @@ | ||
| #!/bin/bash | ||
|
|
||
#######################################
# Report whether the perf-node-gather DaemonSet has all of its pods ready.
# Globals:   none (IFS is only overridden for the single `read` below)
# Arguments: none
# Returns:   0 when the desired pod count is a positive number and the ready
#            count equals it; 1 otherwise, including when `oc` prints nothing
#            or prints a non-numeric placeholder such as "<none>".
#######################################
check_node_gather_pods_ready() {
  local line desired ready
  line=$(oc get ds perf-node-gather-daemonset \
    -o=custom-columns=DESIRED:.status.desiredNumberScheduled,READY:.status.numberReady \
    --no-headers -n perf-node-gather)

  # Scope the field separator to this one command so the caller's IFS is
  # left untouched.
  IFS=$' \t' read -r desired ready <<< "${line}"

  # Insist on a positive integer: two empty strings (failed query) or two
  # "<none>" placeholders compare equal and must not count as "ready".
  [[ "${desired}" =~ ^[1-9][0-9]*$ && "${ready}" == "${desired}" ]]
}

#######################################
# Collect per-node data through a temporary DaemonSet.
#
# Creates the namespace, service account, cluster role, cluster role binding
# and DaemonSet from the manifests under /etc/performance-profile-node-gather,
# waits for the DaemonSet pods, runs the collection commands in every Running
# pod, fetches the kubelet journal of every collected node, and finally
# deletes the resources it created.
#
# Globals:   HOSTNAME (read) - used as the name of the current pod
# Arguments: none
# Outputs:   files under /must-gather/nodes/<node>/, plus
#            /must-gather/debug and /must-gather/nodes/skipped_nodes.txt
# Returns:   exit status of the final `oc delete`
#######################################
function ppc_nodes() {
  local -r BASE_COLLECTION_PATH="/must-gather"
  local -r NODES_PATH="${BASE_COLLECTION_PATH}/nodes"
  local -r MANIFESTS_PATH="/etc/performance-profile-node-gather"
  local -r NAMESPACE_MANIFEST="${MANIFESTS_PATH}/namespace.yaml"
  local -r SERVICEACCOUNT_MANIFEST="${MANIFESTS_PATH}/serviceaccount.yaml"
  local -r DAEMONSET_MANIFEST="${MANIFESTS_PATH}/daemonset.yaml"
  local -r CLUSTER_ROLE_MANIFEST="${MANIFESTS_PATH}/clusterrole.yaml"
  local -r CLUSTER_ROLE_BINDING_MANIFEST="${MANIFESTS_PATH}/clusterrolebinding.yaml"
  local -r GATHER_NAMESPACE="perf-node-gather"

  mkdir -p "${NODES_PATH}"

  local NAMESPACE POD_NAME MUST_GATHER_IMAGE POD_IP
  NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)
  # Once the pod is started, Kubernetes sets the pod hostname to the pod name:
  # https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-s-hostname-and-subdomain-fields
  POD_NAME=${HOSTNAME}
  MUST_GATHER_IMAGE=$(oc get pod -n "${NAMESPACE}" "${POD_NAME}" \
    -o jsonpath="{.spec.containers[0].image}")

  # `hostname -I` may print several addresses; join them with commas instead
  # of deleting the blanks, which would fuse them into one unreadable token.
  local -a pod_ips=()
  IFS=$' \t' read -r -a pod_ips <<< "$(hostname -I)"
  POD_IP=$(IFS=,; printf '%s' "${pod_ips[*]}")

  echo "[${NAMESPACE}/${POD_IP}/${POD_NAME}]" >> "${BASE_COLLECTION_PATH}/debug"
  oc get pod -n "${NAMESPACE}" "${POD_NAME}" -o json >> "${BASE_COLLECTION_PATH}/debug"

  # Point the DaemonSet at the image this pod is running.
  sed -i -e "s#MUST_GATHER_IMAGE#${MUST_GATHER_IMAGE}#" "${DAEMONSET_MANIFEST}"

  oc create -f "${NAMESPACE_MANIFEST}"
  oc create -f "${SERVICEACCOUNT_MANIFEST}"
  oc adm policy add-scc-to-user privileged -n "${GATHER_NAMESPACE}" -z perf-node-gather
  oc create -f "${CLUSTER_ROLE_MANIFEST}"
  oc create -f "${CLUSTER_ROLE_BINDING_MANIFEST}"
  oc create -f "${DAEMONSET_MANIFEST}"

  # Poll once per second, at most 300 times; collection proceeds afterwards
  # even if the DaemonSet never became fully ready.
  local counter=0
  until check_node_gather_pods_ready || (( counter == 300 )); do
    counter=$(( counter + 1 ))
    sleep 1
  done

  # Record the nodes whose gather pod is not Running.
  local -a skipped_nodes=()
  local skipped
  mapfile -t skipped_nodes < <(
    oc get pod -o=custom-columns=NODE:.spec.nodeName --no-headers \
      --field-selector=status.phase!=Running -n "${GATHER_NAMESPACE}"
  )
  for skipped in "${skipped_nodes[@]}"; do
    [[ -n "${skipped}" ]] || continue
    echo "Failed to collect perf-node-gather data from node ${skipped} due to pod scheduling failure." \
      >> "${NODES_PATH}/skipped_nodes.txt"
  done

  # Run the collection commands in every Running gather pod.
  local -a running_pods=()
  local -a COLLECTABLE_NODES=()
  local -a in_pod=()
  local entry node pod node_path
  mapfile -t running_pods < <(
    oc get pod -o=custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers \
      --field-selector=status.phase=Running -n "${GATHER_NAMESPACE}"
  )
  for entry in "${running_pods[@]}"; do
    [[ -n "${entry}" ]] || continue
    IFS=$' \t' read -r node pod <<< "${entry}"
    node_path="${NODES_PATH}/${node}"
    mkdir -p "${node_path}"
    in_pod=(oc exec "${pod}" -n "${GATHER_NAMESPACE}" --)

    "${in_pod[@]}" lspci -nvv > "${node_path}/lspci"
    "${in_pod[@]}" lscpu -e > "${node_path}/lscpu"
    "${in_pod[@]}" cat /proc/cmdline > "${node_path}/proc_cmdline"
    "${in_pod[@]}" dmesg > "${node_path}/dmesg"
    "${in_pod[@]}" ethtool -k eth0 > "${node_path}/ethtool_features"
    "${in_pod[@]}" ethtool -l eth0 > "${node_path}/ethtool_channels"

    COLLECTABLE_NODES+=("${node}")

    "${in_pod[@]}" gather_sysinfo --json cpuaff --procfs=/host/proc --sysfs=/host/sys \
      > "${node_path}/cpu_affinities.json"
    "${in_pod[@]}" gather_sysinfo --json irqaff --procfs=/host/proc --sysfs=/host/sys \
      > "${node_path}/irq_affinities.json"
    "${in_pod[@]}" gather_sysinfo --json podres --socket-path=unix:///host/podresources/kubelet.sock \
      > "${node_path}/podresources.json"

    "${in_pod[@]}" gather_sysinfo snapshot --debug --root=/host --output=- \
      > "${node_path}/sysinfo.tgz" 2> "${node_path}/sysinfo.log"

    "${in_pod[@]}" gather_sysinfo podinfo --node-name "${node}" > "${node_path}/pods_info.json"
  done

  # Collect journal logs for the listed units on every collected node, in
  # parallel; each job is bounded by `timeout`.
  local -a NODE_UNITS=(kubelet)
  local -a ADM_PIDS=()
  local unit
  for node in "${COLLECTABLE_NODES[@]}"; do
    node_path="${NODES_PATH}/${node}"
    mkdir -p "${node_path}"
    for unit in "${NODE_UNITS[@]}"; do
      # Node and unit are passed as positional arguments so they are never
      # re-parsed as shell code by the inner bash.
      timeout -k 5m 30m bash -c 'oc adm node-logs "$1" -u "$2" --since "-8h" | gzip' \
        node-logs "${node}" "${unit}" > "${node_path}/${node}_logs_${unit}.gz" &
      ADM_PIDS+=($!)
    done
  done
  wait "${ADM_PIDS[@]}"

  # Remove everything that was created above, in reverse order.
  oc delete -f "${DAEMONSET_MANIFEST}"
  oc delete -f "${CLUSTER_ROLE_BINDING_MANIFEST}"
  oc delete -f "${CLUSTER_ROLE_MANIFEST}"
  oc delete -f "${SERVICEACCOUNT_MANIFEST}"
  oc delete -f "${NAMESPACE_MANIFEST}"
}
|
|
||
#######################################
# Print the namespace the performance operator is installed in.
#
# Tries, in order: the namespace of the
# 'performance-addon-operator-subscription' subscription, the namespace of the
# first pod labelled name=performance-operator, the
# 'openshift-performance-addon-operator' namespace if it exists, and finally
# the literal 'openshift-operators'.
#
# Globals:   none (the result variable is local, so a caller's `ns` survives)
# Arguments: none
# Outputs:   the namespace name on stdout, followed by a newline
# Returns:   0
#######################################
function ppc_namespace() {
  local ns

  # We control the subscription, so this is the most reliable source.
  # Errors are discarded on purpose: a miss simply falls through to the
  # next strategy.
  ns=$(oc get subs -A \
    --field-selector metadata.name='performance-addon-operator-subscription' \
    -o=jsonpath='{.items[0].metadata.namespace}{"\n"}' 2> /dev/null)

  # The pods are usually reliable too, but users can change them.
  if [[ -z "${ns}" ]]; then
    ns=$(oc get pods -A -l name='performance-operator' \
      -o=jsonpath='{.items[0].metadata.namespace}{"\n"}' 2> /dev/null)
  fi

  # Namespace suggested by the documentation; this doubles as an existence
  # check because the lookup prints nothing when the namespace is absent.
  if [[ -z "${ns}" ]]; then
    ns=$(oc get ns openshift-performance-addon-operator \
      -o jsonpath='{.metadata.name}{"\n"}' 2> /dev/null)
  fi

  # Last resort; not expected to be reached.
  if [[ -z "${ns}" ]]; then
    ns="openshift-operators"
  fi

  printf '%s\n' "${ns}"
}
|
|
||
#######################################
# Print a version string derived from the `oc status` pod lines.
#
# For every line starting with "pod", the last token shaped like
# MAJOR.MINOR[.PATCH][-SUFFIX] is extracted (SUFFIX stops at '@'). When no
# such token exists, whatever follows "runs " on the pod lines is used
# instead, and when that is empty too the literal "Unknown" is printed.
#
# Globals:   none
# Arguments: none
# Outputs:   the version on stdout, followed by a newline
# Returns:   0
#######################################
function version() {
  local status version

  # Query once and reuse the text for both extraction attempts.
  status=$(oc status)

  # The character before the captured token must be neither a digit nor a
  # dot; otherwise the greedy prefix swallows the leading digits of the
  # version (e.g. "v4.12.3" would be reported as "2.3").
  version=$(
    grep '^pod' <<< "${status}" \
      | sed -n -E -e 's/.*[^[:digit:].]([[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-[^@]+)?).*/\1/p'
  )

  # No version-like token: fall back to the image reference.
  if [[ -z "${version}" ]]; then
    version=$(grep '^pod.*runs' <<< "${status}" | sed -E -e 's/^pod.*runs //')
  fi

  # Still nothing: report Unknown.
  if [[ -z "${version}" ]]; then
    version="Unknown"
  fi

  printf '%s\n' "${version}"
}
|
|
||
# Write the /must-gather/version file: a fixed identifier line followed by
# the output of version().
# NOTE(review): `. version` and `. namespace` source files with those names
# looked up via PATH, while functions called `version` and `ppc_namespace`
# are also defined earlier in this script - confirm which definition is the
# intended one.
. version
echo "performance-addon-operator/must-gather" > /must-gather/version
version >> /must-gather/version

. namespace
PPC_NAMESPACE=$(ppc_namespace)

# Resources to inspect.
resources=()

# Performance operator namespace.
resources+=("ns/${PPC_NAMESPACE}")

# Performance operator profiles.
resources+=(performanceprofile)

# Machine/node resources.
resources+=(nodes machineconfigs machineconfigpools featuregates kubeletconfigs tuneds)

# Inspect each resource; every element is passed as exactly one argument.
for resource in "${resources[@]}"; do
  /usr/bin/oc adm inspect --dest-dir must-gather --all-namespaces "${resource}"
done

# Collect node details.
ppc_nodes

exit 0
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,4 +2,105 @@ module github.com/openshift/must-gather | |
|
|
||
| go 1.17 | ||
|
|
||
| require github.com/openshift/build-machinery-go v0.0.0-20210423112049-9415d7ebd33e | ||
| require ( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As mentioned before, I don't think we want to handle it here; it should be built outside of this repo and only installed here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That would require an RPM in the package repos. The tool is specific to the must-gather data collection, though. Also, we want the tool to always be in sync with the MG and OCP versions. Building it here is actually much, much easier and more reliable.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Based on what I was told, it is already built as an RPM, so that shouldn't be a problem. |
||
| github.com/marioferh/gather-sysinfo v0.0.1 | ||
| github.com/openshift/build-machinery-go v0.0.0-20210423112049-9415d7ebd33e | ||
| ) | ||
|
|
||
| require ( | ||
| github.com/Microsoft/go-winio v0.4.17 // indirect | ||
| github.com/PuerkitoBio/purell v1.1.1 // indirect | ||
| github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect | ||
| github.com/beorn7/perks v1.0.1 // indirect | ||
| github.com/blang/semver/v4 v4.0.0 // indirect | ||
| github.com/cespare/xxhash/v2 v2.1.2 // indirect | ||
| github.com/davecgh/go-spew v1.1.1 // indirect | ||
| github.com/emicklei/go-restful v2.9.5+incompatible // indirect | ||
| github.com/go-logr/logr v1.2.3 // indirect | ||
| github.com/go-openapi/jsonpointer v0.19.5 // indirect | ||
| github.com/go-openapi/jsonreference v0.19.5 // indirect | ||
| github.com/go-openapi/swag v0.19.14 // indirect | ||
| github.com/gogo/protobuf v1.3.2 // indirect | ||
| github.com/golang/protobuf v1.5.2 // indirect | ||
| github.com/google/cadvisor v0.44.1 // indirect | ||
| github.com/google/gnostic v0.5.7-v3refs // indirect | ||
| github.com/google/gofuzz v1.1.0 // indirect | ||
| github.com/google/uuid v1.1.2 // indirect | ||
| github.com/imdario/mergo v0.3.5 // indirect | ||
| github.com/inconshreveable/mousetrap v1.0.0 // indirect | ||
| github.com/jaypipes/ghw v0.8.1-0.20210605191321-eb162add542b // indirect | ||
| github.com/josharian/intern v1.0.0 // indirect | ||
| github.com/json-iterator/go v1.1.12 // indirect | ||
| github.com/mailru/easyjson v0.7.6 // indirect | ||
| github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect | ||
| github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible // indirect | ||
| github.com/moby/sys/mountinfo v0.6.0 // indirect | ||
| github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect | ||
| github.com/modern-go/reflect2 v1.0.2 // indirect | ||
| github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect | ||
| github.com/openshift-kni/debug-tools v0.1.8 // indirect | ||
| github.com/prometheus/client_golang v1.12.1 // indirect | ||
| github.com/prometheus/client_model v0.2.0 // indirect | ||
| github.com/prometheus/common v0.32.1 // indirect | ||
| github.com/prometheus/procfs v0.7.3 // indirect | ||
| github.com/spf13/cobra v1.4.0 // indirect | ||
| github.com/spf13/pflag v1.0.5 // indirect | ||
| golang.org/x/net v0.1.0 // indirect | ||
| golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect | ||
| golang.org/x/sys v0.1.0 // indirect | ||
| golang.org/x/term v0.1.0 // indirect | ||
| golang.org/x/text v0.4.0 // indirect | ||
| golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect | ||
| google.golang.org/appengine v1.6.7 // indirect | ||
| google.golang.org/genproto v0.0.0-20220107163113-42d7afdf6368 // indirect | ||
| google.golang.org/grpc v1.40.0 // indirect | ||
| google.golang.org/protobuf v1.28.0 // indirect | ||
| gopkg.in/inf.v0 v0.9.1 // indirect | ||
| gopkg.in/yaml.v2 v2.4.0 // indirect | ||
| gopkg.in/yaml.v3 v3.0.1 // indirect | ||
| k8s.io/api v0.24.2 // indirect | ||
| k8s.io/apimachinery v0.24.2 // indirect | ||
| k8s.io/apiserver v0.24.2 // indirect | ||
| k8s.io/client-go v0.24.2 // indirect | ||
| k8s.io/component-base v0.24.2 // indirect | ||
| k8s.io/klog/v2 v2.60.1 // indirect | ||
| k8s.io/kube-openapi v0.0.0-20220328201542-3ee0da9b0b42 // indirect | ||
| k8s.io/kubelet v0.23.0 // indirect | ||
| k8s.io/kubernetes v0.24.1 // indirect | ||
| k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9 // indirect | ||
| sigs.k8s.io/json v0.0.0-20211208200746-9f7c6b3444d2 // indirect | ||
| sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect | ||
| sigs.k8s.io/yaml v1.2.0 // indirect | ||
| ) | ||
|
|
||
| // Pinned to kubernetes-1.24.2 | ||
| replace ( | ||
| k8s.io/api => k8s.io/api v0.24.2 | ||
| k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.2 | ||
| k8s.io/apimachinery => k8s.io/apimachinery v0.24.2 | ||
| k8s.io/apiserver => k8s.io/apiserver v0.24.2 | ||
| k8s.io/cli-runtime => k8s.io/cli-runtime v0.24.2 | ||
| k8s.io/client-go => k8s.io/client-go v0.24.2 | ||
| k8s.io/cloud-provider => k8s.io/cloud-provider v0.24.2 | ||
| k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.24.2 | ||
| k8s.io/code-generator => k8s.io/code-generator v0.24.2 | ||
| k8s.io/component-base => k8s.io/component-base v0.24.2 | ||
| k8s.io/component-helpers => k8s.io/component-helpers v0.24.2 | ||
| k8s.io/controller-manager => k8s.io/controller-manager v0.24.2 | ||
| k8s.io/cri-api => k8s.io/cri-api v0.24.2 | ||
| k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.24.2 | ||
| k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.24.2 | ||
| k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.24.2 | ||
| k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.2 | ||
| k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.2 | ||
| k8s.io/kubectl => k8s.io/kubectl v0.24.2 | ||
| k8s.io/kubelet => k8s.io/kubelet v0.24.2 | ||
| k8s.io/kubernetes => k8s.io/kubernetes v1.24.2 | ||
| k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.2 | ||
| k8s.io/metrics => k8s.io/metrics v0.24.2 | ||
| k8s.io/mount-utils => k8s.io/mount-utils v0.24.2 | ||
| k8s.io/pod-security-admission => k8s.io/pod-security-admission v0.24.2 | ||
| k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.24.2 | ||
| sigs.k8s.io/controller-runtime => sigs.k8s.io/controller-runtime v0.11.1 | ||
| sigs.k8s.io/controller-tools => sigs.k8s.io/controller-tools v0.7.0 | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
rhel-8-release-golang-1.19-openshift-4.13 would be the correct one here and everywhere else too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done