Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
2 changes: 1 addition & 1 deletion .ci-operator.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
build_root_image:
name: release
namespace: openshift
tag: rhel-8-release-golang-1.18-openshift-4.12
tag: rhel-8-release-golang-1.19-openshift-4.12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rhel-8-release-golang-1.19-openshift-4.13 would be the correct one here and everywhere else too.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

10 changes: 9 additions & 1 deletion Dockerfile.rhel7
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
FROM registry.ci.openshift.org/ocp/builder:rhel-8-golang-1.18-openshift-4.12 AS builder
FROM registry.ci.openshift.org/ocp/builder:rhel-8-golang-1.19-openshift-4.12 AS builder
WORKDIR /go/src/github.com/openshift/must-gather
COPY . .
ENV GO_PACKAGE github.com/openshift/must-gather

FROM registry.ci.openshift.org/ocp/4.12:cli
COPY --from=builder /go/src/github.com/openshift/must-gather/collection-scripts/* /usr/bin/
RUN yum install --setopt=tsflags=nodocs -y jq && yum clean all && rm -rf /var/cache/yum/*
RUN yum install -y pciutils util-linux hostname rsync tar ethtool

ARG BIN_DIR=build/_output/bin/
ARG NODE_GATHER_MANIFESTS_DIR=performance-profile-node-gather/

COPY ${NODE_GATHER_MANIFESTS_DIR} /etc/performance-profile-node-gather
# rename to be consistent with all other must-gather helper
COPY ${BIN_DIR}gather-sysinfo /usr/bin/gather_sysinfo
162 changes: 162 additions & 0 deletions collection-scripts/gather_ppc
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/bin/bash

#######################################
# Report whether the perf-node-gather daemonset has finished rolling out.
# Globals:   none modified (IFS is overridden only for a single `read`)
# Arguments: none
# Returns:   0 when the DESIRED column is a positive number and equals the
#            READY column; 1 otherwise. An empty or non-numeric answer
#            (for example when the `oc get ds` query fails) is treated as
#            "not ready" so that the caller keeps waiting.
#######################################
check_node_gather_pods_ready() {
  local line desired ready
  line=$(oc get ds perf-node-gather-daemonset -o=custom-columns=DESIRED:.status.desiredNumberScheduled,READY:.status.numberReady --no-headers -n perf-node-gather)

  # Scope the IFS override to this one read instead of rewriting the global IFS.
  IFS=' ' read -r desired ready <<< "$line"

  # Without this guard an empty answer would compare "" == "" and be
  # reported as ready.
  [[ "$desired" =~ ^[0-9]+$ ]] || return 1
  [[ "$desired" != "0" && "$ready" == "$desired" ]]
}

#######################################
# Collect per-node data through a temporary daemonset.
#
# Steps, in order:
#   1. Record this pod's namespace/IP/name and JSON in /must-gather/debug.
#   2. Substitute the MUST_GATHER_IMAGE placeholder in the daemonset manifest
#      with the image of this pod's first container, then create the
#      namespace, service account, cluster role, binding and daemonset from
#      the manifests under /etc/performance-profile-node-gather.
#   3. Wait up to 300 one-second iterations for the daemonset to be ready.
#   4. List pods that are not Running in skipped_nodes.txt.
#   5. For every Running pod, run a fixed set of commands via `oc exec` and
#      store their output under /must-gather/nodes/<node>/.
#   6. Fetch the last 8h of journal logs for each unit in NODE_UNITS from
#      every collected node, in parallel.
#   7. Delete everything created in step 2.
#
# Globals:   writes BASE_COLLECTION_PATH, NODES_PATH, the *_MANIFEST paths,
#            NAMESPACE, POD_NAME, MUST_GATHER_IMAGE, POD_IP,
#            COLLECTABLE_NODES, NODE_UNITS and ADM_PIDS (as the original
#            did); reads HOSTNAME. IFS is no longer modified.
# Arguments: none
# Returns:   exit status of the final `oc delete`.
#######################################
function ppc_nodes(){
  BASE_COLLECTION_PATH="/must-gather"
  NODES_PATH=${BASE_COLLECTION_PATH}/nodes
  mkdir -p "${NODES_PATH}"
  NAMESPACE_MANIFEST="/etc/performance-profile-node-gather/namespace.yaml"
  SERVICEACCOUNT_MANIFEST="/etc/performance-profile-node-gather/serviceaccount.yaml"
  DAEMONSET_MANIFEST="/etc/performance-profile-node-gather/daemonset.yaml"
  CLUSTER_ROLE_MANIFEST="/etc/performance-profile-node-gather/clusterrole.yaml"
  CLUSTER_ROLE_BINDING_MANIFEST="/etc/performance-profile-node-gather/clusterrolebinding.yaml"
  NAMESPACE=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)
  # Once the pod is started, Kubernetes sets the pod hostname to the pod name:
  # https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-s-hostname-and-subdomain-fields
  POD_NAME=${HOSTNAME}
  MUST_GATHER_IMAGE=$(oc get pod -n "$NAMESPACE" "$POD_NAME" -o jsonpath="{.spec.containers[0].image}")

  POD_IP=$(hostname -I | tr -d "[:blank:]" )
  echo "[$NAMESPACE/$POD_IP/$POD_NAME]" >> "${BASE_COLLECTION_PATH}/debug"
  oc get pod -n "$NAMESPACE" "$POD_NAME" -o json >> "${BASE_COLLECTION_PATH}/debug"

  # Make the daemonset run the same image as this must-gather pod.
  sed -i -e "s#MUST_GATHER_IMAGE#$MUST_GATHER_IMAGE#" "$DAEMONSET_MANIFEST"

  oc create -f "$NAMESPACE_MANIFEST"
  oc create -f "$SERVICEACCOUNT_MANIFEST"
  oc adm policy add-scc-to-user privileged -n perf-node-gather -z perf-node-gather
  oc create -f "$CLUSTER_ROLE_MANIFEST"
  oc create -f "$CLUSTER_ROLE_BINDING_MANIFEST"
  oc create -f "$DAEMONSET_MANIFEST"

  # Poll once per second, giving up after 300 attempts.
  local counter=0
  until check_node_gather_pods_ready || (( counter == 300 )); do
    counter=$(( counter + 1 ))
    sleep 1
  done

  local line node pod unit node_path

  # Pods that never reached Running: note their nodes as skipped.
  local -a skipped_nodes=()
  mapfile -t skipped_nodes < <(oc get pod -o=custom-columns=NODE:.spec.nodeName --no-headers --field-selector=status.phase!=Running -n perf-node-gather)
  for node in "${skipped_nodes[@]}"; do
    [[ -n "$node" ]] || continue
    echo "Failed to collect perf-node-gather data from node ${node} due to pod scheduling failure." >> "${NODES_PATH}/skipped_nodes.txt"
  done

  # Read the pod list up front so that the `oc exec` calls in the loop body
  # can never consume the list from stdin.
  COLLECTABLE_NODES=()
  local -a running_pods=()
  mapfile -t running_pods < <(oc get pod -o=custom-columns=NODE:.spec.nodeName,NAME:.metadata.name --no-headers --field-selector=status.phase=Running -n perf-node-gather)
  for line in "${running_pods[@]}"; do
    # Each line is "<node> <pod>", separated by blanks.
    IFS=$' \t' read -r node pod _ <<< "$line"
    [[ -n "$node" && -n "$pod" ]] || continue
    node_path=${NODES_PATH}/$node
    mkdir -p "${node_path}"

    oc exec "$pod" -n perf-node-gather -- lspci -nvv > "$node_path/lspci"
    oc exec "$pod" -n perf-node-gather -- lscpu -e > "$node_path/lscpu"
    oc exec "$pod" -n perf-node-gather -- cat /proc/cmdline > "$node_path/proc_cmdline"
    oc exec "$pod" -n perf-node-gather -- dmesg > "$node_path/dmesg"
    oc exec "$pod" -n perf-node-gather -- ethtool -k eth0 > "$node_path/ethtool_features"
    oc exec "$pod" -n perf-node-gather -- ethtool -l eth0 > "$node_path/ethtool_channels"

    COLLECTABLE_NODES+=("$node")

    oc exec "$pod" -n perf-node-gather -- gather_sysinfo --json cpuaff --procfs=/host/proc --sysfs=/host/sys > "$node_path/cpu_affinities.json"
    oc exec "$pod" -n perf-node-gather -- gather_sysinfo --json irqaff --procfs=/host/proc --sysfs=/host/sys > "$node_path/irq_affinities.json"
    oc exec "$pod" -n perf-node-gather -- gather_sysinfo --json podres --socket-path=unix:///host/podresources/kubelet.sock > "$node_path/podresources.json"

    # The snapshot archive goes to stdout (--output=-); its debug log to stderr.
    oc exec "$pod" -n perf-node-gather -- gather_sysinfo snapshot --debug --root=/host --output=- > "$node_path/sysinfo.tgz" 2> "$node_path/sysinfo.log"

    oc exec "$pod" -n perf-node-gather -- gather_sysinfo podinfo --node-name "$node" > "$node_path/pods_info.json"
  done

  # Collect journal logs for specified units for all nodes
  NODE_UNITS=(kubelet)
  ADM_PIDS=()
  for node in "${COLLECTABLE_NODES[@]}"; do
    node_path=${NODES_PATH}/$node
    mkdir -p "${node_path}"
    for unit in "${NODE_UNITS[@]}"; do
      # Node and unit are passed as positional arguments rather than being
      # spliced into the command string handed to the child shell.
      timeout -k 5m 30m bash -c 'oc adm node-logs "$1" -u "$2" --since "-8h" | gzip' _ "$node" "$unit" > "${node_path}/${node}_logs_${unit}.gz" &
      ADM_PIDS+=("$!")
    done
  done
  if (( ${#ADM_PIDS[@]} > 0 )); then
    wait "${ADM_PIDS[@]}"
  fi

  # Tear down in reverse order of creation.
  oc delete -f "$DAEMONSET_MANIFEST"
  oc delete -f "$CLUSTER_ROLE_BINDING_MANIFEST"
  oc delete -f "$CLUSTER_ROLE_MANIFEST"
  oc delete -f "$SERVICEACCOUNT_MANIFEST"
  oc delete -f "$NAMESPACE_MANIFEST"
}

#######################################
# Print the namespace used for the performance addon operator.
# Candidates are tried in order; the first non-empty answer wins:
#   1. namespace of the 'performance-addon-operator-subscription' subscription
#   2. namespace of the first pod labelled name=performance-operator
#   3. the 'openshift-performance-addon-operator' namespace, if it exists
#   4. the literal "openshift-operators"
# Globals:   none (the working variable is local)
# Arguments: none
# Outputs:   the namespace name on stdout, followed by a newline
# Returns:   0
#######################################
function ppc_namespace() {
  local ns

  # we control the subs, so this is the most reliable way to get the namespace
  ns=$( oc get subs -A --field-selector metadata.name='performance-addon-operator-subscription' -o=jsonpath='{.items[0].metadata.namespace}{"\n"}' 2> /dev/null )

  # trying again with the pods, which are _usually_ reliable - but users can change them
  if [[ -z "${ns}" ]]; then
    ns=$( oc get pods -A -l name='performance-operator' -o=jsonpath='{.items[0].metadata.namespace}{"\n"}' 2> /dev/null )
  fi

  # namespace suggested by the documentation. This is a fancier way to check for its existence
  if [[ -z "${ns}" ]]; then
    ns=$( oc get ns openshift-performance-addon-operator -o jsonpath='{.metadata.name}{"\n"}' 2> /dev/null )
  fi

  # we should never get there. This is the last resort.
  if [[ -z "${ns}" ]]; then
    ns="openshift-operators"
  fi

  printf '%s\n' "${ns}"
}

# Print a version string derived from the pod lines of `oc status`.
# Resolution order:
#   1. a version-like token (digits '.' digits, with optional suffixes)
#      pulled out of each line starting with "pod"
#   2. whatever follows "runs " on the pod line (the image reference)
#   3. the literal "Unknown"
# The result is stored in the global variable `version` and echoed unquoted,
# so a multi-line result is emitted as a single space-separated line.
# NOTE(review): the leading `.*` in the sed pattern is greedy, so a
# multi-digit component before the last dot is cut down to its final digit
# (e.g. "4.12.0" yields "2.0") - confirm whether that is intended.
function version() {
  # first choice: version token embedded in the image reference
  version=$(
    oc status \
      | grep '^pod' \
      | sed -n -r -e 's/.*([[:digit:]]+\.[[:digit:]]+(:?\.[[:digit:]])?(:?-[^@]+)?).*/\1/p'
  )

  # second choice: the raw image reference
  if [[ -z "${version}" ]]; then
    version=$(oc status | grep '^pod.*runs' | sed -r -e 's/^pod.*runs //')
  fi

  # last resort
  if [[ -z "${version}" ]]; then
    version="Unknown"
  fi

  echo ${version}
}

# generate /must-gather/version file
# `. version` sources a file named "version" (no slash, so it is looked up
# via PATH) - presumably a sibling collection script that (re)defines
# version(); TODO confirm.
. version
echo "performance-addon-operator/must-gather" > /must-gather/version
version >> /must-gather/version

# Same lookup rule as above, for a file named "namespace".
. namespace
PPC_NAMESPACE=$( ppc_namespace )

# resource list
resources=()

# performance operator namespace
resources+=("ns/${PPC_NAMESPACE}")

# performance operator profiles
resources+=(performanceprofile)

# machine/node resources
resources+=(nodes machineconfigs machineconfigpools featuregates kubeletconfigs tuneds)

# run the collection of resources using must-gather
# NOTE(review): --dest-dir is the relative path "must-gather", whereas the
# version file above is written to the absolute /must-gather; the two match
# only when the working directory is "/" - confirm.
for resource in "${resources[@]}"; do
  /usr/bin/oc adm inspect --dest-dir must-gather --all-namespaces "${resource}"
done

# Collect nodes details
ppc_nodes

exit 0
103 changes: 102 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,105 @@ module github.com/openshift/must-gather

go 1.17

require github.com/openshift/build-machinery-go v0.0.0-20210423112049-9415d7ebd33e
require (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned before, I don't think we want to handle it here; it should be built outside of this repo and only installed here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would require an RPM in the package repos. The tool is specific to must-gather data collection, though. We also want the tool to always be in sync with the must-gather (MG) and OCP versions. Building it here is actually much, much easier and more reliable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on what I was told, it is already built as an RPM, so that shouldn't be a problem.

github.com/marioferh/gather-sysinfo v0.0.1
github.com/openshift/build-machinery-go v0.0.0-20210423112049-9415d7ebd33e
)

require (
github.com/Microsoft/go-winio v0.4.17 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.5 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/cadvisor v0.44.1 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/uuid v1.1.2 // indirect
github.com/imdario/mergo v0.3.5 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/jaypipes/ghw v0.8.1-0.20210605191321-eb162add542b // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible // indirect
github.com/moby/sys/mountinfo v0.6.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/openshift-kni/debug-tools v0.1.8 // indirect
github.com/prometheus/client_golang v1.12.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
github.com/spf13/cobra v1.4.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/net v0.1.0 // indirect
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/term v0.1.0 // indirect
golang.org/x/text v0.4.0 // indirect
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20220107163113-42d7afdf6368 // indirect
google.golang.org/grpc v1.40.0 // indirect
google.golang.org/protobuf v1.28.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.24.2 // indirect
k8s.io/apimachinery v0.24.2 // indirect
k8s.io/apiserver v0.24.2 // indirect
k8s.io/client-go v0.24.2 // indirect
k8s.io/component-base v0.24.2 // indirect
k8s.io/klog/v2 v2.60.1 // indirect
k8s.io/kube-openapi v0.0.0-20220328201542-3ee0da9b0b42 // indirect
k8s.io/kubelet v0.23.0 // indirect
k8s.io/kubernetes v0.24.1 // indirect
k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9 // indirect
sigs.k8s.io/json v0.0.0-20211208200746-9f7c6b3444d2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect
sigs.k8s.io/yaml v1.2.0 // indirect
)

// Pinned to kubernetes-1.24.2
replace (
k8s.io/api => k8s.io/api v0.24.2
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.2
k8s.io/apimachinery => k8s.io/apimachinery v0.24.2
k8s.io/apiserver => k8s.io/apiserver v0.24.2
k8s.io/cli-runtime => k8s.io/cli-runtime v0.24.2
k8s.io/client-go => k8s.io/client-go v0.24.2
k8s.io/cloud-provider => k8s.io/cloud-provider v0.24.2
k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.24.2
k8s.io/code-generator => k8s.io/code-generator v0.24.2
k8s.io/component-base => k8s.io/component-base v0.24.2
k8s.io/component-helpers => k8s.io/component-helpers v0.24.2
k8s.io/controller-manager => k8s.io/controller-manager v0.24.2
k8s.io/cri-api => k8s.io/cri-api v0.24.2
k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.24.2
k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.24.2
k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.24.2
k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.2
k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.2
k8s.io/kubectl => k8s.io/kubectl v0.24.2
k8s.io/kubelet => k8s.io/kubelet v0.24.2
k8s.io/kubernetes => k8s.io/kubernetes v1.24.2
k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.2
k8s.io/metrics => k8s.io/metrics v0.24.2
k8s.io/mount-utils => k8s.io/mount-utils v0.24.2
k8s.io/pod-security-admission => k8s.io/pod-security-admission v0.24.2
k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.24.2
sigs.k8s.io/controller-runtime => sigs.k8s.io/controller-runtime v0.11.1
sigs.k8s.io/controller-tools => sigs.k8s.io/controller-tools v0.7.0
)
Loading