diff --git a/hack/log/log-dump-daemonset-windows.yaml b/hack/log/log-dump-daemonset-windows.yaml
new file mode 100644
index 00000000000..db23e9c4676
--- /dev/null
+++ b/hack/log/log-dump-daemonset-windows.yaml
@@ -0,0 +1,36 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: log-dump-node-windows
+spec:
+  selector:
+    matchLabels:
+      app: log-dump-node-windows
+  template:
+    metadata:
+      labels:
+        app:
+          log-dump-node-windows
+    spec:
+      securityContext:
+        windowsOptions:
+          runAsUserName: ContainerAdministrator
+      containers:
+      - name: log-dump-node-windows
+        image: mcr.microsoft.com/oss/kubernetes/pause:3.6
+        volumeMounts:
+        - name: varlog
+          mountPath: /var/log
+      nodeSelector:
+        kubernetes.io/os: windows
+      tolerations:
+      - effect: NoExecute
+        operator: Exists
+      - effect: NoSchedule
+        operator: Exists
+      - key: CriticalAddonsOnly
+        operator: Exists
+      volumes:
+      - name: varlog
+        hostPath:
+          path: /var/log
diff --git a/hack/log/log-dump.sh b/hack/log/log-dump.sh
index e198ec1d990..e716536623a 100755
--- a/hack/log/log-dump.sh
+++ b/hack/log/log-dump.sh
@@ -127,8 +127,58 @@ dump_workload_cluster_logs() {
     wait
 }
 
+# dump_workload_cluster_logs_windows deploys the log-dump-node-windows DaemonSet
+# and, for each of its pods, copies the kubelet and pod logs of the pod's node
+# into ${ARTIFACTS}/workload-cluster/<node name>.
+dump_workload_cluster_logs_windows() {
+    echo "Deploying log-dump-daemonset-windows"
+    "${KUBECTL}" apply -f "${REPO_ROOT}/hack/log/log-dump-daemonset-windows.yaml"
+    echo "Waiting for log-dump-daemonset-windows"
+    "${KUBECTL}" wait pod -l app=log-dump-node-windows --for=condition=Ready --timeout=5m
+
+    local -a log_dump_pods log_dump_files
+    local log_dump_pod log_dump_file node_name log_dump_dir
+    local fixed_dump_file_path dir file
+
+    IFS=" " read -ra log_dump_pods <<< "$("${KUBECTL}" get pod -l app=log-dump-node-windows -ojsonpath='{.items[*].metadata.name}')"
+
+    for log_dump_pod in "${log_dump_pods[@]}"; do
+        node_name="$(get_node_name "${log_dump_pod}")"
+        echo "Getting logs for node ${node_name}"
+
+        log_dump_dir="${ARTIFACTS}/workload-cluster/${node_name}"
+        mkdir -p "${log_dump_dir}"
+
+        # Copy the logs into a new folder inside the container first, since
+        # the files cannot be read directly from their original location.
+        "${KUBECTL}" exec "${log_dump_pod}" -- cmd.exe /c mkdir log
+        "${KUBECTL}" exec "${log_dump_pod}" -- cmd.exe /c xcopy /s c:\\var\\log\\kubelet c:\\log\\
+        "${KUBECTL}" exec "${log_dump_pod}" -- cmd.exe /c xcopy /s c:\\var\\log\\pods c:\\log\\
+
+        # Get a list of all of the files to copy with dir
+        # /s - recurse
+        # /B - bare format (no heading info or summaries)
+        # /A-D - exclude directories
+        IFS=" " read -ra log_dump_files <<< "$("${KUBECTL}" exec "${log_dump_pod}" -- cmd.exe /c dir /s /B /A-D log | tr '\n' ' ' | tr -d '\r')"
+        echo "Collecting pod logs"
+
+        for log_dump_file in "${log_dump_files[@]}"; do
+            echo " Getting logfile ${log_dump_file}"
+            # reverse slashes and remove c:\log\ from paths
+            fixed_dump_file_path="$(echo "${log_dump_file//\\//}" | cut -d "/" -f3-)"
+            dir="$(dirname "${fixed_dump_file_path}")"
+            file="$(basename "${fixed_dump_file_path}")"
+            mkdir -p "${log_dump_dir}"/"${dir}"
+            "${KUBECTL}" exec "${log_dump_pod}" -- cmd.exe /c type "${log_dump_file}" > "${log_dump_dir}"/"${dir}"/"${file}"
+        done
+
+        echo "Exported logs for node \"${node_name}\""
+    done
+}
+
 cleanup() {
     "${KUBECTL}" delete -f "${REPO_ROOT}/hack/log/log-dump-daemonset.yaml" || true
+    "${KUBECTL}" delete -f "${REPO_ROOT}/hack/log/log-dump-daemonset-windows.yaml" || true
     # shellcheck source=hack/log/redact.sh
     source "${REPO_ROOT}/hack/log/redact.sh"
 }
@@ -138,5 +188,12 @@ trap cleanup EXIT
 echo "================ DUMPING LOGS FOR MANAGEMENT CLUSTER ================"
 dump_mgmt_cluster_logs
 
-echo "================ DUMPING LOGS FOR WORKLOAD CLUSTER ================"
+echo "================ DUMPING LOGS FOR WORKLOAD CLUSTER (Linux) =========="
 dump_workload_cluster_logs
+
+if [[ -z "${TEST_WINDOWS:-}" ]]; then
+    echo "TEST_WINDOWS envvar not set, skipping log collection for Windows nodes."
+else
+    echo "================ DUMPING LOGS FOR WORKLOAD CLUSTER (Windows) ========"
+    dump_workload_cluster_logs_windows
+fi
diff --git a/scripts/ci-entrypoint.sh b/scripts/ci-entrypoint.sh
index 3819e76cd03..1a7bc38effa 100755
--- a/scripts/ci-entrypoint.sh
+++ b/scripts/ci-entrypoint.sh
@@ -112,10 +112,10 @@ create_cluster() {
 }
 
 wait_for_nodes() {
-    echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s) and ${WORKER_MACHINE_COUNT} worker machine(s) to become Ready"
+    echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT} windows machine(s) to become Ready"
 
     # Ensure that all nodes are registered with the API server before checking for readiness
-    local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT))"
+    local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))"
     while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do
         sleep 10
     done