diff --git a/scripts/ci-e2e.sh b/scripts/ci-e2e.sh index b5861a29fd5..ec5c87c1881 100755 --- a/scripts/ci-e2e.sh +++ b/scripts/ci-e2e.sh @@ -78,12 +78,15 @@ export KIND_EXPERIMENTAL_DOCKER_NETWORK="bridge" # Generate SSH key. AZURE_SSH_PUBLIC_KEY_FILE=${AZURE_SSH_PUBLIC_KEY_FILE:-""} if [ -z "${AZURE_SSH_PUBLIC_KEY_FILE}" ]; then + echo "generating sshkey for e2e" SSH_KEY_FILE=.sshkey rm -f "${SSH_KEY_FILE}" 2>/dev/null ssh-keygen -t rsa -b 2048 -f "${SSH_KEY_FILE}" -N '' 1>/dev/null AZURE_SSH_PUBLIC_KEY_FILE="${SSH_KEY_FILE}.pub" fi export AZURE_SSH_PUBLIC_KEY_B64=$(cat "${AZURE_SSH_PUBLIC_KEY_FILE}" | base64 | tr -d '\r\n') +# Windows sets the public key via cloudbase-init which takes the raw text as input +export AZURE_SSH_PUBLIC_KEY=$(cat "${AZURE_SSH_PUBLIC_KEY_FILE}" | tr -d '\r\n') cleanup() { ${REPO_ROOT}/hack/log/redact.sh || true diff --git a/test/e2e/azure_logcollector.go b/test/e2e/azure_logcollector.go index 00e9fa83b86..aa310a4e091 100644 --- a/test/e2e/azure_logcollector.go +++ b/test/e2e/azure_logcollector.go @@ -23,9 +23,11 @@ import ( "io/ioutil" "net/http" "path/filepath" - "sigs.k8s.io/cluster-api-provider-azure/azure" "strings" + "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha4" + "sigs.k8s.io/cluster-api-provider-azure/azure" + "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-06-30/compute" autorest "github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/azure/auth" @@ -40,8 +42,7 @@ import ( type AzureLogCollector struct{} // CollectMachineLog collects logs from a machine. 
-func (k AzureLogCollector) CollectMachineLog(ctx context.Context, - managementClusterClient client.Client, m *clusterv1.Machine, outputPath string) error { +func (k AzureLogCollector) CollectMachineLog(ctx context.Context, managementClusterClient client.Client, m *clusterv1.Machine, outputPath string) error { var errors []error if err := collectLogsFromNode(ctx, managementClusterClient, m, outputPath); err != nil { @@ -62,8 +63,25 @@ func collectLogsFromNode(ctx context.Context, managementClusterClient client.Cli if err != nil { return err } + + Logf("INFO: Collecting logs for machine %s in cluster %s in namespace %s\n", m.GetName(), cluster.Name, cluster.Namespace) + isWindows, err := isNodeWindows(ctx, managementClusterClient, m) + if err != nil { + return err + } + controlPlaneEndpoint := cluster.Spec.ControlPlaneEndpoint.Host hostname := m.Spec.InfrastructureRef.Name + if isWindows { + // Windows host name ends up being different than the infra machine name + // due to Windows name limitations in Azure so use the IP address instead + if len(m.Status.Addresses) > 0 { + hostname = m.Status.Addresses[0].Address + } else { + Logf("INFO: Unable to collect logs as node doesn't have addresses") + } + } + port := e2eConfig.GetVariable(VMSSHPort) execToPathFn := func(outputFileName, command string, args ...string) func() error { return func() error { @@ -78,7 +96,34 @@ func collectLogsFromNode(ctx context.Context, managementClusterClient client.Cli } } - return kinderrors.AggregateConcurrent([]func() error{ + if isWindows { + // if we initiate too many ssh connections they get dropped (default is 10) so split it up + var errors []error + errors = append(errors, kinderrors.AggregateConcurrent(windowsInfo(execToPathFn))) + errors = append(errors, kinderrors.AggregateConcurrent(windowsK8sLogs(execToPathFn))) + errors = append(errors, kinderrors.AggregateConcurrent(windowsNetworkLogs(execToPathFn))) + return kinderrors.NewAggregate(errors) + } + + return 
kinderrors.AggregateConcurrent(linuxLogs(execToPathFn)) +} + +func isNodeWindows(ctx context.Context, managementClusterClient client.Client, m *clusterv1.Machine) (bool, error) { + key := client.ObjectKey{ + Namespace: m.Spec.InfrastructureRef.Namespace, + Name: m.Spec.InfrastructureRef.Name, + } + + azMachine := &v1alpha4.AzureMachine{} + if err := managementClusterClient.Get(ctx, key, azMachine); err != nil { + return false, err + } + + return azMachine.Spec.OSDisk.OSType == azure.WindowsOS, nil +} + +func linuxLogs(execToPathFn func(outputFileName string, command string, args ...string) func() error) []func() error { + return []func() error{ execToPathFn( "journal.log", "journalctl", "--no-pager", "--output=short-precise", @@ -107,7 +152,102 @@ func collectLogsFromNode(ctx context.Context, managementClusterClient client.Cli "cloud-init-output.log", "cat", "/var/log/cloud-init-output.log", ), - }) + } +} + +func windowsK8sLogs(execToPathFn func(outputFileName string, command string, args ...string) func() error) []func() error { + return []func() error{ + execToPathFn( + "hyperv-operation.log", + "Get-WinEvent", "-LogName Microsoft-Windows-Hyper-V-Compute-Operational | Select-Object -Property TimeCreated, Id, LevelDisplayName, Message | Sort-Object TimeCreated | Format-Table -Wrap -Autosize", + ), + execToPathFn( + "docker.log", + "get-eventlog", "-LogName Application -Source Docker | Select-Object Index, TimeGenerated, EntryType, Message | Sort-Object Index | Format-Table -Wrap -Autosize", + ), + execToPathFn( + "containers.log", + "docker", "ps -a", + ), + execToPathFn( + "containers-hcs.log", + "hcsdiag", "list", + ), + execToPathFn( + "kubelet.log", + `Get-ChildItem "C:\\var\\log\\kubelet\\" | ForEach-Object { write-output "$_" ;cat "c:\\var\\log\\kubelet\\$_" }`, + ), + } +} + +func windowsInfo(execToPathFn func(outputFileName string, command string, args ...string) func() error) []func() error { + return []func() error{ + execToPathFn( + "reboots.log", + 
"Get-WinEvent", `-ErrorAction Ignore -FilterHashtable @{logname = 'System'; id = 1074, 1076, 2004, 6005, 6006, 6008 } | Select-Object -Property TimeCreated, Id, LevelDisplayName, Message | Format-Table -Wrap -Autosize`, + ), + execToPathFn( + "scm.log", + "Get-WinEvent", `-FilterHashtable @{logname = 'System'; ProviderName = 'Service Control Manager' } | Select-Object -Property TimeCreated, Id, LevelDisplayName, Message | Format-Table -Wrap -Autosize`, + ), + execToPathFn( + "pagefile.log", + "Get-CimInstance", "win32_pagefileusage | Format-List *", + ), + execToPathFn( + "cloudbase-init-unattend.log", + "get-content 'C:\\Program Files\\Cloudbase Solutions\\Cloudbase-Init\\log\\cloudbase-init-unattend.log'", + ), + execToPathFn( + "cloudbase-init.log", + "get-content 'C:\\Program Files\\Cloudbase Solutions\\Cloudbase-Init\\log\\cloudbase-init.log'", + ), + execToPathFn( + "services.log", + "get-service", + ), + } +} + +func windowsNetworkLogs(execToPathFn func(outputFileName string, command string, args ...string) func() error) []func() error { + return []func() error{ + execToPathFn( + "network.log", + "Get-HnsNetwork | Select Name, Type, Id, AddressPrefix | Format-Table -Wrap -Autosize", + ), + execToPathFn( + "network-detailed.log", + "Get-hnsnetwork | Convertto-json -Depth 20", + ), + execToPathFn( + "network-individual-detailed.log", + "Get-hnsnetwork | % { Get-HnsNetwork -Id $_.ID -Detailed } | Convertto-json -Depth 20", + ), + execToPathFn( + "hnsendpoints.log", + "Get-HnsEndpoint | Select IpAddress, MacAddress, IsRemoteEndpoint, State", + ), + execToPathFn( + "hnsendpolicy-detailed.log", + "Get-hnspolicylist | Convertto-json -Depth 20", + ), + execToPathFn( + "ipconfig.log", + "ipconfig /allcompartments /all", + ), + execToPathFn( + "ips.log", + "Get-NetIPAddress -IncludeAllCompartments", + ), + execToPathFn( + "interfaces.log", + "Get-NetIPInterface -IncludeAllCompartments", + ), + execToPathFn( + "hnsdiag.txt", + "hnsdiag list all -d", + ), + } } // 
collectBootLog collects boot logs of the vm by using azure boot diagnostics