From 9361eb3afb524372c8185fe80ef9f1fbd020ae46 Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Fri, 12 Aug 2022 08:46:16 +0200 Subject: [PATCH 1/3] cmd/tetragon-vmtests-run: fix --just-boot --just-boot does not work anymore because the service is executed even if it was not enabled. Not sure why. As a simple solution, don't add the service if the user specifies --just-boot. Signed-off-by: Kornilios Kourtis --- cmd/tetragon-vmtests-run/image.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cmd/tetragon-vmtests-run/image.go b/cmd/tetragon-vmtests-run/image.go index ded5ccd43d1..3c7e3475c20 100644 --- a/cmd/tetragon-vmtests-run/image.go +++ b/cmd/tetragon-vmtests-run/image.go @@ -91,10 +91,8 @@ func buildTesterService(rcnf *RunConf, tmpDir string) ([]images.Action, error) { */ } - if !rcnf.justBoot { - enableTester := images.Action{Op: &images.RunCommand{Cmd: "systemctl enable tetragon-tester.service"}} - actions = append(actions, enableTester) - } + enableTester := images.Action{Op: &images.RunCommand{Cmd: "systemctl enable tetragon-tester.service"}} + actions = append(actions, enableTester) return actions, nil } @@ -132,7 +130,7 @@ func buildTesterActions(rcnf *RunConf, tmpDir string) ([]images.Action, error) { Op: &images.CopyInCommand{LocalPath: tmpConfFile, RemoteDir: remoteConfDir}, }) - if !rcnf.useTetragonTesterInit { + if !rcnf.useTetragonTesterInit && !rcnf.justBoot { acts, err := buildTesterService(rcnf, tmpDir) if err != nil { return nil, err From fa862fdc680d12f00d807c912d33393f44108fc4 Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Fri, 12 Aug 2022 08:46:16 +0200 Subject: [PATCH 2/3] gh/vmtests: actually panic on RCU stalls This reverts commit dd396c68e8b5a511b9c2ad582bc08e7b2726bbcf, which was setting /proc/sys/kernel/panic_on_rcu_stall on the host system rather than the VM and was thus ineffective. Instead, add an entry to /etc/sysctl.d/local.conf that properly sets this inside the VM. 
Signed-off-by: Kornilios Kourtis --- .github/workflows/vmtests.yml | 4 ---- cmd/tetragon-vmtests-run/image.go | 4 ++++ go.mod | 2 +- go.sum | 4 ++-- .../little-vm-helper/pkg/images/actions.go | 18 ++++++++++++++++++ .../cilium/little-vm-helper/pkg/images/conf.go | 2 ++ .../pkg/images/step_create_image.go | 9 +++++++-- vendor/modules.txt | 2 +- 8 files changed, 35 insertions(+), 10 deletions(-) diff --git a/.github/workflows/vmtests.yml b/.github/workflows/vmtests.yml index be8cb5a9bbe..24226698386 100644 --- a/.github/workflows/vmtests.yml +++ b/.github/workflows/vmtests.yml @@ -92,10 +92,6 @@ jobs: run: | sudo chmod go+rX -R /boot/ - - name: set panic on RCU stall - run: | - sudo sh -c 'echo 1 > /proc/sys/kernel/panic_on_rcu_stall' - - name: download build data uses: actions/download-artifact@v3 with: diff --git a/cmd/tetragon-vmtests-run/image.go b/cmd/tetragon-vmtests-run/image.go index 3c7e3475c20..7fefeee49bd 100644 --- a/cmd/tetragon-vmtests-run/image.go +++ b/cmd/tetragon-vmtests-run/image.go @@ -166,6 +166,10 @@ func buildTestImage(log *logrus.Logger, rcnf *RunConf) error { {Op: &images.SetHostnameCommand{Hostname: hostname}}, // NB: some of the tetragon tests expect a /usr/bin/cp {Op: &images.RunCommand{Cmd: "cp /bin/cp /usr/bin/cp"}}, + {Op: &images.AppendLineCommand{ + File: "/etc/sysctl.d/local.conf", + Line: "kernel.panic_on_rcu_stall=1", + }}, } actions = append(actions, fsActions...) actions = append(actions, testerActions...) 
diff --git a/go.mod b/go.mod index 7f77ff45e13..0d1edb48313 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/cilium/cilium-e2e v0.0.0-00010101000000-000000000000 github.com/cilium/ebpf v0.9.0 github.com/cilium/hubble v0.5.3-0.20220311154618-3e44df066567 - github.com/cilium/little-vm-helper v0.0.0-20220729082344-f2ca5b097f67 + github.com/cilium/little-vm-helper v0.0.0-20220812055014-101c3e342e13 github.com/cilium/lumberjack/v2 v2.2.2 github.com/cilium/tetragon/api v0.0.0-00010101000000-000000000000 github.com/cilium/tetragon/pkg/k8s v0.0.0-00010101000000-000000000000 diff --git a/go.sum b/go.sum index f96997d7534..08df0d6f485 100644 --- a/go.sum +++ b/go.sum @@ -183,8 +183,8 @@ github.com/cilium/hubble v0.5.3-0.20220311154618-3e44df066567/go.mod h1:Y522N+Ka github.com/cilium/ipam v0.0.0-20200217195329-a46f8d55f9db/go.mod h1:URWgSDyRFKKBgnY4Svj37siCG145nu3qJt6oHlZRdqU= github.com/cilium/ipam v0.0.0-20201020084809-76717fcdb3a2/go.mod h1:Ascfar4FtgB+K+mwqbZpSb3WVZ5sPFIarg+iAOXNZqI= github.com/cilium/kafka v0.0.0-20180809090225-01ce283b732b/go.mod h1:ktgizta3CPZBKz5uW272SJyjiro0vn4nOVP7Pk4RopA= -github.com/cilium/little-vm-helper v0.0.0-20220729082344-f2ca5b097f67 h1:gVu/1IyhT3t8fDGYVeDFJG3FCknd0lOFHmU6gkgHtlo= -github.com/cilium/little-vm-helper v0.0.0-20220729082344-f2ca5b097f67/go.mod h1:Ya+Z4TOpZlsTtFZg3teKp9lzNd/BTgC4MlRN4drM328= +github.com/cilium/little-vm-helper v0.0.0-20220812055014-101c3e342e13 h1:R1X8fZxqDnOZqDEWIcWR3t5utglPk2cWzS8VPBf1PFE= +github.com/cilium/little-vm-helper v0.0.0-20220812055014-101c3e342e13/go.mod h1:Ya+Z4TOpZlsTtFZg3teKp9lzNd/BTgC4MlRN4drM328= github.com/cilium/lumberjack/v2 v2.2.2 h1:RKTdhb63DY0Xu7pE1pipMj7Zq28LyvBGSrCneHiKm4A= github.com/cilium/lumberjack/v2 v2.2.2/go.mod h1:yfbtPGmg4i//5oEqzaMxDqSWqgfZFmMoV70Mc2k6v0A= github.com/cilium/proxy v0.0.0-20191113190709-4c7b379792e6/go.mod h1:lbRnBzpxwMP5KsTu99cM654ShwTWamyhrF6cCLuYqhE= diff --git a/vendor/github.com/cilium/little-vm-helper/pkg/images/actions.go 
b/vendor/github.com/cilium/little-vm-helper/pkg/images/actions.go index f856fdda983..2ba721c775a 100644 --- a/vendor/github.com/cilium/little-vm-helper/pkg/images/actions.go +++ b/vendor/github.com/cilium/little-vm-helper/pkg/images/actions.go @@ -30,6 +30,7 @@ var actionOpInstances = []ActionOp{ &MkdirCommand{}, &UploadCommand{}, &ChmodCommand{}, + &AppendLineCommand{}, } type VirtCustomizeAction struct { @@ -135,3 +136,20 @@ func (c *ChmodCommand) ToStep(s *StepConf) multistep.Step { Args: []string{"--chmod", fmt.Sprintf("%s:%s", c.Permissions, c.File)}, } } + +// AppendLineCommand +type AppendLineCommand struct { + File string + Line string +} + +func (c *AppendLineCommand) ActionOpName() string { + return "append-line" +} + +func (c *AppendLineCommand) ToStep(s *StepConf) multistep.Step { + return &VirtCustomizeStep{ + StepConf: s, + Args: []string{"--append-line", fmt.Sprintf("%s:%s", c.File, c.Line)}, + } +} diff --git a/vendor/github.com/cilium/little-vm-helper/pkg/images/conf.go b/vendor/github.com/cilium/little-vm-helper/pkg/images/conf.go index 3426cc80c49..1e9cee5eab7 100644 --- a/vendor/github.com/cilium/little-vm-helper/pkg/images/conf.go +++ b/vendor/github.com/cilium/little-vm-helper/pkg/images/conf.go @@ -6,6 +6,8 @@ type ImgConf struct { Name string `json:"name"` // Parent is the name parent image (or "" if image does not have a parent) Parent string `json:"parent,omitempty"` + // ImageSize is the size of the image (defaults to images.DefaultImageSize) + ImageSize string `json:"image_size,omitempty"` // Packages is the list of packages contained in the image Packages []string `json:"packages"` // Actions is a list of additional actions for building the image. 
diff --git a/vendor/github.com/cilium/little-vm-helper/pkg/images/step_create_image.go b/vendor/github.com/cilium/little-vm-helper/pkg/images/step_create_image.go index 766efa32cca..7ebbb281782 100644 --- a/vendor/github.com/cilium/little-vm-helper/pkg/images/step_create_image.go +++ b/vendor/github.com/cilium/little-vm-helper/pkg/images/step_create_image.go @@ -90,6 +90,11 @@ func (s *CreateImage) makeRootImage(ctx context.Context) error { } }() + imgSize := DefaultImageSize + if size := s.imgCnf.ImageSize; size != "" { + imgSize = size + } + // example: guestfish -N foo.img=disk:8G -- mkfs ext4 /dev/sda : mount /dev/sda / : tar-in /tmp/foo.tar / if s.bootable { dirname, err := os.MkdirTemp("", "extlinux-") @@ -105,7 +110,7 @@ func (s *CreateImage) makeRootImage(ctx context.Context) error { } cmd = exec.CommandContext(ctx, GuestFish, - "-N", fmt.Sprintf("%s=disk:%s", imgFname, DefaultImageSize), + "-N", fmt.Sprintf("%s=disk:%s", imgFname, imgSize), "--", "part-disk", "/dev/sda", "mbr", ":", @@ -123,7 +128,7 @@ func (s *CreateImage) makeRootImage(ctx context.Context) error { ) } else { cmd = exec.CommandContext(ctx, GuestFish, - "-N", fmt.Sprintf("%s=disk:%s", imgFname, DefaultImageSize), + "-N", fmt.Sprintf("%s=disk:%s", imgFname, imgSize), "--", "mkfs", "ext4", "/dev/sda", ":", diff --git a/vendor/modules.txt b/vendor/modules.txt index 9764fccedb4..bd47b6414c9 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -74,7 +74,7 @@ github.com/cilium/hubble/pkg/k8s github.com/cilium/hubble/pkg/parser/endpoint github.com/cilium/hubble/pkg/parser/getters github.com/cilium/hubble/pkg/servicecache -# github.com/cilium/little-vm-helper v0.0.0-20220729082344-f2ca5b097f67 +# github.com/cilium/little-vm-helper v0.0.0-20220812055014-101c3e342e13 ## explicit; go 1.18 github.com/cilium/little-vm-helper/pkg/images github.com/cilium/little-vm-helper/pkg/logcmd From 8efe9bf1cfb60296fcbbc943e2d793a6e77ca121 Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Fri, 12 Aug 
2022 08:46:16 +0200 Subject: [PATCH 3/3] cmd/tetragon-vmtests-run: buffer qemu output We've been seeing RCU stalls such as the following when running qemu in GH: Running test pkg.sensors.test.TestSensorLseekLoad .[ 116.892213] rcu: INFO: rcu_sched self-detected stall on CPU [ 116.892213] rcu: 0-...!: (20987 ticks this GP) idle=d3e/1/0x4000000000000002 softirq=23120/23120 fqs=0 [ 116.892213] (t=21004 jiffies g=49257 q=8) [ 116.892213] rcu: rcu_sched kthread starved for 21004 jiffies! g49257 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0 [ 116.892213] rcu: RCU grace-period kthread stack dump: [ 116.892213] rcu_sched R running task 14920 11 2 0x90004000 [ 116.892213] Call Trace: [ 116.892213] __schedule+0x288/0x600 [ 116.892213] ? __mod_timer+0x1a6/0x3c0 [ 116.892213] schedule+0x34/0xa0 [ 116.892213] schedule_timeout+0x84/0x140 [ 116.892213] ? __next_timer_interrupt+0xc0/0xc0 [ 116.892213] rcu_gp_kthread+0x4f6/0xd40 [ 116.892213] ? kfree_call_rcu+0x10/0x10 [ 116.892213] kthread+0x107/0x120 [ 116.892213] ? __kthread_bind_mask+0x60/0x60 [ 116.892213] ret_from_fork+0x35/0x40 [ 116.892213] NMI backtrace for cpu 0 [ 116.892213] CPU: 0 PID: 413 Comm: pkg.sensors.tes Not tainted 5.4.209 #1 [ 116.892213] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 116.892213] Call Trace: [ 116.892213] [ 116.892213] dump_stack+0x50/0x63 [ 116.892213] nmi_cpu_backtrace.cold+0x13/0x50 [ 116.892213] ? lapic_can_unplug_cpu+0x60/0x60 [ 116.892213] nmi_trigger_cpumask_backtrace+0x7c/0x90 [ 116.892213] rcu_dump_cpu_stacks+0x7c/0xaa [ 116.892213] rcu_sched_clock_irq.cold+0x1b3/0x39e [ 116.892213] ? can_stop_idle_tick+0x70/0x70 [ 116.892213] update_process_times+0x56/0x90 [ 116.892213] tick_sched_handle+0x2f/0x40 [ 116.892213] tick_sched_timer+0x4b/0xb0 [ 116.892213] __hrtimer_run_queues+0x127/0x2a0 [ 116.892213] hrtimer_interrupt+0xf0/0x280 [ 116.892213] smp_apic_timer_interrupt+0x5d/0x120 [ 116.892213] apic_timer_interrupt+0xf/0x20 [ 116.892213] ... 
repeated until timeout ... From reading https://www.kernel.org/doc/Documentation/RCU/stallwarn.txt, one of my theories is that writes to the console get delayed and the kernel enters some weird livelock state. This patch buffers qemu output aiming to avoid hitting RCU stalls such as the one above. Signed-off-by: Kornilios Kourtis --- cmd/tetragon-vmtests-run/run_tests.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cmd/tetragon-vmtests-run/run_tests.go b/cmd/tetragon-vmtests-run/run_tests.go index dbcc3f0feaa..c65502f57a1 100644 --- a/cmd/tetragon-vmtests-run/run_tests.go +++ b/cmd/tetragon-vmtests-run/run_tests.go @@ -1,6 +1,7 @@ package main import ( + "bufio" "context" "encoding/json" "fmt" @@ -28,11 +29,17 @@ func runTests( ctx, cancel := signal.NotifyContext(ctx, unix.SIGINT, unix.SIGTERM) defer cancel() qemuCmd := exec.CommandContext(ctx, qemuBin, qemuArgs...) - qemuCmd.Stdout = os.Stdout - qemuCmd.Stderr = os.Stderr + + // buffer output from qemu's stdout/stderr to avoid delays + bout := bufio.NewWriter(os.Stdout) + berr := bufio.NewWriter(os.Stderr) + qemuCmd.Stdout = bout + qemuCmd.Stderr = berr if err := qemuCmd.Run(); err != nil { return nil, err } + bout.Flush() + berr.Flush() fmt.Printf("results directory: %s\n", rcnf.testerConf.ResultsDir) resFile := filepath.Join(rcnf.testerConf.ResultsDir, "results.json")