program, btf: probe correct log buffer size

The kernel exposes a log of operations to aid debugging a program or BTF load. Until recently there was no way to know the size of that buffer, so the library forces the user to specify a buffer size. From a cursory survey on sourcegraph.com it seems that users either hardcode a large fixed buffer or probe the correct size by doubling the buffer size when hitting ENOSPC. Since commit 47a71c1f9af0 ("bpf: Add log_true_size output field to return necessary log buffer size") the kernel does provide a hint to user space. Move probing of the correct log buffer size into the library. On recent (>= 6.4) kernels this is guaranteed to work in a single call. On older kernels we use the doubling strategy employed by cilium and tetragon. Signed-off-by: Lorenz Bauer <[email protected]>
cilium · Jul 3, 2024 · f2b2f6d · f2b2f6d
1 parent 12aca10
commit f2b2f6d
Show file tree

Hide file tree

Showing 6 changed files with 113 additions and 143 deletions.
diff --git a/btf/btf_test.go b/btf/btf_test.go
@@ -316,10 +316,6 @@ func TestVerifierError(t *testing.T) {
 	if !errors.As(err, &ve) {
 		t.Fatalf("expected a VerifierError, got: %v", err)
 	}
-
-	if ve.Truncated {
-		t.Fatalf("expected non-truncated verifier log: %v", err)
-	}
 }
 
 func TestGuessBTFByteOrder(t *testing.T) {

diff --git a/btf/handle.go b/btf/handle.go
@@ -41,6 +41,8 @@ func NewHandle(b *Builder) (*Handle, error) {
 //
 // Returns an error wrapping ErrNotSupported if the kernel doesn't support BTF.
 func NewHandleFromRawBTF(btf []byte) (*Handle, error) {
+	const minLogSize = 64 * 1024
+
 	if uint64(len(btf)) > math.MaxUint32 {
 		return nil, errors.New("BTF exceeds the maximum size")
 	}
@@ -50,26 +52,54 @@ func NewHandleFromRawBTF(btf []byte) (*Handle, error) {
 		BtfSize: uint32(len(btf)),
 	}
 
-	fd, err := sys.BtfLoad(attr)
-	if err == nil {
-		return &Handle{fd, attr.BtfSize, false}, nil
+	var (
+		logBuf []byte
+		err    error
+	)
+	for {
+		var fd *sys.FD
+		fd, err = sys.BtfLoad(attr)
+		if err == nil {
+			return &Handle{fd, attr.BtfSize, false}, nil
+		}
+
+		if attr.BtfLogTrueSize != 0 && attr.BtfLogSize >= attr.BtfLogTrueSize {
+			// The log buffer already has the correct size.
+			break
+		}
+
+		if attr.BtfLogSize != 0 && !errors.Is(err, unix.ENOSPC) {
+			// Up until at least kernel 6.0, the BTF verifier does not return ENOSPC
+			// if there are other verification errors. ENOSPC is only returned when
+			// the BTF blob is correct, a log was requested, and the provided buffer
+			// is too small. We're therefore not sure whether we got the full
+			// log or not.
+			break
+		}
+
+		// Make an educated guess how large the buffer should be. Start
+		// at a reasonable minimum and then double the size.
+		logSize := uint32(max(len(logBuf)*2, minLogSize))
+		if int(logSize) < len(logBuf) {
+			return nil, errors.New("overflow while probing log buffer size")
+		}
+
+		if attr.BtfLogTrueSize != 0 {
+			// The kernel has given us a hint how large the log buffer has to be.
+			logSize = attr.BtfLogTrueSize
+		}
+
+		logBuf = make([]byte, logSize)
+		attr.BtfLogSize = logSize
+		attr.BtfLogBuf = sys.NewSlicePointer(logBuf)
+		attr.BtfLogLevel = 1
 	}
 
 	if err := haveBTF(); err != nil {
 		return nil, err
 	}
 
-	logBuf := make([]byte, 64*1024)
-	attr.BtfLogBuf = sys.NewSlicePointer(logBuf)
-	attr.BtfLogSize = uint32(len(logBuf))
-	attr.BtfLogLevel = 1
-
-	// Up until at least kernel 6.0, the BTF verifier does not return ENOSPC
-	// if there are other verification errors. ENOSPC is only returned when
-	// the BTF blob is correct, a log was requested, and the provided buffer
-	// is too small.
-	_, ve := sys.BtfLoad(attr)
-	return nil, internal.ErrorWithLog("load btf", err, logBuf, errors.Is(ve, unix.ENOSPC))
+	return nil, internal.ErrorWithLog("load btf", err, logBuf)
 }
 
 // NewHandleFromID returns the BTF handle for a given id.

diff --git a/internal/errors.go b/internal/errors.go
@@ -12,7 +12,7 @@ import (
 //
 // The default error output is a summary of the full log. The latter can be
 // accessed via VerifierError.Log or by formatting the error, see Format.
-func ErrorWithLog(source string, err error, log []byte, truncated bool) *VerifierError {
+func ErrorWithLog(source string, err error, log []byte) *VerifierError {
 	const whitespace = "\t\r\v\n "
 
 	// Convert verifier log C string by truncating it on the first 0 byte
@@ -23,7 +23,7 @@ func ErrorWithLog(source string, err error, log []byte, truncated bool) *Verifie
 
 	log = bytes.Trim(log, whitespace)
 	if len(log) == 0 {
-		return &VerifierError{source, err, nil, truncated}
+		return &VerifierError{source, err, nil}
 	}
 
 	logLines := bytes.Split(log, []byte{'\n'})
@@ -34,7 +34,7 @@ func ErrorWithLog(source string, err error, log []byte, truncated bool) *Verifie
 		lines = append(lines, string(bytes.TrimRight(line, whitespace)))
 	}
 
-	return &VerifierError{source, err, lines, truncated}
+	return &VerifierError{source, err, lines}
 }
 
 // VerifierError includes information from the eBPF verifier.
@@ -46,8 +46,6 @@ type VerifierError struct {
 	Cause error
 	// The verifier output split into lines.
 	Log []string
-	// Whether the log output is truncated, based on several heuristics.
-	Truncated bool
 }
 
 func (le *VerifierError) Unwrap() error {
@@ -70,7 +68,7 @@ func (le *VerifierError) Error() string {
 	}
 
 	lines := log[n-1:]
-	if n >= 2 && (includePreviousLine(log[n-1]) || le.Truncated) {
+	if n >= 2 && includePreviousLine(log[n-1]) {
 		// Add one more line of context if it aids understanding the error.
 		lines = log[n-2:]
 	}
@@ -81,22 +79,9 @@ func (le *VerifierError) Error() string {
 	}
 
 	omitted := len(le.Log) - len(lines)
-	if omitted == 0 && !le.Truncated {
-		return b.String()
-	}
-
-	b.WriteString(" (")
-	if le.Truncated {
-		b.WriteString("truncated")
-	}
-
 	if omitted > 0 {
-		if le.Truncated {
-			b.WriteString(", ")
-		}
-		fmt.Fprintf(&b, "%d line(s) omitted", omitted)
+		fmt.Fprintf(&b, " (%d line(s) omitted)", omitted)
 	}
-	b.WriteString(")")
 
 	return b.String()
 }
@@ -188,10 +173,6 @@ func (le *VerifierError) Format(f fmt.State, verb rune) {
 			}
 		}
 
-		if le.Truncated {
-			fmt.Fprintf(f, "\n\t(truncated)")
-		}
-
 	default:
 		fmt.Fprintf(f, "%%!%c(BADVERB)", verb)
 	}

diff --git a/internal/errors_test.go b/internal/errors_test.go
@@ -20,36 +20,33 @@ func TestVerifierErrorWhitespace(t *testing.T) {
 		0, 0, // trailing NUL bytes
 	)
 
-	err := ErrorWithLog("frob", errors.New("test"), b, false)
+	err := ErrorWithLog("frob", errors.New("test"), b)
 	qt.Assert(t, qt.Equals(err.Error(), "frob: test: unreachable insn 28"))
 
 	for _, log := range [][]byte{
 		nil,
 		[]byte("\x00"),
 		[]byte(" "),
 	} {
-		err = ErrorWithLog("frob", errors.New("test"), log, false)
+		err = ErrorWithLog("frob", errors.New("test"), log)
 		qt.Assert(t, qt.Equals(err.Error(), "frob: test"), qt.Commentf("empty log %q has incorrect format", log))
 	}
 }
 
 func TestVerifierErrorWrapping(t *testing.T) {
-	ve := ErrorWithLog("frob", unix.ENOENT, nil, false)
+	ve := ErrorWithLog("frob", unix.ENOENT, nil)
 	qt.Assert(t, qt.ErrorIs(ve, unix.ENOENT), qt.Commentf("should wrap provided error"))
-	qt.Assert(t, qt.IsFalse(ve.Truncated), qt.Commentf("verifier log should not be marked as truncated"))
 
-	ve = ErrorWithLog("frob", unix.EINVAL, nil, true)
+	ve = ErrorWithLog("frob", unix.EINVAL, nil)
 	qt.Assert(t, qt.ErrorIs(ve, unix.EINVAL), qt.Commentf("should wrap provided error"))
-	qt.Assert(t, qt.IsTrue(ve.Truncated), qt.Commentf("verifier log should be marked as truncated"))
 
-	ve = ErrorWithLog("frob", unix.EINVAL, []byte("foo"), false)
+	ve = ErrorWithLog("frob", unix.EINVAL, []byte("foo"))
 	qt.Assert(t, qt.ErrorIs(ve, unix.EINVAL), qt.Commentf("should wrap provided error"))
 	qt.Assert(t, qt.StringContains(ve.Error(), "foo"), qt.Commentf("verifier log should appear in error string"))
 
-	ve = ErrorWithLog("frob", unix.ENOSPC, []byte("foo"), true)
+	ve = ErrorWithLog("frob", unix.ENOSPC, []byte("foo"))
 	qt.Assert(t, qt.ErrorIs(ve, unix.ENOSPC), qt.Commentf("should wrap provided error"))
 	qt.Assert(t, qt.StringContains(ve.Error(), "foo"), qt.Commentf("verifier log should appear in error string"))
-	qt.Assert(t, qt.IsTrue(ve.Truncated), qt.Commentf("verifier log should be marked truncated"))
 }
 
 func TestVerifierErrorSummary(t *testing.T) {
@@ -84,5 +81,5 @@ func readErrorFromFile(tb testing.TB, file string) *VerifierError {
 		tb.Fatal("Read file:", err)
 	}
 
-	return ErrorWithLog("file", unix.EINVAL, contents, false)
+	return ErrorWithLog("file", unix.EINVAL, contents)
 }
diff --git a/prog.go b/prog.go
@@ -46,13 +46,9 @@ const (
 	outputPad = 256 + 2
 )
 
-// DefaultVerifierLogSize is the default number of bytes allocated for the
+// minVerifierLogSize is the default number of bytes allocated for the
 // verifier log.
-const DefaultVerifierLogSize = 64 * 1024
-
-// maxVerifierLogSize is the maximum size of verifier log buffer the kernel
-// will accept before returning EINVAL.
-const maxVerifierLogSize = math.MaxUint32 >> 2
+const minVerifierLogSize = 64 * 1024
 
 // ProgramOptions control loading a program into the kernel.
 type ProgramOptions struct {
@@ -73,17 +69,6 @@ type ProgramOptions struct {
 	// attempt at loading the program.
 	LogLevel LogLevel
 
-	// Controls the output buffer size for the verifier log, in bytes. See the
-	// documentation on ProgramOptions.LogLevel for details about how this value
-	// is used.
-	//
-	// If this value is set too low to fit the verifier log, the resulting
-	// [ebpf.VerifierError]'s Truncated flag will be true, and the error string
-	// will also contain a hint to that effect.
-	//
-	// Defaults to DefaultVerifierLogSize.
-	LogSize int
-
 	// Disables the verifier log completely, regardless of other options.
 	LogDisabled bool
 
@@ -262,10 +247,6 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
 		return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
 	}
 
-	if opts.LogSize < 0 {
-		return nil, errors.New("ProgramOptions.LogSize must be a positive value; disable verifier logs using ProgramOptions.LogDisabled")
-	}
-
 	// Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load")
 	// require the version field to be set to the value of the KERNEL_VERSION
 	// macro for kprobe-type programs.
@@ -404,37 +385,59 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
 		}
 	}
 
-	if opts.LogSize == 0 {
-		opts.LogSize = DefaultVerifierLogSize
-	}
-
-	// The caller requested a specific verifier log level. Set up the log buffer.
+	// The caller requested a specific verifier log level. Set up the log buffer
+	// so that there is a chance of loading the program in a single shot.
 	var logBuf []byte
 	if !opts.LogDisabled && opts.LogLevel != 0 {
-		logBuf = make([]byte, opts.LogSize)
+		logBuf = make([]byte, minVerifierLogSize)
 		attr.LogLevel = opts.LogLevel
 		attr.LogSize = uint32(len(logBuf))
 		attr.LogBuf = sys.NewSlicePointer(logBuf)
 	}
 
-	fd, err := sys.ProgLoad(attr)
-	if err == nil {
-		return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
-	}
+	for {
+		var fd *sys.FD
+		fd, err = sys.ProgLoad(attr)
+		if err == nil {
+			return &Program{unix.ByteSliceToString(logBuf), fd, spec.Name, "", spec.Type}, nil
+		}
 
-	// An error occurred loading the program, but the caller did not explicitly
-	// enable the verifier log. Re-run with branch-level verifier logs enabled to
-	// obtain more info. Preserve the original error to return it to the caller.
-	// An undersized log buffer will result in ENOSPC regardless of the underlying
-	// cause.
-	var err2 error
-	if !opts.LogDisabled && opts.LogLevel == 0 {
-		logBuf = make([]byte, opts.LogSize)
-		attr.LogLevel = LogLevelBranch
-		attr.LogSize = uint32(len(logBuf))
-		attr.LogBuf = sys.NewSlicePointer(logBuf)
+		if opts.LogDisabled {
+			break
+		}
 
-		_, err2 = sys.ProgLoad(attr)
+		if attr.LogTrueSize != 0 && attr.LogSize >= attr.LogTrueSize {
+			// The log buffer already has the correct size.
+			break
+		}
+
+		if attr.LogSize != 0 && !errors.Is(err, unix.ENOSPC) {
+			// Logging is enabled and the error is not ENOSPC, so we can infer
+			// that the log buffer is large enough.
+			break
+		}
+
+		if attr.LogLevel == 0 {
+			// Logging is not enabled but loading the program failed. Enable
+			// basic logging.
+			attr.LogLevel = LogLevelBranch
+		}
+
+		// Make an educated guess how large the buffer should be. Start
+		// at minVerifierLogSize and then double the size.
+		logSize := uint32(max(len(logBuf)*2, minVerifierLogSize))
+		if int(logSize) < len(logBuf) {
+			return nil, errors.New("overflow while probing log buffer size")
+		}
+
+		if attr.LogTrueSize != 0 {
+			// The kernel has given us a hint how large the log buffer has to be.
+			logSize = attr.LogTrueSize
+		}
+
+		logBuf = make([]byte, logSize)
+		attr.LogSize = logSize
+		attr.LogBuf = sys.NewSlicePointer(logBuf)
 	}
 
 	end := bytes.IndexByte(logBuf, 0)
@@ -452,10 +455,6 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
 		}
 
 	case errors.Is(err, unix.EINVAL):
-		if opts.LogSize > maxVerifierLogSize {
-			return nil, fmt.Errorf("load program: %w (ProgramOptions.LogSize exceeds maximum value of %d)", err, maxVerifierLogSize)
-		}
-
 		if bytes.Contains(tail, coreBadCall) {
 			err = errBadRelocation
 			break
@@ -479,8 +478,7 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, er
 		}
 	}
 
-	truncated := errors.Is(err, unix.ENOSPC) || errors.Is(err2, unix.ENOSPC)
-	return nil, internal.ErrorWithLog("load program", err, logBuf, truncated)
+	return nil, internal.ErrorWithLog("load program", err, logBuf)
 }
 
 // NewProgramFromFD creates a program from a raw fd.