diff --git a/interpreter/ruby/ec.go b/interpreter/ruby/ec.go new file mode 100644 index 000000000..8ce15d6d4 --- /dev/null +++ b/interpreter/ruby/ec.go @@ -0,0 +1,69 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ruby // import "go.opentelemetry.io/ebpf-profiler/interpreter/ruby" + +import ( + "debug/elf" + "fmt" + + "go.opentelemetry.io/ebpf-profiler/asm/amd" + "go.opentelemetry.io/ebpf-profiler/asm/arm" + "go.opentelemetry.io/ebpf-profiler/internal/log" + "go.opentelemetry.io/ebpf-profiler/libpf" + "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" +) + +// extractEcTLSOffset extracts the direct TP-relative TLS offset for ruby_current_ec +// by disassembling rb_current_ec_noinline. This is used for statically-linked Ruby +// binaries where TLS descriptors are not available. +// +// The function uses the same TLS extraction infrastructure as Python 3.13+ +// (asm/amd.ExtractTLSOffset and asm/arm.ExtractTLSOffset). +func extractEcTLSOffset(ef *pfelf.File) (int64, error) { + symbolName := libpf.SymbolName("rb_current_ec_noinline") + sym, code, err := ef.SymbolData(symbolName, 2048) + if err != nil { + // Fallback: try VisitSymbols for binaries with local symbols not in .dynsym + sym = &libpf.Symbol{} + found := false + if visitErr := ef.VisitSymbols(func(s libpf.Symbol) bool { + if s.Name == symbolName { + data, readErr := ef.VirtualMemory(int64(s.Address), int(s.Size), 2048) + if readErr != nil { + log.Errorf("Failed to read memory for %s: %v", symbolName, readErr) + } else { + code = data + sym.Address = s.Address + found = true + } + return false + } + return true + }); visitErr != nil { + return 0, fmt.Errorf("failed to visit symbols: %w", visitErr) + } + if !found { + return 0, fmt.Errorf("symbol %s not found", symbolName) + } + } + + if len(code) < 4 { + return 0, fmt.Errorf("%s function too small (%d bytes)", symbolName, len(code)) + } + + var offset int32 + switch ef.Machine { + case elf.EM_X86_64: + offset, err = amd.ExtractTLSOffset(code, uint64(sym.Address), nil) + case elf.EM_AARCH64: + offset, err = arm.ExtractTLSOffset(code, uint64(sym.Address), ef) + default: + return 0, fmt.Errorf("unsupported architecture: %s", ef.Machine) + } + if err != nil { + return 0, fmt.Errorf("failed to extract TLS offset from %s: %w", symbolName, err) + } + + return int64(offset), nil +} diff --git a/interpreter/ruby/ec_test.go b/interpreter/ruby/ec_test.go new file mode 100644 index 000000000..34dca526f --- /dev/null +++ b/interpreter/ruby/ec_test.go @@ -0,0 +1,77 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ruby + +import ( + "debug/elf" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/ebpf-profiler/asm/amd" + "go.opentelemetry.io/ebpf-profiler/asm/arm" +) + +func TestExtractEcTLSOffset(t *testing.T) { + tests := map[string]struct { + machine elf.Machine + code []byte + offset int32 + }{ + // rb_current_ec_noinline for statically-linked ruby 4.0 on x86_64: + // mov %fs:0xffffffffffffff88,%rax + // ret + "ruby 4.0 static / x86_64": { + machine: elf.EM_X86_64, + code: []byte{ + 0x64, 0x48, 0x8b, 0x04, 0x25, 0x88, 0xff, 0xff, 0xff, + 0xc3, + }, + offset: -120, + }, + // rb_current_ec_noinline for statically-linked ruby 3.4.7 on x86_64: + // mov %fs:0xfffffffffffffff8,%rax + // ret + "ruby 3.4.7 static / x86_64": { + machine: elf.EM_X86_64, + code: []byte{ + 0x64, 0x48, 0x8b, 0x04, 0x25, 0xf8, 0xff, 0xff, 0xff, + 0xc3, + }, + offset: -8, + }, + // rb_current_ec_noinline for statically-linked ruby 3.4.7 on aarch64: + // mrs x0, tpidr_el0 + // add x0, x0, #0x0, lsl #12 + // add x0, x0, #0x38 + // ldr x0, [x0] + // ret + "ruby 3.4.7 static / aarch64": { + machine: elf.EM_AARCH64, + code: []byte{ + 0x40, 0xd0, 0x3b, 0xd5, // mrs x0, tpidr_el0 + 0x00, 0x00, 0x40, 0x91, // add x0, x0, #0x0, lsl #12 + 0x00, 0xe0, 0x00, 0x91, // add x0, x0, #0x38 + 0x00, 0x00, 0x40, 0xf9, // ldr x0, [x0] + 0xc0, 0x03, 0x5f, 0xd6, // ret + }, + offset: 56, + }, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + var offset int32 + var err error + switch tc.machine { + case elf.EM_X86_64: + offset, err = amd.ExtractTLSOffset(tc.code, 0, nil) + case elf.EM_AARCH64: + offset, err = arm.ExtractTLSOffset(tc.code, 0, nil) + } + require.NoError(t, err) + assert.Equal(t, tc.offset, offset, "wrong ruby EC TLS offset") + }) + } +} diff --git a/interpreter/ruby/ruby.go b/interpreter/ruby/ruby.go index 22af461f8..ba885d2b0 100644 --- a/interpreter/ruby/ruby.go +++ b/interpreter/ruby/ruby.go @@ -96,8 +96,10 @@ const ( ) var ( - // regex to identify the Ruby interpreter executable - rubyRegex = regexp.MustCompile(`^(?:.*/)?libruby(?:-.*)?\.so\.(\d+)\.(\d+)\.(\d+)$`) + // regex to identify the Ruby interpreter shared library + libRubyRegex = regexp.MustCompile(`^(?:.*/)?libruby(?:-.*)?\.so\.(\d+)\.(\d+)\.(\d+)$`) + // regex to identify a statically-linked Ruby binary + binRubyRegex = regexp.MustCompile(`^(?:.*/)?(?:bin/)?ruby$`) // regex to extract a version from a string rubyVersionRegex = regexp.MustCompile(`^(\d+)\.(\d+)\.(\d+)$`) @@ -123,6 +125,10 @@ type rubyData struct { // Address to the ruby_current_ec variable in TLS, as an offset from tpbase currentEcTpBaseTlsOffset libpf.Address + // For statically-linked ruby, the direct TP-relative offset to ruby_current_ec + // extracted from disassembly of rb_current_ec_noinline + staticTLSOffset int64 + // Address to global symbols, for id to string mappings globalSymbolsAddr libpf.Address // version of the currently used Ruby interpreter. @@ -295,10 +301,14 @@ func (r *rubyData) String() string { func (r *rubyData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libpf.Address, rm remotememory.RemoteMemory, ) (interpreter.Instance, error) { - var tlsOffset uint64 - if r.currentEcTpBaseTlsOffset != 0 { + var tlsOffset int64 + if r.staticTLSOffset != 0 { + // For statically-linked ruby, use the direct TP-relative offset + // extracted from disassembly of rb_current_ec_noinline. + tlsOffset = r.staticTLSOffset + } else if r.currentEcTpBaseTlsOffset != 0 { // Read TLS offset from the TLS descriptor. - tlsOffset = rm.Uint64(bias + r.currentEcTpBaseTlsOffset + 8) + tlsOffset = int64(rm.Uint64(bias + r.currentEcTpBaseTlsOffset + 8)) } cdata := support.RubyProcInfo{ @@ -1238,7 +1248,8 @@ func determineRubyVersion(ef *pfelf.File) (uint32, error) { } func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) { - if !rubyRegex.MatchString(info.FileName()) { + isBinRuby := binRubyRegex.MatchString(info.FileName()) + if !libRubyRegex.MatchString(info.FileName()) && !isBinRuby { return nil, nil } @@ -1363,11 +1374,26 @@ func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpr log.Warnf("failed to locate TLS descriptor: %v", err) } - log.Debugf("Discovered EC tls tpbase offset %x, fallback ctx %x, interp ranges: %v, global symbols: %x", currentEcTpBaseTlsOffset, currentCtxPtr, interpRanges, globalSymbols) + // For statically-linked ruby, extract the direct TP-relative offset from + // rb_current_ec_noinline disassembly. This is the same pattern Python 3.13+ + // uses for _PyThreadState_GetCurrent. + var staticTLSOffset int64 + if isBinRuby { + offset, ecErr := extractEcTLSOffset(ef) + if ecErr != nil { + log.Warnf("failed to extract EC TLS offset for static ruby: %v", ecErr) + } else { + staticTLSOffset = offset + } + } + + log.Debugf("Discovered EC tls tpbase offset %x, static tls offset %d, fallback ctx %x, interp ranges: %v, global symbols: %x", + currentEcTpBaseTlsOffset, staticTLSOffset, currentCtxPtr, interpRanges, globalSymbols) rid := &rubyData{ version: version, currentEcTpBaseTlsOffset: libpf.Address(currentEcTpBaseTlsOffset), + staticTLSOffset: staticTLSOffset, currentCtxPtr: libpf.Address(currentCtxPtr), hasGlobalSymbols: globalSymbols != 0, globalSymbolsAddr: libpf.Address(globalSymbols), diff --git a/interpreter/ruby/ruby_test.go b/interpreter/ruby/ruby_test.go index 3e89c89e9..e3328b68b 100644 --- a/interpreter/ruby/ruby_test.go +++ b/interpreter/ruby/ruby_test.go @@ -62,7 +62,7 @@ func TestRubyRegex(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - matches := rubyRegex.FindStringSubmatch(tt.input) + matches := libRubyRegex.FindStringSubmatch(tt.input) if !tt.match { assert.Nil(t, matches) return diff --git a/support/ebpf/tracer.ebpf.amd64 b/support/ebpf/tracer.ebpf.amd64 index 2c7fea548..3bb922419 100644 Binary files a/support/ebpf/tracer.ebpf.amd64 and b/support/ebpf/tracer.ebpf.amd64 differ diff --git a/support/ebpf/tracer.ebpf.arm64 b/support/ebpf/tracer.ebpf.arm64 index 8d13ca789..c7b02f6c1 100644 Binary files a/support/ebpf/tracer.ebpf.arm64 and b/support/ebpf/tracer.ebpf.arm64 differ diff --git a/support/ebpf/types.h b/support/ebpf/types.h index 1168a761a..8903cb00e 100644 --- a/support/ebpf/types.h +++ b/support/ebpf/types.h @@ -468,8 +468,9 @@ typedef struct RubyProcInfo { // version of the Ruby interpreter. u32 version; - // tls_offset holds TLS base + ruby_current_ec tls symbol, as an offset from tpbase - u64 current_ec_tpbase_tls_offset; + // tls_offset holds TLS base + ruby_current_ec tls symbol, as an offset from tpbase. + // Signed because static TLS offsets (local exec model) are negative on x86_64. + s64 current_ec_tpbase_tls_offset; // current_ctx_ptr holds the address of the symbol ruby_current_execution_context_ptr. u64 current_ctx_ptr; diff --git a/support/types.go b/support/types.go index 9c4e115fe..e68df34ac 100644 --- a/support/types.go +++ b/support/types.go @@ -284,7 +284,7 @@ type PyProcInfo struct { } type RubyProcInfo struct { Version uint32 - Current_ec_tpbase_tls_offset uint64 + Current_ec_tpbase_tls_offset int64 Current_ctx_ptr uint64 Has_objspace bool Vm_stack uint8 diff --git a/tools/coredump/testdata/amd64/ruby-3.4.7-static-loop.json b/tools/coredump/testdata/amd64/ruby-3.4.7-static-loop.json new file mode 100644 index 000000000..52d3d0634 --- /dev/null +++ b/tools/coredump/testdata/amd64/ruby-3.4.7-static-loop.json @@ -0,0 +1,89 @@ +{ + "coredump-ref": "17a69422179148059ca9465a0cbc351a03da993c93cc9cebbd2fe0e4a661c132", + "threads": [ + { + "lwp": 18596, + "frames": [ + "Object#is_prime+0 in /home/dalehamel/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:14", + "ruby+0x203060", + "ruby+0x207e97", + "ruby+0x11a9cd", + "ruby+0x1f380b", + "ruby+0x1fa1d0", + "ruby+0x214344", + "Range#each+0 in :0", + "Object#is_prime+0 in /home/dalehamel/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:14", + "Object#sum_of_primes+0 in /home/dalehamel/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:24", + "block (2 levels) in
+0 in /home/dalehamel/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:34", + "ruby+0x203231", + "ruby+0x207e97", + "ruby+0x11a8bd", + "ruby+0x1f24f4", + "ruby+0x1fa1d0", + "ruby+0x214344", + "Range#each+0 in :0", + "block in
+0 in /home/dalehamel/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:33", + "Kernel#loop+0 in :168", + "
+0 in /home/dalehamel/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:32", + "ruby+0x2031b4", + "ruby+0x406f8", + "ruby+0x4263c", + "ruby+0x3d171", + "libc.so.6+0x27249", + "libc.so.6+0x27304", + "ruby+0x3d1b0" + ] + }, + { + "lwp": 18598, + "frames": [ + "libc.so.6+0x108f26", + "ruby+0x1b414e", + "libc.so.6+0x891f4", + "libc.so.6+0x1098db" + ] + } + ], + "modules": [ + { + "ref": "6363d41d2a3a7e65b6f44a2fa55234b3bd8ad1497d1b6c5892635b4cbacbaa24", + "local-path": "/home/dalehamel/.rubies/ruby-3.4.7/lib/ruby/3.4.0/x86_64-linux/enc/trans/transdb.so" + }, + { + "ref": "7f2ca87f652f56b094462474b076749e90e689d0ecb9cb63c7679820b271b4e7", + "local-path": "/usr/lib/x86_64-linux-gnu/libm.so.6" + }, + { + "ref": "5db18e8a8894ef4746eb8230855b638a5e52e782b2f10deede5f1dad846178bb", + "local-path": "/usr/lib/x86_64-linux-gnu/libcrypt.so.1.1.0" + }, + { + "ref": "7376c9af0afd6e7698a64ee19de3c8a0199418664974384c70435a51c7ff7f3f", + "local-path": "/usr/lib/x86_64-linux-gnu/libgmp.so.10.4.1" + }, + { + "ref": "7e2a72b4c4b38c61e6962de6e3f4a5e9ae692e732c68deead10a7ce2135a7f68", + "local-path": "/usr/lib/x86_64-linux-gnu/libz.so.1.2.13" + }, + { + "ref": "3159c3955f3aa3d599518107297a01b1548518c8e111a2dd32a6b1410d67a723", + "local-path": "/home/dalehamel/.rubies/ruby-3.4.7/lib/ruby/3.4.0/x86_64-linux/enc/encdb.so" + }, + { + "ref": "ff54e20c46ea00625e912916143f1ab0973426d153d343a766d650a323fb6deb", + "local-path": "/home/dalehamel/.rubies/ruby-3.4.7/bin/ruby" + }, + { + "ref": "0e9275bc9b81736220d63e9876de3050dfcae20e8b29beb46d3d54d1e4d8625b", + "local-path": "/home/dalehamel/.rubies/ruby-3.4.7/lib/ruby/3.4.0/x86_64-linux/monitor.so" + }, + { + "ref": "bff8750fe719e6000791b88b11747dce8772c37118d0b2348044b70819d13835", + "local-path": "/usr/lib/x86_64-linux-gnu/libc.so.6" + }, + { + "ref": "593bb1d5355658e645f36e6b1f49832691b24e177209765914e4cce51499dbb4", + "local-path": "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + } + ] +} diff --git a/tools/coredump/testdata/arm64/ruby-3.4.7-static-loop.json b/tools/coredump/testdata/arm64/ruby-3.4.7-static-loop.json new file mode 100644 index 000000000..44d143235 --- /dev/null +++ b/tools/coredump/testdata/arm64/ruby-3.4.7-static-loop.json @@ -0,0 +1,89 @@ +{ + "coredump-ref": "fcefaded8c9fdf2d69eb6edfa8fa7caa58a70b2b5875923a0ed8effb691727cc", + "threads": [ + { + "lwp": 1384196, + "frames": [ + "Object#is_prime+0 in /Users/dalehamel/src/github.com/open-telemetry/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:14", + "ruby+0x2021d0", + "ruby+0x206b6f", + "ruby+0x1198c3", + "ruby+0x1f6acb", + "ruby+0x1fa6c3", + "ruby+0x213907", + "Range#each+0 in :0", + "Object#is_prime+0 in /Users/dalehamel/src/github.com/open-telemetry/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:14", + "Object#sum_of_primes+0 in /Users/dalehamel/src/github.com/open-telemetry/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:24", + "block (2 levels) in
+0 in /Users/dalehamel/src/github.com/open-telemetry/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:34", + "ruby+0x20239f", + "ruby+0x206b6f", + "ruby+0x11982b", + "ruby+0x1f493b", + "ruby+0x1fa6c3", + "ruby+0x213907", + "Range#each+0 in :0", + "block in
+0 in /Users/dalehamel/src/github.com/open-telemetry/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:33", + "Kernel#loop+0 in :168", + "
+0 in /Users/dalehamel/src/github.com/open-telemetry/opentelemetry-ebpf-profiler/tools/coredump/testsources/ruby/loop.rb:32", + "ruby+0x20230b", + "ruby+0x3cdef", + "ruby+0x3f0c3", + "ruby+0x399cf", + "libc.so.6+0x284c3", + "libc.so.6+0x28597", + "ruby+0x39a6f" + ] + }, + { + "lwp": 1384198, + "frames": [ + "libc.so.6+0xebe34", + "ruby+0x1af363", + "libc.so.6+0x8595b", + "libc.so.6+0xebb0b" + ] + } + ], + "modules": [ + { + "ref": "a896f19dbc725967448640f79f14b7a7793da4b99c62e441b64775a1bb00f31b", + "local-path": "/opt/rubies/ruby-3.4.7/bin/ruby" + }, + { + "ref": "fd8ac7acb6b107b40b31f1fdded22ef4543c60ff9195e847fd27587f032ed5f7", + "local-path": "/opt/rubies/ruby-3.4.7/lib/ruby/3.4.0/aarch64-linux/monitor.so" + }, + { + "ref": "ef7faa5b266a3c40c1e40be004a5d6f27e16cf2787ef43cc2070d1feb2eee1ad", + "local-path": "/opt/rubies/ruby-3.4.7/lib/ruby/3.4.0/aarch64-linux/enc/encdb.so" + }, + { + "ref": "2fd7c8f96a208532befb77cec1cfee8a08217a97b77d8eed382537fa6cc6e72c", + "local-path": "/usr/lib/aarch64-linux-gnu/libc.so.6" + }, + { + "ref": "b41cebf0be70f869bf60228cb5761f875ced865b0f4016f544d82f7a9ded28b0", + "local-path": "/usr/lib/aarch64-linux-gnu/libcrypt.so.1.1.0" + }, + { + "ref": "f572bd122e2b3b14d2f9d8d4345e1351fd9fbf26f9402e3e42ef36b65f833411", + "local-path": "/usr/lib/aarch64-linux-gnu/ld-linux-aarch64.so.1" + }, + { + "ref": "4510e7c51844a966dce56e46504aea552b5b27a48eeb9c7b45eb3ac93274e771", + "local-path": "/opt/rubies/ruby-3.4.7/lib/ruby/3.4.0/aarch64-linux/enc/trans/transdb.so" + }, + { + "ref": "ac92dc80c7d902e3852986e858ba682a981c15a12ec0e163bd4d56dde9b3dd6e", + "local-path": "/usr/lib/aarch64-linux-gnu/libm.so.6" + }, + { + "ref": "39b6701812ed7135f28df49352b3f6664c7a9f56880a3fe50c1b87cd7681db9b", + "local-path": "/usr/lib/aarch64-linux-gnu/libgmp.so.10.5.0" + }, + { + "ref": "170380b4e7ab28ec86eb090b48df90f84089392cb72fecd5067e5b7a4dc5239f", + "local-path": "/usr/lib/aarch64-linux-gnu/libz.so.1.3" + } + ] +} diff --git a/tools/coredump/testsources/ruby/README-GC.md b/tools/coredump/testsources/ruby/README-GC.md index b25bcee50..e555bd99c 100644 --- a/tools/coredump/testsources/ruby/README-GC.md +++ b/tools/coredump/testsources/ruby/README-GC.md @@ -58,5 +58,15 @@ We should now have a coredump that is being taken during GC # Coredump without GC -Add `GC.disable` to the ruby script before running it to ensure GC won't be -running when the coredump is taken +Set the `RUBY_DISABLE_GC` environment variable before running the script: + +``` +RUBY_DISABLE_GC=1 ruby tools/coredump/testsources/ruby/loop.rb +``` + +The `loop.rb` script checks for this variable and calls `GC.disable` at startup. + +**Important:** When GC is disabled, Ruby's heap grows without bound. Capture the +coredump quickly (within a few seconds of starting the process) to avoid bloating +the coredump file size. A coredump taken after 2 seconds is typically ~30-40MB, +while one taken after 30+ seconds can exceed 100MB. diff --git a/tools/coredump/testsources/ruby/loop.rb b/tools/coredump/testsources/ruby/loop.rb index f10cce0d9..0aed28676 100644 --- a/tools/coredump/testsources/ruby/loop.rb +++ b/tools/coredump/testsources/ruby/loop.rb @@ -1,5 +1,9 @@ #!/usr/bin/env ruby +# Set RUBY_DISABLE_GC=1 to disable garbage collection. +# This avoids capturing GC frames in coredumps. +GC.disable if ENV["RUBY_DISABLE_GC"] + def is_prime(n) if n < 2 return false