-
Notifications
You must be signed in to change notification settings - Fork 399
Reusable amd64 interpreter for extraction of values from disassembly #447
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
fabled
merged 39 commits into
open-telemetry:main
from
grafana:reusable-amd-interpreter
Jul 11, 2025
Merged
Changes from all commits
Commits
Show all changes
39 commits
Select commit
Hold shift + click to select a range
1815c7a
reusable amd64 interpreter
korniltsev 3903ab0
update asm/variable changes
korniltsev bef475a
inline size* consts, rename sz to bits
korniltsev d154ed7
Apply suggestions from code review
korniltsev 316680d
fix build
korniltsev 3697495
rename U64 to Expression. Rename Eval to Match.
korniltsev 29361e7
rename Match argument to `pattern`
korniltsev db73b51
lint
korniltsev 6b2503b
update RIP for "skippable instructions"
korniltsev 530971f
rewrite interpreter with switch/case isntead of if/else
korniltsev 3f4a39c
lint
korniltsev 32cce7d
add mem test
korniltsev f7a0cfc
Update asm/amd/regs_state.go
korniltsev b829b15
add a comment
korniltsev a42eb0b
renames
korniltsev 3362ed1
remove RegsState.DebugString()
korniltsev 2816cbf
remove IsJump
korniltsev cdc7473
Apply suggestions from code review
korniltsev 4932145
fix build
korniltsev e57374e
review fixes
korniltsev 93d5870
Merge remote-tracking branch 'upstream/main' into reusable-amd-interp…
korniltsev a0f52cc
lint
korniltsev dc208b8
review fixes
korniltsev 456ee85
Merge remote-tracking branch 'upstream/main' into reusable-amd-interp…
korniltsev e4e2771
introduce module specific registers
korniltsev f1d3bad
split variable into variable and capure
korniltsev f9c7087
move interpreter code to a separate file
korniltsev 433c9c8
rename decode.go to wrapper_decode.go
korniltsev 8ca045d
cleanup ops Match
korniltsev d728d8e
update comment
korniltsev 96de7ff
remove operands push/pop operations
korniltsev 3b939cd
add more disassembly to tests
korniltsev aecbafe
fix operands order
korniltsev 1528ec1
remove equalsRecursive
korniltsev 229f391
Update asm/expression/named.go
korniltsev 54108ed
review fixes
korniltsev 9589217
review fixes
korniltsev 83ac91e
Apply suggestions from code review
korniltsev 3339941
Merge remote-tracking branch 'upstream/main' into reusable-amd-interp…
korniltsev File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| testdata/fuzz |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,179 @@ | ||
| // Copyright The OpenTelemetry Authors | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| package amd // import "go.opentelemetry.io/ebpf-profiler/asm/amd" | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "io" | ||
| "math" | ||
|
|
||
| "go.opentelemetry.io/ebpf-profiler/asm/expression" | ||
| "golang.org/x/arch/x86/x86asm" | ||
| ) | ||
|
|
||
| type CodeBlock struct { | ||
| Address expression.Expression | ||
| Code []byte | ||
| } | ||
|
|
||
| type Interpreter struct { | ||
| Regs Registers | ||
| code []byte | ||
| CodeAddress expression.Expression | ||
| pc int | ||
| } | ||
|
|
||
| func NewInterpreter() *Interpreter { | ||
| it := &Interpreter{} | ||
| it.initRegs() | ||
| return it | ||
| } | ||
|
|
||
| func NewInterpreterWithCode(code []byte) *Interpreter { | ||
| it := &Interpreter{code: code, CodeAddress: expression.Named("code address")} | ||
| it.initRegs() | ||
| return it | ||
| } | ||
|
|
||
| func (i *Interpreter) ResetCode(code []byte, address expression.Expression) { | ||
| i.code = code | ||
| i.CodeAddress = address | ||
| i.pc = 0 | ||
| } | ||
|
|
||
| func (i *Interpreter) Loop() (x86asm.Inst, error) { | ||
| return i.LoopWithBreak(func(x86asm.Inst) bool { return false }) | ||
| } | ||
|
|
||
| func (i *Interpreter) LoopWithBreak(breakLoop func(op x86asm.Inst) bool) (x86asm.Inst, error) { | ||
| prev := x86asm.Inst{} | ||
| for { | ||
| op, err := i.Step() | ||
| if err != nil { | ||
| return prev, err | ||
| } | ||
| if breakLoop(op) { | ||
| return op, nil | ||
| } | ||
| prev = op | ||
| } | ||
| } | ||
|
|
||
| func (i *Interpreter) Step() (x86asm.Inst, error) { | ||
| if len(i.code) == 0 { | ||
| return x86asm.Inst{}, io.EOF | ||
| } | ||
| var inst x86asm.Inst | ||
| var err error | ||
| if ok, instLen := DecodeSkippable(i.code); ok { | ||
| inst = x86asm.Inst{Op: x86asm.NOP, Len: instLen} | ||
| } else { | ||
| inst, err = x86asm.Decode(i.code, 64) | ||
| if err != nil { | ||
| return inst, fmt.Errorf("at 0x%x : %v", i.pc, err) | ||
| } | ||
| } | ||
| i.pc += inst.Len | ||
| i.code = i.code[inst.Len:] | ||
| i.Regs.setX86asm(x86asm.RIP, expression.Add(i.CodeAddress, expression.Imm(uint64(i.pc)))) | ||
| switch inst.Op { | ||
| case x86asm.ADD: | ||
| if dst, ok := inst.Args[0].(x86asm.Reg); ok { | ||
| left := i.Regs.getX86asm(dst) | ||
| switch src := inst.Args[1].(type) { | ||
| case x86asm.Imm: | ||
| right := expression.Imm(uint64(src)) | ||
| i.Regs.setX86asm(dst, expression.Add(left, right)) | ||
| case x86asm.Reg: | ||
| right := i.Regs.getX86asm(src) | ||
| i.Regs.setX86asm(dst, expression.Add(left, right)) | ||
| case x86asm.Mem: | ||
| right := i.MemArg(src) | ||
| right = expression.MemWithSegment(src.Segment, right, inst.MemBytes) | ||
| i.Regs.setX86asm(dst, expression.Add(left, right)) | ||
| } | ||
| } | ||
| case x86asm.SHL: | ||
| if dst, ok := inst.Args[0].(x86asm.Reg); ok { | ||
| if src, imm := inst.Args[1].(x86asm.Imm); imm { | ||
| v := expression.Multiply( | ||
| i.Regs.getX86asm(dst), | ||
| expression.Imm(uint64(math.Pow(2, float64(src)))), | ||
| ) | ||
| i.Regs.setX86asm(dst, v) | ||
| } | ||
| } | ||
| case x86asm.MOV, x86asm.MOVZX, x86asm.MOVSXD, x86asm.MOVSX: | ||
| if dst, ok := inst.Args[0].(x86asm.Reg); ok { | ||
| switch src := inst.Args[1].(type) { | ||
| case x86asm.Imm: | ||
| i.Regs.setX86asm(dst, expression.Imm(uint64(src))) | ||
| case x86asm.Reg: | ||
| i.Regs.setX86asm(dst, i.Regs.getX86asm(src)) | ||
| case x86asm.Mem: | ||
| v := i.MemArg(src) | ||
|
|
||
| dataSizeBits := inst.DataSize | ||
|
|
||
| v = expression.MemWithSegment(src.Segment, v, inst.MemBytes) | ||
| if inst.Op == x86asm.MOVSXD || inst.Op == x86asm.MOVSX { | ||
| v = expression.SignExtend(v, dataSizeBits) | ||
| } else { | ||
| v = expression.ZeroExtend(v, dataSizeBits) | ||
| } | ||
| i.Regs.setX86asm(dst, v) | ||
| } | ||
| } | ||
| case x86asm.XOR: | ||
| if dst, ok := inst.Args[0].(x86asm.Reg); ok { | ||
| if src, reg := inst.Args[1].(x86asm.Reg); reg { | ||
| if src == dst { | ||
| i.Regs.setX86asm(dst, expression.Imm(0)) | ||
| } | ||
| } | ||
| } | ||
| case x86asm.AND: | ||
| if dst, ok := inst.Args[0].(x86asm.Reg); ok { | ||
| if src, imm := inst.Args[1].(x86asm.Imm); imm { | ||
| if src == 3 { // todo other cases | ||
| i.Regs.setX86asm(dst, expression.ZeroExtend(i.Regs.getX86asm(dst), 2)) | ||
| } | ||
| } | ||
| } | ||
| case x86asm.LEA: | ||
| if dst, ok := inst.Args[0].(x86asm.Reg); ok { | ||
| if src, mem := inst.Args[1].(x86asm.Mem); mem { | ||
| v := i.MemArg(src) | ||
| i.Regs.setX86asm(dst, v) | ||
| } | ||
| } | ||
| default: | ||
| } | ||
| return inst, nil | ||
| } | ||
|
|
||
| func (i *Interpreter) MemArg(src x86asm.Mem) expression.Expression { | ||
| vs := make([]expression.Expression, 0, 3) | ||
| if src.Disp != 0 { | ||
| vs = append(vs, expression.Imm(uint64(src.Disp))) | ||
| } | ||
| if src.Base != 0 { | ||
| vs = append(vs, i.Regs.getX86asm(src.Base)) | ||
| } | ||
| if src.Index != 0 { | ||
| v := expression.Multiply( | ||
| i.Regs.getX86asm(src.Index), | ||
| expression.Imm(uint64(src.Scale)), | ||
| ) | ||
| vs = append(vs, v) | ||
| } | ||
| v := expression.Add(vs...) | ||
| return v | ||
| } | ||
|
|
||
| func (i *Interpreter) initRegs() { | ||
| for j := 0; j < len(i.Regs.regs); j++ { | ||
| i.Regs.regs[j] = expression.Named(Reg(j).String()) | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,171 @@ | ||
| // Copyright The OpenTelemetry Authors | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| package amd | ||
|
|
||
| import ( | ||
| "io" | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/assert" | ||
| "github.com/stretchr/testify/require" | ||
| "go.opentelemetry.io/ebpf-profiler/asm/expression" | ||
| ) | ||
|
|
||
| func BenchmarkPythonInterpreter(b *testing.B) { | ||
| for i := 0; i < b.N; i++ { | ||
| testPythonInterpreter(b) | ||
| } | ||
| } | ||
|
|
||
| func TestPythonInterpreter(t *testing.T) { | ||
| testPythonInterpreter(t) | ||
| } | ||
|
|
||
| func testPythonInterpreter(t testing.TB) { | ||
| // 00010000 4D 89 F2 mov r10, r14 | ||
| // 00010003 45 0F B6 36 movzx r14d, byte ptr [r14] | ||
| // 00010007 48 8D 05 2D B3 35 00 lea rax, [rip + 0x35b32d] | ||
| // 0001000E 4C 8B 6C 24 08 mov r13, qword ptr [rsp + 8] | ||
| // 00010013 48 89 C1 mov rcx, rax | ||
| // 00010016 48 89 44 24 10 mov qword ptr [rsp + 0x10], rax | ||
| // 0001001B 45 0F B6 5A 01 movzx r11d, byte ptr [r10 + 1] | ||
| // 00010020 41 0F B6 C6 movzx eax, r14b | ||
| // 00010024 48 8B 04 C1 mov rax, qword ptr [rcx + rax*8] | ||
| // 00010028 FF E0 jmp rax | ||
| code := []byte{ | ||
| 0x4d, 0x89, 0xf2, 0x45, 0x0f, 0xb6, 0x36, 0x48, 0x8d, 0x05, 0x2d, 0xb3, 0x35, | ||
| 0x00, 0x4c, 0x8b, 0x6c, 0x24, 0x08, 0x48, 0x89, 0xc1, 0x48, 0x89, 0x44, 0x24, | ||
| 0x10, 0x45, 0x0f, 0xb6, 0x5a, 0x01, 0x41, 0x0f, 0xb6, 0xc6, 0x48, 0x8b, 0x04, | ||
| 0xc1, 0xff, 0xe0, | ||
| } | ||
| it := NewInterpreterWithCode(code) | ||
| it.CodeAddress = expression.Imm(0x8AF05) | ||
| r14 := it.Regs.Get(R14) | ||
| _, err := it.Loop() | ||
| if err == nil || err != io.EOF { | ||
| t.Fatal(err) | ||
| } | ||
| actual := it.Regs.Get(RAX) | ||
| expected := expression.Mem( | ||
| expression.Add( | ||
| expression.Multiply( | ||
| expression.ZeroExtend8(expression.Mem1(r14)), | ||
| expression.Imm(8), | ||
| ), | ||
| expression.NewImmediateCapture("switch table"), | ||
| ), | ||
| 8, | ||
| ) | ||
| if !actual.Match(expected) { | ||
| t.Fatal() | ||
| } | ||
| } | ||
|
|
||
| func TestRecoverSwitchCase(t *testing.T) { | ||
| blocks := []CodeBlock{ | ||
| { | ||
| Address: expression.Imm(0x3310E3), | ||
| // 003310E3 48 8B 44 24 20 mov rax, qword ptr [rsp + 0x20] | ||
| // 003310E8 48 89 18 mov qword ptr [rax], rbx | ||
| // 003310EB 49 83 C2 02 add r10, 2 | ||
| // 003310EF 44 89 E0 mov eax, r12d | ||
| // 003310F2 83 E0 03 and eax, 3 | ||
| // 003310F5 31 DB xor ebx, ebx | ||
| // 003310F7 41 F6 C4 04 test r12b, 4 | ||
| // 003310FB 4C 89 74 24 10 mov qword ptr [rsp + 0x10], r14 | ||
| // 00331100 74 08 je 0x33110a | ||
| Code: []byte{0x48, 0x8b, 0x44, 0x24, 0x20, 0x48, 0x89, 0x18, 0x49, | ||
| 0x83, 0xc2, 0x02, 0x44, 0x89, 0xe0, 0x83, 0xe0, 0x03, 0x31, 0xdb, | ||
| 0x41, 0xf6, 0xc4, 0x04, 0x4c, 0x89, 0x74, 0x24, 0x10, 0x74, 0x08}, | ||
| }, | ||
| { | ||
| Address: expression.Imm(0x33110a), | ||
| // 0033110A 4D 89 DC mov r12, r11 | ||
| // 0033110D 4D 8D 47 F8 lea r8, [r15 - 8] | ||
| // 00331111 4C 89 7C 24 60 mov qword ptr [rsp + 0x60], r15 | ||
| // 00331116 4D 8B 7F F8 mov r15, qword ptr [r15 - 8] | ||
| // 0033111A 48 8B 0D 87 06 17 01 mov rcx, qword ptr [rip + 0x1170687] | ||
| // 00331121 89 C0 mov eax, eax | ||
| // 00331123 48 8D 15 02 E7 C0 00 lea rdx, [rip + 0xc0e702] | ||
| // 0033112A 48 63 04 82 movsxd rax, dword ptr [rdx + rax*4] | ||
| // 0033112E 48 01 D0 add rax, rdx | ||
| // 00331131 4C 89 D5 mov rbp, r10 | ||
| // 00331134 4D 89 C5 mov r13, r8 | ||
| // 00331137 FF E0 jmp rax | ||
| Code: []byte{ | ||
| 0x4d, 0x89, 0xdc, 0x4d, 0x8d, 0x47, 0xf8, 0x4c, 0x89, 0x7c, 0x24, | ||
| 0x60, 0x4d, 0x8b, 0x7f, 0xf8, 0x48, 0x8b, 0x0d, 0x87, 0x06, 0x17, | ||
| 0x01, 0x89, 0xc0, 0x48, 0x8d, 0x15, 0x02, 0xe7, 0xc0, 0x00, 0x48, | ||
| 0x63, 0x04, 0x82, 0x48, 0x01, 0xd0, 0x4c, 0x89, 0xd5, 0x4d, 0x89, | ||
| 0xc5, 0xff, 0xe0, | ||
| }, | ||
| }, | ||
| } | ||
| it := NewInterpreter() | ||
| initR12 := it.Regs.Get(R12) | ||
| it.ResetCode(blocks[0].Code, blocks[0].Address) | ||
| _, err := it.Loop() | ||
| require.ErrorIs(t, err, io.EOF) | ||
|
|
||
| expected := expression.ZeroExtend(initR12, 2) | ||
| assertEval(t, it.Regs.Get(RAX), expected) | ||
| it.ResetCode(blocks[1].Code, blocks[1].Address) | ||
| _, err = it.Loop() | ||
| require.ErrorIs(t, err, io.EOF) | ||
| table := expression.NewImmediateCapture("table") | ||
| base := expression.NewImmediateCapture("base") | ||
| expected = expression.Add( | ||
| expression.SignExtend( | ||
| expression.Mem( | ||
| expression.Add( | ||
| expression.Multiply( | ||
| expression.ZeroExtend(initR12, 2), | ||
| expression.Imm(4), | ||
| ), | ||
| table, | ||
| ), | ||
| 4, | ||
| ), | ||
| 64, | ||
| ), | ||
| base, | ||
| ) | ||
| assertEval(t, it.Regs.Get(RAX), expected) | ||
| assert.EqualValues(t, 0xf3f82c, table.CapturedValue()) | ||
| assert.EqualValues(t, 0xf3f82c, base.CapturedValue()) | ||
| } | ||
|
|
||
| func assertEval(t *testing.T, left, right expression.Expression) { | ||
| if !left.Match(right) { | ||
| assert.Failf(t, "failed to eval %s to %s", left.DebugString(), right.DebugString()) | ||
| t.Logf("left %s", left.DebugString()) | ||
| t.Logf("right %s", right.DebugString()) | ||
| } | ||
| } | ||
|
|
||
| func FuzzInterpreter(f *testing.F) { | ||
| f.Fuzz(func(_ *testing.T, code []byte) { | ||
| i := NewInterpreterWithCode(code) | ||
| _, _ = i.Loop() | ||
| }) | ||
| } | ||
|
|
||
| func TestMoveSignExtend(t *testing.T) { | ||
| i := NewInterpreterWithCode([]byte{ | ||
| // 00000000 B8 01 00 00 00 mov eax, 1 | ||
| // 00000005 8B 40 04 mov eax, dword ptr [rax + 4] | ||
| // 00000008 B8 02 00 00 00 mov eax, 2 | ||
| // 0000000D 48 0F B6 40 04 movzx rax, byte ptr [rax + 4] | ||
| // 00000012 B8 03 00 00 00 mov eax, 3 | ||
| // 00000017 48 0F BF 40 04 movsx rax, word ptr [rax + 4] | ||
| 0xB8, 0x01, 0x00, 0x00, 0x00, 0x8B, 0x40, 0x04, | ||
| 0xB8, 0x02, 0x00, 0x00, 0x00, 0x48, 0x0F, 0xB6, | ||
| 0x40, 0x04, 0xB8, 0x03, 0x00, 0x00, 0x00, 0x48, | ||
| 0x0F, 0xBF, 0x40, 0x04, | ||
| }) | ||
| _, err := i.Loop() | ||
| require.ErrorIs(t, err, io.EOF) | ||
| pattern := expression.SignExtend(expression.Mem(expression.Imm(7), 2), 64) | ||
| require.True(t, i.Regs.Get(RAX).Match(pattern)) | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
General observation: All switch cases call `i.Regs.setX86asm()` if everything works as expected. Should we inform the user that something unexpected happened if `i.Regs.setX86asm()` is not used in some cases?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are instructions and instruction types that currently do nothing in the interpreter and are ignored, for example writing or reading memory, tests, and jumps. We should not inform on those. We have tests for stub decodings; they should be enough.