Skip to content

Commit

Permalink
tools/syz-trace2syz/proggen: add ParseFile function
Browse files Browse the repository at this point in the history
Current code structuring has 2 problems:

1. parsing anything with proggen requires complex multistep dance including
 - parsing data with parser
 - walking the resulting tree manually and calling proggen on each
 - then for each context
   - calling FillOutMemory (unclear why it's not part of parsing)
   - calling prog.Finalize
   - checking that the program is not too large
All of this is duplicated across trace2syz and tests.
And any new tests or fuzzers we will write will need to duplicate
all of this logic too.

2. As a result of this structuring, lots of proggen guts
and implementation details are exposed.
None of the callers are actually interested in Context details;
in fact, they are not interested in Context itself at all.

What every caller wants is "here is data to parse, give me programs".
Add such a function.
  • Loading branch information
dvyukov committed Dec 7, 2018
1 parent 840b5cc commit 8056889
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 98 deletions.
10 changes: 0 additions & 10 deletions tools/syz-trace2syz/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package parser
import (
"bufio"
"bytes"
"io/ioutil"
"strings"

"github.com/google/syzkaller/pkg/log"
Expand Down Expand Up @@ -48,12 +47,3 @@ func ParseLoop(data []byte) *TraceTree {
}
return tree
}

// Parse loads the system call trace stored in filename and hands its contents
// to ParseLoop, returning the resulting intermediate representation.
// A failure to read the file is reported through log.Fatalf.
func Parse(filename string) *TraceTree {
	contents, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		log.Fatalf("error reading file: %s", readErr.Error())
	}
	tree := ParseLoop(contents)
	return tree
}
10 changes: 5 additions & 5 deletions tools/syz-trace2syz/proggen/call_selector.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@ var discriminatorArgs = map[string][]int{
"getsockname": {0},
}

type CallSelector struct {
type callSelector struct {
callCache map[string][]*prog.Syscall
}

func NewCallSelector() *CallSelector {
return &CallSelector{callCache: make(map[string][]*prog.Syscall)}
func newCallSelector() *callSelector {
return &callSelector{callCache: make(map[string][]*prog.Syscall)}
}

// Select returns the best matching discrimination for this syscall.
func (cs *CallSelector) Select(ctx *Context, call *parser.Syscall) *prog.Syscall {
func (cs *callSelector) Select(ctx *Context, call *parser.Syscall) *prog.Syscall {
match := ctx.Target.SyscallMap[call.CallName]
discriminators := discriminatorArgs[call.CallName]
if len(discriminators) == 0 {
Expand All @@ -49,7 +49,7 @@ func (cs *CallSelector) Select(ctx *Context, call *parser.Syscall) *prog.Syscall
}

// callSet returns all syscalls with the given name.
func (cs *CallSelector) callSet(ctx *Context, callName string) []*prog.Syscall {
func (cs *callSelector) callSet(ctx *Context, callName string) []*prog.Syscall {
calls, ok := cs.callCache[callName]
if ok {
return calls
Expand Down
11 changes: 3 additions & 8 deletions tools/syz-trace2syz/proggen/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,17 @@ type Context struct {
CurrentStraceArg parser.IrType
Target *prog.Target
Tracker *memoryTracker
CallSelector *CallSelector
callSelector *callSelector
}

func newContext(target *prog.Target, selector *CallSelector) *Context {
func newContext(target *prog.Target) *Context {
return &Context{
ReturnCache: newRCache(),
Tracker: newTracker(),
Target: target,
CallSelector: selector,
callSelector: newCallSelector(),
Prog: &prog.Prog{
Target: target,
},
}
}

// FillOutMemory assigns addresses to the pointer arguments of ctx.Prog.
// The work is delegated to the context's memory tracker; whatever error
// the tracker reports is returned unchanged.
func (ctx *Context) FillOutMemory() error {
	tracker := ctx.Tracker
	return tracker.fillOutPtrArgs(ctx.Prog)
}
54 changes: 49 additions & 5 deletions tools/syz-trace2syz/proggen/proggen.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,49 @@ package proggen

import (
"encoding/binary"
"io/ioutil"
"math/rand"

"github.com/google/syzkaller/pkg/log"
"github.com/google/syzkaller/prog"
"github.com/google/syzkaller/tools/syz-trace2syz/parser"
)

// GenSyzProg converts a trace to one of our programs.
func GenSyzProg(trace *parser.Trace, target *prog.Target, selector *CallSelector) *Context {
ctx := newContext(target, selector)
// ParseFile reads the trace stored in filename and converts it into
// programs for target by passing the file contents to ParseData.
// A failure to read the file is reported through log.Fatalf.
func ParseFile(filename string, target *prog.Target) []*prog.Prog {
	contents, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		log.Fatalf("error reading file: %v", readErr)
	}
	progs := ParseData(contents, target)
	return progs
}

// ParseData parses raw trace data and converts it into programs for target.
// The data is first turned into a trace tree by parser.ParseLoop; when that
// yields no tree, nil is returned. Otherwise programs are collected by
// walking the tree starting at its root pid.
func ParseData(data []byte, target *prog.Target) []*prog.Prog {
	traceTree := parser.ParseLoop(data)
	var result []*prog.Prog
	if traceTree != nil {
		parseTree(traceTree, traceTree.RootPid, target, &result)
	}
	return result
}

// parseTree walks the process hierarchy (parent -> []child) rooted at pid.
// For each visited pid it generates a program from that pid's trace and, if
// generation produced one, appends it to *progs. Children that have no trace
// recorded in tree.TraceMap are not descended into.
func parseTree(tree *parser.TraceTree, pid int64, target *prog.Target, progs *[]*prog.Prog) {
	log.Logf(2, "parsing trace pid %v", pid)
	generated := genProg(tree.TraceMap[pid], target)
	if generated != nil {
		*progs = append(*progs, generated)
	}
	for _, child := range tree.Ptree[pid] {
		if tree.TraceMap[child] == nil {
			// No trace was recorded for this child; skip its whole subtree.
			continue
		}
		parseTree(tree, child, target, progs)
	}
}

// genProg converts a trace to one of our programs.
func genProg(trace *parser.Trace, target *prog.Target) *prog.Prog {
ctx := newContext(target)
for _, sCall := range trace.Calls {
if sCall.Paused {
// Probably a case where the call was killed by a signal like the following
Expand All @@ -35,14 +68,25 @@ func GenSyzProg(trace *parser.Trace, target *prog.Target, selector *CallSelector
}
ctx.Prog.Calls = append(ctx.Prog.Calls, call)
}
return ctx
if err := ctx.Tracker.fillOutPtrArgs(ctx.Prog); err != nil {
log.Logf(1, "failed to fill out memory: %v, skipping this prog", err)
return nil
}
if err := ctx.Prog.Finalize(); err != nil {
log.Fatalf("error validating program: %v", err)
}
if _, err := ctx.Prog.SerializeForExec(make([]byte, prog.ExecBufferSize)); err != nil {
log.Logf(1, "prog is too large")
return nil
}
return ctx.Prog
}

func genCall(ctx *Context) *prog.Call {
log.Logf(3, "parsing call: %s", ctx.CurrentStraceCall.CallName)
straceCall := ctx.CurrentStraceCall
ctx.CurrentSyzCall = new(prog.Call)
ctx.CurrentSyzCall.Meta = ctx.CallSelector.Select(ctx, straceCall)
ctx.CurrentSyzCall.Meta = ctx.callSelector.Select(ctx, straceCall)
syzCall := ctx.CurrentSyzCall
if ctx.CurrentSyzCall.Meta == nil {
log.Logf(2, "skipping call: %s which has no matching description", ctx.CurrentStraceCall.CallName)
Expand Down
31 changes: 14 additions & 17 deletions tools/syz-trace2syz/proggen/proggen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,20 @@ func initializeTarget(os, arch string) *prog.Target {
return target
}

func parseSingleTrace(t *testing.T, data string) *Context {
func parseSingleTrace(t *testing.T, data string) *prog.Prog {
target := initializeTarget(OS, Arch)
selector := NewCallSelector()
traceTree := parser.ParseLoop([]byte(data))
ctx := GenSyzProg(traceTree.TraceMap[traceTree.RootPid], target, selector)
ctx.FillOutMemory()
if err := ctx.Prog.Finalize(); err != nil {
t.Fatalf("failed to parse trace: %s", err.Error())
p := genProg(traceTree.TraceMap[traceTree.RootPid], target)
if p == nil {
t.Fatalf("failed to parse trace")
}
return ctx
return p
}

func TestParseTraceBasic(t *testing.T) {
test := `open("file", 66) = 3
write(3, "somedata", 8) = 8`
ctx := parseSingleTrace(t, test)
p := ctx.Prog
p := parseSingleTrace(t, test)
expectedSeq := "open-write"
if p.String() != expectedSeq {
t.Fatalf("expected: %s != %s", expectedSeq, p.String())
Expand All @@ -65,7 +62,7 @@ func TestParseTraceBasic(t *testing.T) {
func TestParseTraceInnerResource(t *testing.T) {
test := `pipe([5,6]) = 0
write(6, "\xff\xff\xfe\xff", 4) = 4`
p := parseSingleTrace(t, test).Prog
p := parseSingleTrace(t, test)
expectedSeq := "pipe-write"
if p.String() != expectedSeq {
t.Fatalf("Expected: %s != %s", expectedSeq, p.String())
Expand All @@ -85,7 +82,7 @@ func TestNegativeResource(t *testing.T) {
test := `socket(29, 3, 1) = 3
getsockopt(-1, 132, 119, 0x200005c0, [14]) = -1 EBADF (Bad file descriptor)`

p := parseSingleTrace(t, test).Prog
p := parseSingleTrace(t, test)
expectedSeq := "socket$can_raw-getsockopt$inet_sctp6_SCTP_RESET_STREAMS"
if p.String() != expectedSeq {
t.Fatalf("expected: %s != %s", expectedSeq, p.String())
Expand All @@ -107,7 +104,7 @@ func TestDistinguishResourceTypes(t *testing.T) {
write(3, "temp", 5) = 5
inotify_rm_watch(2, 3) = 0`
expectedSeq := "inotify_init-open-inotify_add_watch-write-inotify_rm_watch"
p := parseSingleTrace(t, test).Prog
p := parseSingleTrace(t, test)
if p.String() != expectedSeq {
t.Fatalf("Expected: %s != %s", expectedSeq, p.String())
}
Expand Down Expand Up @@ -139,7 +136,7 @@ func TestSocketLevel(t *testing.T) {
socket(1, 1 | 524288, 0) = 3
socket(1, 1 | 524288, 0) = 3`
expectedSeq := "socket$unix-socket$unix-socket$unix-socket$unix"
p := parseSingleTrace(t, test).Prog
p := parseSingleTrace(t, test)
if p.String() != expectedSeq {
t.Fatalf("Expected: %s != %s", expectedSeq, p.String())
}
Expand Down Expand Up @@ -194,7 +191,7 @@ func TestIdentifySockaddrStorage(t *testing.T) {
}

for i, test := range tests {
p := parseSingleTrace(t, test.test).Prog
p := parseSingleTrace(t, test.test)
if p.String() != test.expectedSeq {
t.Fatalf("failed btest: %d, expected: %s != %s", i, test.expectedSeq, p.String())
}
Expand All @@ -219,7 +216,7 @@ func TestIdentifyIfru(t *testing.T) {
}

for i, test := range tests {
p := parseSingleTrace(t, test.test).Prog
p := parseSingleTrace(t, test.test)
if p.String() != test.expectedSeq {
t.Fatalf("failed subtest: %d, expected %s != %s", i, test.expectedSeq, p.String())
}
Expand Down Expand Up @@ -272,7 +269,7 @@ func TestParseVariants(t *testing.T) {
}

for i, test := range tests {
p := parseSingleTrace(t, test.test).Prog
p := parseSingleTrace(t, test.test)
if p.String() != test.expectedSeq {
t.Fatalf("failed subtest: %d, expected %s != %s", i, test.expectedSeq, p.String())
}
Expand Down Expand Up @@ -321,7 +318,7 @@ func TestParseIPv4(t *testing.T) {
}
}
for i, test := range tests {
p := parseSingleTrace(t, test.test).Prog
p := parseSingleTrace(t, test.test)
if p.String() != test.expectedSeq {
t.Fatalf("failed subtest: %d, expected %s != %s", i, test.expectedSeq, p.String())
}
Expand Down
61 changes: 8 additions & 53 deletions tools/syz-trace2syz/trace2syz.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@ import (
"github.com/google/syzkaller/pkg/osutil"
"github.com/google/syzkaller/prog"
_ "github.com/google/syzkaller/sys"
"github.com/google/syzkaller/tools/syz-trace2syz/parser"
"github.com/google/syzkaller/tools/syz-trace2syz/proggen"
)

var (
flagFile = flag.String("file", "", "file to parse")
flagDir = flag.String("dir", "", "directory to parse")
flagDeserialize = flag.String("deserialize", "", "(Optional) directory to store deserialized programs")
callSelector = proggen.NewCallSelector()
)

const (
Expand Down Expand Up @@ -74,47 +72,20 @@ func parseTraces(target *prog.Target) []*prog.Prog {
log.Logf(0, "parsing %v traces", totalFiles)
for i, file := range names {
log.Logf(1, "parsing file %v/%v: %v", i+1, totalFiles, filepath.Base(names[i]))
tree := parser.Parse(file)
if tree == nil {
log.Logf(1, "file: %s is empty", filepath.Base(file))
continue
}
ctxs := parseTree(tree, tree.RootPid, target)
for i, ctx := range ctxs {
ctx.Prog.Target = ctx.Target
if err := ctx.FillOutMemory(); err != nil {
log.Logf(1, "failed to fill out memory: %v, skipping this prog", err)
continue
}
if err := ctx.Prog.Finalize(); err != nil {
log.Fatalf("error validating program: %v", err)
}
if progIsTooLarge(ctx.Prog) {
log.Logf(1, "prog is too large")
continue
}
ret = append(ret, ctx.Prog)
if deserializeDir == "" {
continue
}
progName := filepath.Join(deserializeDir, filepath.Base(file)+strconv.Itoa(i))
if err := osutil.WriteFile(progName, ctx.Prog.Serialize()); err != nil {
log.Fatalf("failed to output file: %v", err)
progs := proggen.ParseFile(file, target)
ret = append(ret, progs...)
if deserializeDir != "" {
for i, p := range progs {
progName := filepath.Join(deserializeDir, filepath.Base(file)+strconv.Itoa(i))
if err := osutil.WriteFile(progName, p.Serialize()); err != nil {
log.Fatalf("failed to output file: %v", err)
}
}
}

}
return ret
}

// progIsTooLarge reports whether serializing p for execution into a buffer
// of prog.ExecBufferSize bytes fails.
func progIsTooLarge(p *prog.Prog) bool {
	_, err := p.SerializeForExec(make([]byte, prog.ExecBufferSize))
	return err != nil
}

func getTraceFiles(dir string) []string {
infos, err := ioutil.ReadDir(dir)
if err != nil {
Expand All @@ -129,22 +100,6 @@ func getTraceFiles(dir string) []string {
return names
}

// parseTree walks the process hierarchy (parent -> []child) rooted at pid and
// generates one context per visited pid. The returned slice holds the context
// for pid first, followed by the contexts of its descendants in depth-first
// order. Children that have no trace recorded in tree.TraceMap are skipped.
func parseTree(tree *parser.TraceTree, pid int64, target *prog.Target) []*proggen.Context {
	log.Logf(2, "parsing trace pid %v", pid)
	result := []*proggen.Context{
		proggen.GenSyzProg(tree.TraceMap[pid], target, callSelector),
	}
	for _, child := range tree.Ptree[pid] {
		if tree.TraceMap[child] == nil {
			continue
		}
		descendants := parseTree(tree, child, target)
		result = append(result, descendants...)
	}
	return result
}

func pack(progs []*prog.Prog) {
var records []db.Record
for _, prog := range progs {
Expand Down

0 comments on commit 8056889

Please sign in to comment.