diff --git a/tools/opensnoop.py b/tools/opensnoop.py index 393bcd0fc3d0..4c6f38d4a5b2 100755 --- a/tools/opensnoop.py +++ b/tools/opensnoop.py @@ -104,15 +104,11 @@ #ifdef FULLPATH #include #include +#endif +#define NAME_MAX 255 #define MAX_ENTRIES 32 -enum event_type { - EVENT_ENTRY, - EVENT_END, -}; -#endif - struct val_t { u64 id; char comm[TASK_COMM_LEN]; @@ -127,15 +123,26 @@ u32 uid; int ret; char comm[TASK_COMM_LEN]; + u32 path_depth; #ifdef FULLPATH - enum event_type type; -#endif + /** + * Example: "/CCCCC/BB/AAAA" + * name[]: "AAAA000000000000BB0000000000CCCCC00000000000" + * |<- NAME_MAX ->| + * + * name[] must be u8, because char [] will be truncated by ctypes.cast(), + * such as above example, will be truncated to "AAAA0". + */ + u8 name[NAME_MAX * MAX_ENTRIES]; +#else + /* If not fullpath, avoid transfer big data */ char name[NAME_MAX]; +#endif int flags; // EXTENDED_STRUCT_MEMBER u32 mode; // EXTENDED_STRUCT_MEMBER }; -BPF_PERF_OUTPUT(events); +BPF_RINGBUF_OUTPUT(events, BUFFER_PAGES); """ bpf_text_kprobe = """ @@ -145,7 +152,7 @@ { u64 id = bpf_get_current_pid_tgid(); struct val_t *valp; - struct data_t data = {}; + struct data_t *data; u64 tsp = bpf_ktime_get_ns(); @@ -155,17 +162,23 @@ return 0; } - bpf_probe_read_kernel(&data.comm, sizeof(data.comm), valp->comm); - bpf_probe_read_user_str(&data.name, sizeof(data.name), (void *)valp->fname); - data.id = valp->id; - data.ts = tsp / 1000; - data.uid = bpf_get_current_uid_gid(); - data.flags = valp->flags; // EXTENDED_STRUCT_MEMBER - data.mode = valp->mode; // EXTENDED_STRUCT_MEMBER - data.ret = PT_REGS_RC(ctx); + data = events.ringbuf_reserve(sizeof(struct data_t)); + if (!data) + goto cleanup; + + bpf_probe_read_kernel(&data->comm, sizeof(data->comm), valp->comm); + data->path_depth = 0; + bpf_probe_read_user_str(&data->name, sizeof(data->name), (void *)valp->fname); + data->id = valp->id; + data->ts = tsp / 1000; + data->uid = bpf_get_current_uid_gid(); + data->flags = valp->flags; // EXTENDED_STRUCT_MEMBER + data->mode = valp->mode; // EXTENDED_STRUCT_MEMBER + data->ret = PT_REGS_RC(ctx); SUBMIT_DATA +cleanup: infotmp.delete(&id); return 0; @@ -297,6 +310,11 @@ u32 pid = id >> 32; // PID is higher part u32 tid = id; // Cast and get the lower part u32 uid = bpf_get_current_uid_gid(); + struct data_t *data; + + data = events.ringbuf_reserve(sizeof(struct data_t)); + if (!data) + return 0; PID_TID_FILTER UID_FILTER @@ -305,18 +323,18 @@ return 0; } - struct data_t data = {}; - bpf_get_current_comm(&data.comm, sizeof(data.comm)); + bpf_get_current_comm(&data->comm, sizeof(data->comm)); u64 tsp = bpf_ktime_get_ns(); - bpf_probe_read_user_str(&data.name, sizeof(data.name), (void *)filename); - data.id = id; - data.ts = tsp / 1000; - data.uid = bpf_get_current_uid_gid(); - data.flags = flags; // EXTENDED_STRUCT_MEMBER - data.mode = mode; // EXTENDED_STRUCT_MEMBER - data.ret = ret; + data->path_depth = 0; + bpf_probe_read_user_str(&data->name, sizeof(data->name), (void *)filename); + data->id = id; + data->ts = tsp / 1000; + data->uid = bpf_get_current_uid_gid(); + data->flags = flags; // EXTENDED_STRUCT_MEMBER + data->mode = mode; // EXTENDED_STRUCT_MEMBER + data->ret = ret; SUBMIT_DATA @@ -372,6 +390,10 @@ 'if (uid != %s) { return 0; }' % args.uid) else: bpf_text = bpf_text.replace('UID_FILTER', '') +if args.buffer_pages: + bpf_text = bpf_text.replace('BUFFER_PAGES', '%s' % args.buffer_pages) +else: + bpf_text = bpf_text.replace('BUFFER_PAGES', '%d' % 64) bpf_text = filter_by_containers(args) + bpf_text if args.flag_filter: bpf_text = bpf_text.replace('FLAGS_FILTER', @@ -384,36 +406,39 @@ if args.full_path: bpf_text = bpf_text.replace('SUBMIT_DATA', """ - data.type = EVENT_ENTRY; - events.perf_submit(ctx, &data, sizeof(data)); - - if (data.name[0] != '/') { // relative path + if (data->name[0] != '/') { // relative path struct task_struct *task; struct dentry *dentry; + size_t filepart_length; + char *payload = data->name; int i; task = (struct task_struct *)bpf_get_current_task_btf(); dentry = task->fs->pwd.dentry; - for (i = 1; i < MAX_ENTRIES; i++) { - bpf_probe_read_kernel(&data.name, sizeof(data.name), (void *)dentry->d_name.name); - data.type = EVENT_ENTRY; - events.perf_submit(ctx, &data, sizeof(data)); + for (i = 1, payload += NAME_MAX; i < MAX_ENTRIES; i++) { + filepart_length = + bpf_probe_read_kernel(payload, NAME_MAX, (void *)dentry->d_name.name); + + if (filepart_length < 0 || filepart_length > NAME_MAX) + break; if (dentry == dentry->d_parent) { // root directory break; } + payload += NAME_MAX; + dentry = dentry->d_parent; + data->path_depth++; } } - data.type = EVENT_END; - events.perf_submit(ctx, &data, sizeof(data)); + events.ringbuf_submit(data, sizeof(*data)); """) else: bpf_text = bpf_text.replace('SUBMIT_DATA', """ - events.perf_submit(ctx, &data, sizeof(data)); + events.ringbuf_submit(data, sizeof(*data)); """) if debug or args.ebpf: @@ -447,78 +472,75 @@ print("%-8s %-4s " % ("FLAGS", "MODE"), end="") print("PATH") -class EventType(object): - EVENT_ENTRY = 0 - EVENT_END = 1 - entries = defaultdict(list) +def split_names(str): + NAME_MAX = 255 + MAX_ENTRIES = 32 + chunks = [str[i:i + NAME_MAX] for i in range(0, NAME_MAX * MAX_ENTRIES, NAME_MAX)] + return [chunk.split(b'\x00', 1)[0] for chunk in chunks] + # process event def print_event(cpu, data, size): event = b["events"].event(data) global initial_ts - if not args.full_path or event.type == EventType.EVENT_END: - skip = False + skip = False - # split return value into FD and errno columns - if event.ret >= 0: - fd_s = event.ret - err = 0 - else: - fd_s = -1 - err = - event.ret + # split return value into FD and errno columns + if event.ret >= 0: + fd_s = event.ret + err = 0 + else: + fd_s = -1 + err = - event.ret - if not initial_ts: - initial_ts = event.ts + if not initial_ts: + initial_ts = event.ts - if args.failed and (event.ret >= 0): - skip = True + if args.failed and (event.ret >= 0): + skip = True - if args.name and bytes(args.name) not in event.comm: - skip = True + if args.name and bytes(args.name) not in event.comm: + skip = True - if not skip: - if args.timestamp: - delta = event.ts - initial_ts - printb(b"%-14.9f" % (float(delta) / 1000000), nl="") + if not skip: + if args.timestamp: + delta = event.ts - initial_ts + printb(b"%-14.9f" % (float(delta) / 1000000), nl="") - if args.print_uid: - printb(b"%-6d" % event.uid, nl="") + if args.print_uid: + printb(b"%-6d" % event.uid, nl="") - printb(b"%-6d %-16s %4d %3d " % - (event.id & 0xffffffff if args.tid else event.id >> 32, - event.comm, fd_s, err), nl="") + printb(b"%-6d %-16s %4d %3d " % + (event.id & 0xffffffff if args.tid else event.id >> 32, + event.comm, fd_s, err), nl="") - if args.extended_fields: - # If neither O_CREAT nor O_TMPFILE is specified in flags, then - # mode is ignored, see open(2). - if event.mode == 0 and event.flags & os.O_CREAT == 0 and \ - (event.flags & os.O_TMPFILE) != os.O_TMPFILE: - printb(b"%08o n/a " % event.flags, nl="") - else: - printb(b"%08o %04o " % (event.flags, event.mode), nl="") - - if not args.full_path: - printb(b"%s" % event.name) + if args.extended_fields: + # If neither O_CREAT nor O_TMPFILE is specified in flags, then + # mode is ignored, see open(2). + if event.mode == 0 and event.flags & os.O_CREAT == 0 and \ + (event.flags & os.O_TMPFILE) != os.O_TMPFILE: + printb(b"%08o n/a " % event.flags, nl="") else: - paths = entries[event.id] - paths.reverse() - printb(b"%s" % os.path.join(*paths)) + printb(b"%08o %04o " % (event.flags, event.mode), nl="") if args.full_path: - try: - del(entries[event.id]) - except Exception: - pass - elif event.type == EventType.EVENT_ENTRY: - entries[event.id].append(event.name) + # see struct data_t::name field comment. + names = split_names(bytes(event.name)) + picked = names[:event.path_depth + 1] + picked_str = [x.decode('utf-8', 'ignore') if isinstance(x, bytes) else str(x) for x in picked] + joined = '/'.join(picked_str[::-1]) + result = joined if joined.startswith('/') else '/' + joined + printb(b"%s" % result.encode("utf-8")) + else: + printb(b"%s" % event.name) # loop with callback to print_event -b["events"].open_perf_buffer(print_event, page_cnt=args.buffer_pages) +b["events"].open_ring_buffer(print_event) start_time = datetime.now() while not args.duration or datetime.now() - start_time < args.duration: try: - b.perf_buffer_poll() + b.ring_buffer_poll() except KeyboardInterrupt: exit()