fix(driver/bpf): fixed a couple of verifier issues. #1896

Merged · 1 commit · Jun 13, 2024
300 changes: 111 additions & 189 deletions driver/bpf/fillers.h
@@ -571,236 +571,158 @@ FILLER(sys_poll_x, true)
return bpf_poll_parse_fds(data, false);
}

#define MAX_IOVCNT 32
#define MAX_IOVCNT_COMPAT 8
#ifdef CONFIG_COMPAT
Contributor Author:
The only real downside of the patch (apart from its ugliness, probably :D) is that we will have a max iovec count of 8 when CONFIG_COMPAT is enabled; this is needed because we must have a compile-time constant to be able to unroll the loops below. Not a big deal; this only affects the old eBPF probe.
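
A minimal standalone sketch of the constraint mentioned above (hypothetical helper, not part of this diff): the old eBPF verifier cannot follow loop back-edges, so clang has to fully unroll the loop, and that only works when the trip count is a compile-time constant. The runtime iovcnt is then handled with an early break inside the constant-bound loop.

#define MAX_IOVCNT 8	/* must be a literal so clang can fully unroll the loop */

/* Hypothetical helper, assuming the probe's usual build environment
 * (struct iovec and __always_inline available). */
static __always_inline long sum_iov_lens(const struct iovec *iov,
					 unsigned long iovcnt)
{
	long size = 0;
	int j;

#pragma unroll
	for (j = 0; j < MAX_IOVCNT; ++j) {	/* constant bound: unrollable */
		if (j == iovcnt)		/* runtime count: early exit */
			break;
		size += iov[j].iov_len;
	}
	return size;
}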

#define MAX_IOVCNT 8
#else
#define MAX_IOVCNT 32
#endif

static __always_inline int bpf_parse_readv_writev_bufs_64(struct filler_data *data,
static __always_inline int bpf_parse_readv_writev_bufs(struct filler_data *data,
Member:
To be honest, I would prefer to revert this code to what we had before this PR: https://github.com/falcosecurity/libs/pull/1814/files#diff-a8b6fc07077f8fcef2f22e51392da619a5e68b7c603c62b3ab93ce5b6be22d6b. The new implementation seems pretty easy to break, and the only advantage is that we support readv and writev on 32 bits... which IMO is a corner case.

Btw, if you feel confident we can also merge it and maybe revert it later if we see it doesn't work well...

Contributor Author:
Yep, I would love to be able to just use memcpy there (the clang builtin, of course), but it won't work :/ and it would make the code much better...

const void __user *iovsrc,
unsigned long iovcnt,
long retval,
int flags,
unsigned long *size)
int flags)
{
const struct iovec *iov;
int res = PPM_SUCCESS;
unsigned long copylen;
long size = 0;
int j;
unsigned long iov_size = sizeof(struct iovec);
unsigned long len_off = offsetof(struct iovec, iov_len);
unsigned long base_off = offsetof(struct iovec, iov_base);
unsigned long ptr_size = sizeof(void *);

copylen = iovcnt * sizeof(struct iovec);
iov = (const struct iovec *)data->tmp_scratch;
#ifdef CONFIG_COMPAT
if (bpf_in_ia32_syscall())
{
iov_size = sizeof(struct compat_iovec);
len_off = offsetof(struct compat_iovec, iov_len);
base_off = offsetof(struct compat_iovec, iov_base);
ptr_size = 4;
}
#endif

copylen = iovcnt * iov_size;
if (copylen > SCRATCH_SIZE_MAX)
{
return PPM_FAILURE_FRAME_SCRATCH_MAP_FULL;
}

#ifdef BPF_FORBIDS_ZERO_ACCESS
if (copylen)
if (bpf_probe_read_user((void *)iov,
((copylen - 1) & SCRATCH_SIZE_MAX) + 1,
(void *)iovsrc))
if (copylen)
if (bpf_probe_read_user(data->tmp_scratch,
((copylen - 1) & SCRATCH_SIZE_MAX) + 1,
(void *)iovsrc))
#else
if (bpf_probe_read_user((void *)iov,
copylen & SCRATCH_SIZE_MAX,
(void *)iovsrc))
if (bpf_probe_read_user(data->tmp_scratch,
copylen & SCRATCH_SIZE_MAX,
(void *)iovsrc))
#endif
return PPM_FAILURE_INVALID_USER_MEMORY;

#pragma unroll

#pragma unroll
for (j = 0; j < MAX_IOVCNT; ++j) {
if (j == iovcnt)
break;
// BPF seems to require a hard limit to avoid overflows
if (*size == LONG_MAX)
if (size == LONG_MAX)
break;

*size += iov[j].iov_len;
volatile unsigned curr_shift = j * iov_size + len_off;
unsigned long shift_bounded = curr_shift & SCRATCH_SIZE_HALF;
if (curr_shift > SCRATCH_SIZE_HALF)
break;

long curr_len;
Contributor Author:
Don't know why, but it won't let me use memcpy (which is an alias for __builtin_memcpy in clang) here.

Contributor Author:
If I could use memcpy the patch would look much tidier, but unfortunately that's not the case.

Contributor Author:
Oh, we can't, because __builtin_memcpy requires the size to be a compile-time constant.
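
A short sketch of why that matters here (hypothetical helpers, not part of this diff): when the length is a compile-time constant, clang lowers __builtin_memcpy into plain loads and stores that the verifier accepts; with a runtime length (4 bytes for a compat_iovec field, 8 for a native iovec field) it would have to emit a real call to memcpy, which is not available in a BPF program, hence the explicit branch on ptr_size with a fixed-width load on each path.

/* Constant size: clang inlines the copy, no memcpy call is emitted. */
static __always_inline long read_len_fixed(const char *scratch, unsigned long off)
{
	long len;
	__builtin_memcpy(&len, scratch + off, sizeof(len));
	return len;
}

/* Runtime size: branch on ptr_size and use a fixed-width load per path,
 * mirroring what the filler does below. */
static __always_inline long read_len_by_ptr_size(const char *scratch,
						 unsigned long off,
						 unsigned long ptr_size)
{
	if (ptr_size == 4)
		return *((const int *)(scratch + off));
	return *((const long *)(scratch + off));
}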

if (ptr_size == 4)
{
curr_len = *((int *)(data->tmp_scratch + shift_bounded));
}
else
{
curr_len = *((long *)(data->tmp_scratch + shift_bounded));
}
size += curr_len;
}

if ((flags & PRB_FLAG_IS_WRITE) == 0)
if (*size > retval)
*size = retval;
if (size > retval)
size = retval;

if (flags & PRB_FLAG_PUSH_SIZE && res == PPM_SUCCESS) {
res = bpf_push_u32_to_ring(data, (uint32_t)*size);
if (flags & PRB_FLAG_PUSH_SIZE) {
res = bpf_push_u32_to_ring(data, (uint32_t)size);
CHECK_RES(res);
}

if (flags & PRB_FLAG_PUSH_DATA) {
if (*size > 0) {
if (size > 0) {
unsigned long off = _READ(data->state->tail_ctx.curoff);
unsigned long remaining = *size;
int j;
unsigned long remaining = size;

#pragma unroll
#pragma unroll
for (j = 0; j < MAX_IOVCNT; ++j) {
volatile unsigned int to_read;

if (j == iovcnt)
break;

unsigned long off_bounded = off & SCRATCH_SIZE_HALF;
if (off > SCRATCH_SIZE_HALF)
break;

if (iov[j].iov_len <= remaining)
to_read = iov[j].iov_len;
volatile unsigned len_curr_shift = j * iov_size + len_off;
unsigned long len_shift_bounded = len_curr_shift & SCRATCH_SIZE_HALF;
if (len_curr_shift > SCRATCH_SIZE_HALF)
break;

long curr_len;
if (ptr_size == 4)
{
curr_len = *((int *)(data->tmp_scratch + len_shift_bounded));
}
else
{
curr_len = *((long *)(data->tmp_scratch + len_shift_bounded));
}
if (curr_len <= remaining)
to_read = curr_len;
else
to_read = remaining;

if (to_read > SCRATCH_SIZE_HALF)
to_read = SCRATCH_SIZE_HALF;

#ifdef BPF_FORBIDS_ZERO_ACCESS
if (to_read)
if (bpf_probe_read_user(&data->buf[off_bounded],
((to_read - 1) & SCRATCH_SIZE_HALF) + 1,
(void*)iov[j].iov_base))
#else
if (bpf_probe_read_user(&data->buf[off_bounded],
to_read & SCRATCH_SIZE_HALF,
(void*)iov[j].iov_base))
#endif
return PPM_FAILURE_INVALID_USER_MEMORY;

remaining -= to_read;
off += to_read;
}
} else {
*size = 0;
}
return PPM_SUCCESS;
}

return res;
}

#ifdef CONFIG_COMPAT
static __always_inline int bpf_parse_readv_writev_bufs_32(struct filler_data *data,
const void __user *iovsrc,
unsigned long iovcnt,
long retval,
int flags,
unsigned long *size)
{
const struct compat_iovec *compat_iov;
int res = PPM_SUCCESS;
unsigned long copylen;
int j;

copylen = iovcnt * sizeof(struct compat_iovec);
compat_iov = (const struct compat_iovec *)data->tmp_scratch;

if (copylen > SCRATCH_SIZE_MAX)
{
return PPM_FAILURE_FRAME_SCRATCH_MAP_FULL;
}

#ifdef BPF_FORBIDS_ZERO_ACCESS
if (copylen)
if (bpf_probe_read_user((void *)compat_iov,
((copylen - 1) & SCRATCH_SIZE_MAX) + 1,
(void *)iovsrc))
#else
if (bpf_probe_read_user((void *)compat_iov,
copylen & SCRATCH_SIZE_MAX,
(void *)iovsrc))
#endif
return PPM_FAILURE_INVALID_USER_MEMORY;

#pragma unroll
for (j = 0; j < MAX_IOVCNT_COMPAT; ++j) {
if (j == iovcnt)
break;
// BPF seems to require a hard limit to avoid overflows
if (*size == LONG_MAX)
break;

*size += compat_iov[j].iov_len;
}

if ((flags & PRB_FLAG_IS_WRITE) == 0)
if (*size > retval)
*size = retval;

if (flags & PRB_FLAG_PUSH_SIZE && res == PPM_SUCCESS) {
res = bpf_push_u32_to_ring(data, (uint32_t)*size);
CHECK_RES(res);
}

if (flags & PRB_FLAG_PUSH_DATA) {
if (*size > 0) {
unsigned long off = _READ(data->state->tail_ctx.curoff);
unsigned long remaining = *size;
int j;

// The 14 iovec count limit is due to old kernels verifiers
// complaining.
#pragma unroll
for (j = 0; j < MAX_IOVCNT_COMPAT; ++j) {
volatile unsigned int to_read;

if (j == iovcnt)
volatile unsigned base_curr_shift = j * iov_size + base_off;
unsigned long base_shift_bounded = base_curr_shift & SCRATCH_SIZE_HALF;
if (base_curr_shift > SCRATCH_SIZE_HALF)
break;

unsigned long off_bounded = off & SCRATCH_SIZE_HALF;
if (off > SCRATCH_SIZE_HALF)
break;

if (compat_iov[j].iov_len <= remaining)
to_read = compat_iov[j].iov_len;
unsigned long curr_base;
if (ptr_size == 4)
{
curr_base = *((unsigned int *)(data->tmp_scratch + base_shift_bounded));
}
else
to_read = remaining;

if (to_read > SCRATCH_SIZE_HALF)
to_read = SCRATCH_SIZE_HALF;

#ifdef BPF_FORBIDS_ZERO_ACCESS
if (to_read)
if (bpf_probe_read_user(&data->buf[off_bounded],
((to_read - 1) & SCRATCH_SIZE_HALF) + 1,
(void*)compat_iov[j].iov_base))
#else
{
curr_base = *((unsigned long *)(data->tmp_scratch + base_shift_bounded));
}
#ifdef BPF_FORBIDS_ZERO_ACCESS
if (to_read)
if (bpf_probe_read_user(&data->buf[off_bounded],
to_read & SCRATCH_SIZE_HALF,
(void*)compat_iov[j].iov_base))
#endif
((to_read - 1) & SCRATCH_SIZE_HALF) + 1,
(void *)curr_base))
#else
if (bpf_probe_read_user(&data->buf[off_bounded],
to_read & SCRATCH_SIZE_HALF,
(void *)curr_base))
#endif
return PPM_FAILURE_INVALID_USER_MEMORY;

remaining -= to_read;
off += to_read;
}
} else {
*size = 0;
size = 0;
}

return PPM_SUCCESS;
}
return res;
}
#endif

static __always_inline int bpf_parse_readv_writev_bufs(struct filler_data *data,
const void __user *iovsrc,
unsigned long iovcnt,
long retval,
int flags)
{
unsigned long size = 0;
int res = PPM_SUCCESS;
if (!bpf_in_ia32_syscall())
{
res = bpf_parse_readv_writev_bufs_64(data, iovsrc, iovcnt, retval, flags, &size);
}
else
{
#ifdef CONFIG_COMPAT
res = bpf_parse_readv_writev_bufs_32(data, iovsrc, iovcnt, retval, flags, &size);
#endif
}

if(flags & PRB_FLAG_PUSH_DATA && res == PPM_SUCCESS)
{
data->fd = bpf_syscall_get_argument(data, 0);
data->curarg_already_on_frame = true;
return __bpf_val_to_ring(data, 0, size, PT_BYTEBUF, -1, true, KERNEL);
@@ -4312,10 +4234,12 @@ FILLER(sys_recvfrom_x, true)
unsigned long val;
uint16_t size = 0;
long retval;
int addrlen;
int addrlen = 0;
Contributor Author:
recvfrom, on the other hand, looks tidier now IMO.

int err = 0;
int res;
int fd;
bool push = true;
bool from_usr = false;

/*
* Push the common params to the ring
@@ -4324,6 +4248,7 @@ FILLER(sys_recvfrom_x, true)
res = f_sys_recv_x_common(data, retval);
CHECK_RES(res);


if (retval >= 0) {
/*
* Get the fd
@@ -4350,29 +4275,26 @@
*/
err = bpf_addr_to_kernel(usrsockaddr, addrlen,
(struct sockaddr *)data->tmp_scratch);
if (err >= 0) {
if (err >= 0)
{
/*
* Convert the fd into socket endpoint information
* Convert the fd into socket endpoint information
*/
size = bpf_fd_to_socktuple(data,
fd,
(struct sockaddr *)data->tmp_scratch,
addrlen,
true,
true,
data->tmp_scratch + sizeof(struct sockaddr_storage));
from_usr = true;
}
} else {
else
{
// Do not send any socket endpoint info.
push = false;
}
}
if (push)
{
/*
* Get socket endpoint information from fd if the user-provided *sockaddr is NULL
*/
size = bpf_fd_to_socktuple(data,
fd,
NULL,
0,
false,
true,
data->tmp_scratch + sizeof(struct sockaddr_storage));
* Get socket endpoint information from fd if the user-provided *sockaddr is NULL
*/
size = bpf_fd_to_socktuple(data, fd, (struct sockaddr *)data->tmp_scratch, addrlen, from_usr,
true, data->tmp_scratch + sizeof(struct sockaddr_storage));
}
}
