Skip to content

Commit 053b212

Browse files
author
Alexei Starovoitov
committed
Merge branch 'fixes-for-bits-iterator'
Hou Tao says: ==================== The patch set fixes several issues in bits iterator. Patch #1 fixes the kmemleak problem of bits iterator. Patch #2~#3 fix the overflow problem of nr_bits. Patch #4 fixes the potential stack corruption when bits iterator is used on 32-bit host. Patch #5 adds more test cases for bits iterator. Please see the individual patches for more details. And comments are always welcome. --- v4: * patch #1: add ack from Yafang * patch #3: revert code-churn like changes: (1) compute nr_bytes and nr_bits before the check of nr_words. (2) use nr_bits == 64 to check for single u64, preventing build warning on 32-bit hosts. * patch #4: use "BITS_PER_LONG == 32" instead of "!defined(CONFIG_64BIT)" v3: https://lore.kernel.org/bpf/[email protected]/T/#t * split the bits-iterator related patches from "Misc fixes for bpf" patch set * patch #1: use "!nr_bits || bits >= nr_bits" to stop the iteration * patch #2: add a new helper for the overflow problem * patch #3: decrease the limitation from 512 to 511 and check whether nr_bytes is too large for bpf memory allocator explicitly * patch #5: add two more test cases for bit iterator v2: http://lore.kernel.org/bpf/[email protected] ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents d0b98f6 + ebafc1e commit 053b212

File tree

4 files changed

+118
-14
lines changed

4 files changed

+118
-14
lines changed

include/linux/bpf_mem_alloc.h

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
3333
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
3434
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
3535

36+
/* Check the allocation size for kmalloc equivalent allocator */
37+
int bpf_mem_alloc_check_size(bool percpu, size_t size);
38+
3639
/* kmalloc/kfree equivalent: */
3740
void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
3841
void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);

kernel/bpf/helpers.c

+44-10
Original file line numberDiff line numberDiff line change
@@ -2851,21 +2851,47 @@ struct bpf_iter_bits {
28512851
__u64 __opaque[2];
28522852
} __aligned(8);
28532853

2854+
#define BITS_ITER_NR_WORDS_MAX 511
2855+
28542856
struct bpf_iter_bits_kern {
28552857
union {
2856-
unsigned long *bits;
2857-
unsigned long bits_copy;
2858+
__u64 *bits;
2859+
__u64 bits_copy;
28582860
};
2859-
u32 nr_bits;
2861+
int nr_bits;
28602862
int bit;
28612863
} __aligned(8);
28622864

2865+
/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
2866+
* a u64 pointer and an unsigned long pointer to find_next_bit() will
2867+
* return the same result, as both point to the same 8-byte area.
2868+
*
2869+
* For 32-bit little-endian hosts, using a u64 pointer or unsigned long
2870+
* pointer also makes no difference. This is because the first iterated
2871+
* unsigned long is composed of bits 0-31 of the u64 and the second unsigned
2872+
* long is composed of bits 32-63 of the u64.
2873+
*
2874+
* However, for 32-bit big-endian hosts, this is not the case. The first
2875+
* iterated unsigned long will be bits 32-63 of the u64, so swap these two
2876+
* ulong values within the u64.
2877+
*/
2878+
static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
2879+
{
2880+
#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
2881+
unsigned int i;
2882+
2883+
for (i = 0; i < nr; i++)
2884+
bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
2885+
#endif
2886+
}
2887+
28632888
/**
28642889
* bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
28652890
* @it: The new bpf_iter_bits to be created
28662891
* @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
28672892
* @nr_words: The size of the specified memory area, measured in 8-byte units.
2868-
* Due to the limitation of memalloc, it can't be greater than 512.
2893+
* The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
2894+
* further reduced by the BPF memory allocator implementation.
28692895
*
28702896
* This function initializes a new bpf_iter_bits structure for iterating over
28712897
* a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
@@ -2892,17 +2918,24 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
28922918

28932919
if (!unsafe_ptr__ign || !nr_words)
28942920
return -EINVAL;
2921+
if (nr_words > BITS_ITER_NR_WORDS_MAX)
2922+
return -E2BIG;
28952923

28962924
/* Optimization for u64 mask */
28972925
if (nr_bits == 64) {
28982926
err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign);
28992927
if (err)
29002928
return -EFAULT;
29012929

2930+
swap_ulong_in_u64(&kit->bits_copy, nr_words);
2931+
29022932
kit->nr_bits = nr_bits;
29032933
return 0;
29042934
}
29052935

2936+
if (bpf_mem_alloc_check_size(false, nr_bytes))
2937+
return -E2BIG;
2938+
29062939
/* Fallback to memalloc */
29072940
kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
29082941
if (!kit->bits)
@@ -2914,6 +2947,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
29142947
return err;
29152948
}
29162949

2950+
swap_ulong_in_u64(kit->bits, nr_words);
2951+
29172952
kit->nr_bits = nr_bits;
29182953
return 0;
29192954
}
@@ -2930,17 +2965,16 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w
29302965
__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
29312966
{
29322967
struct bpf_iter_bits_kern *kit = (void *)it;
2933-
u32 nr_bits = kit->nr_bits;
2934-
const unsigned long *bits;
2935-
int bit;
2968+
int bit = kit->bit, nr_bits = kit->nr_bits;
2969+
const void *bits;
29362970

2937-
if (nr_bits == 0)
2971+
if (!nr_bits || bit >= nr_bits)
29382972
return NULL;
29392973

29402974
bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
2941-
bit = find_next_bit(bits, nr_bits, kit->bit + 1);
2975+
bit = find_next_bit(bits, nr_bits, bit + 1);
29422976
if (bit >= nr_bits) {
2943-
kit->nr_bits = 0;
2977+
kit->bit = bit;
29442978
return NULL;
29452979
}
29462980

kernel/bpf/memalloc.c

+13-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
*/
3636
#define LLIST_NODE_SZ sizeof(struct llist_node)
3737

38+
#define BPF_MEM_ALLOC_SIZE_MAX 4096
39+
3840
/* similar to kmalloc, but sizeof == 8 bucket is gone */
3941
static u8 size_index[24] __ro_after_init = {
4042
3, /* 8 */
@@ -65,7 +67,7 @@ static u8 size_index[24] __ro_after_init = {
6567

6668
static int bpf_mem_cache_idx(size_t size)
6769
{
68-
if (!size || size > 4096)
70+
if (!size || size > BPF_MEM_ALLOC_SIZE_MAX)
6971
return -1;
7072

7173
if (size <= 192)
@@ -1005,3 +1007,13 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
10051007

10061008
return !ret ? NULL : ret + LLIST_NODE_SZ;
10071009
}
1010+
1011+
int bpf_mem_alloc_check_size(bool percpu, size_t size)
1012+
{
1013+
/* The size of percpu allocation doesn't have LLIST_NODE_SZ overhead */
1014+
if ((percpu && size > BPF_MEM_ALLOC_SIZE_MAX) ||
1015+
(!percpu && size > BPF_MEM_ALLOC_SIZE_MAX - LLIST_NODE_SZ))
1016+
return -E2BIG;
1017+
1018+
return 0;
1019+
}

tools/testing/selftests/bpf/progs/verifier_bits_iter.c

+58-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign,
1515
int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
1616
void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
1717

18+
u64 bits_array[511] = {};
19+
1820
SEC("iter.s/cgroup")
1921
__description("bits iter without destroy")
2022
__failure __msg("Unreleased reference")
@@ -110,16 +112,16 @@ int bit_index(void)
110112
}
111113

112114
SEC("syscall")
113-
__description("bits nomem")
115+
__description("bits too big")
114116
__success __retval(0)
115-
int bits_nomem(void)
117+
int bits_too_big(void)
116118
{
117119
u64 data[4];
118120
int nr = 0;
119121
int *bit;
120122

121123
__builtin_memset(&data, 0xff, sizeof(data));
122-
bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */
124+
bpf_for_each(bits, bit, &data[0], 512) /* Be greater than 511 */
123125
nr++;
124126
return nr;
125127
}
@@ -151,3 +153,56 @@ int zero_words(void)
151153
nr++;
152154
return nr;
153155
}
156+
157+
SEC("syscall")
158+
__description("huge words")
159+
__success __retval(0)
160+
int huge_words(void)
161+
{
162+
u64 data[8] = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1};
163+
int nr = 0;
164+
int *bit;
165+
166+
bpf_for_each(bits, bit, &data[0], 67108865)
167+
nr++;
168+
return nr;
169+
}
170+
171+
SEC("syscall")
172+
__description("max words")
173+
__success __retval(4)
174+
int max_words(void)
175+
{
176+
volatile int nr = 0;
177+
int *bit;
178+
179+
bits_array[0] = (1ULL << 63) | 1U;
180+
bits_array[510] = (1ULL << 33) | (1ULL << 32);
181+
182+
bpf_for_each(bits, bit, bits_array, 511) {
183+
if (nr == 0 && *bit != 0)
184+
break;
185+
if (nr == 2 && *bit != 32672)
186+
break;
187+
nr++;
188+
}
189+
return nr;
190+
}
191+
192+
SEC("syscall")
193+
__description("bad words")
194+
__success __retval(0)
195+
int bad_words(void)
196+
{
197+
void *bad_addr = (void *)(3UL << 30);
198+
int nr = 0;
199+
int *bit;
200+
201+
bpf_for_each(bits, bit, bad_addr, 1)
202+
nr++;
203+
204+
bpf_for_each(bits, bit, bad_addr, 4)
205+
nr++;
206+
207+
return nr;
208+
}

0 commit comments

Comments
 (0)