@@ -86,6 +86,9 @@ struct bucket {
 	};
 };
 
+#define HASHTAB_MAP_LOCK_COUNT 8
+#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
+
 struct bpf_htab {
 	struct bpf_map map;
 	struct bucket *buckets;
@@ -99,6 +102,8 @@ struct bpf_htab {
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 	u32 hashrnd;
+	struct lock_class_key lockdep_key;
+	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
 };
 
 /* each htab element is struct htab_elem + key + value */
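The two macros size a small array of per-CPU re-entrancy counters: map_locked holds eight per-CPU ints, and a bucket is mapped onto one of them by the low three bits of its hash. A minimal illustration of the indexing (the hash value is made up):

	/* Illustration only: HASHTAB_MAP_LOCK_MASK == 7, so just the low
	 * three bits pick the slot.  Distinct buckets may share a slot;
	 * the worst case is a spurious -EBUSY, never a deadlock. */
	u32 hash = 0x9e3779b9;				/* example hash */
	u32 slot = hash & HASHTAB_MAP_LOCK_MASK;	/* == 1 */
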
@@ -136,35 +141,56 @@ static void htab_init_buckets(struct bpf_htab *htab)
 {
 	unsigned i;
 
+	lockdep_register_key(&htab->lockdep_key);
 	for (i = 0; i < htab->n_buckets; i++) {
 		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
-		if (htab_use_raw_lock(htab))
+		if (htab_use_raw_lock(htab)) {
 			raw_spin_lock_init(&htab->buckets[i].raw_lock);
-		else
+			lockdep_set_class(&htab->buckets[i].raw_lock,
+					  &htab->lockdep_key);
+		} else {
 			spin_lock_init(&htab->buckets[i].lock);
+			lockdep_set_class(&htab->buckets[i].lock,
+					  &htab->lockdep_key);
+		}
 	}
 }
 
-static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
-					     struct bucket *b)
+static inline int htab_lock_bucket(const struct bpf_htab *htab,
+				   struct bucket *b, u32 hash,
+				   unsigned long *pflags)
 {
 	unsigned long flags;
 
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
+
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+		__this_cpu_dec(*(htab->map_locked[hash]));
+		migrate_enable();
+		return -EBUSY;
+	}
+
 	if (htab_use_raw_lock(htab))
 		raw_spin_lock_irqsave(&b->raw_lock, flags);
 	else
 		spin_lock_irqsave(&b->lock, flags);
-	return flags;
+	*pflags = flags;
+
+	return 0;
 }
 
 static inline void htab_unlock_bucket(const struct bpf_htab *htab,
-				      struct bucket *b,
+				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
+	hash = hash & HASHTAB_MAP_LOCK_MASK;
 	if (htab_use_raw_lock(htab))
 		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
 	else
 		spin_unlock_irqrestore(&b->lock, flags);
+	__this_cpu_dec(*(htab->map_locked[hash]));
+	migrate_enable();
 }
 
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
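The guard added to htab_lock_bucket() is the heart of the change: before taking the bucket lock, the current CPU's counter for the hash's lock slot is incremented, and if it was already non-zero, an acquisition is already in flight on this CPU (for example, a tracing BPF program fired inside the hashtab code and re-entered the same map), so the helper backs off with -EBUSY instead of deadlocking. A rough user-space analogue of the pattern, with a per-thread counter standing in for the per-CPU one (all names below are illustrative, not kernel APIs):

	#include <errno.h>
	#include <pthread.h>

	#define LOCK_COUNT 8
	#define LOCK_MASK  (LOCK_COUNT - 1)

	static pthread_mutex_t locks[LOCK_COUNT] = {
		[0 ... LOCK_COUNT - 1] = PTHREAD_MUTEX_INITIALIZER /* GNU range init */
	};
	/* Per-thread depth counters, standing in for the per-CPU map_locked. */
	static _Thread_local int lock_depth[LOCK_COUNT];

	static int guarded_lock(unsigned int hash)
	{
		unsigned int slot = hash & LOCK_MASK;

		/* Analogue of __this_cpu_inc_return(...) != 1: detect re-entry. */
		if (++lock_depth[slot] != 1) {
			--lock_depth[slot];
			return -EBUSY;	/* already mid-acquisition here */
		}
		pthread_mutex_lock(&locks[slot]);
		return 0;
	}

	static void guarded_unlock(unsigned int hash)
	{
		unsigned int slot = hash & LOCK_MASK;

		pthread_mutex_unlock(&locks[slot]);
		--lock_depth[slot];
	}

In the kernel version, the migrate_disable()/migrate_enable() pair is what makes "this CPU" a stable notion across the increment and the matching decrement; the thread-local analogue gets that stability for free.
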
@@ -422,8 +448,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	struct bpf_htab *htab;
+	int err, i;
 	u64 cost;
-	int err;
 
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
@@ -480,6 +506,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (!htab->buckets)
 		goto free_charge;
 
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
+		htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
+							 sizeof(int), GFP_USER);
+		if (!htab->map_locked[i])
+			goto free_map_locked;
+	}
+
 	if (htab->map.map_flags & BPF_F_ZERO_SEED)
 		htab->hashrnd = 0;
 	else
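__alloc_percpu_gfp(size, align, gfp) hands back one instance of the object per possible CPU; here each of the eight slots gets a per-CPU int, aligned to sizeof(int), from GFP_USER memory. A condensed sketch of one such counter's life cycle (real kernel APIs, error handling trimmed):

	#include <linux/percpu.h>
	#include <linux/preempt.h>

	static int demo_counter(void)
	{
		int __percpu *ctr;

		ctr = __alloc_percpu_gfp(sizeof(int), sizeof(int), GFP_USER);
		if (!ctr)
			return -ENOMEM;

		migrate_disable();		/* pin to one CPU's copy */
		this_cpu_inc(*ctr);
		/* ... critical section ... */
		this_cpu_dec(*ctr);
		migrate_enable();

		free_percpu(ctr);		/* NULL-safe, like kfree() */
		return 0;
	}

free_percpu() ignoring NULL is what lets the free_map_locked unwind path below loop over all eight slots even when the allocation failed partway through.
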
@@ -490,7 +523,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_buckets;
+			goto free_map_locked;
 
 		if (!percpu && !lru) {
 			/* lru itself can remove the least used element, so
@@ -506,7 +539,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 
 free_prealloc:
 	prealloc_destroy(htab);
-free_buckets:
+free_map_locked:
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	bpf_map_area_free(htab->buckets);
 free_charge:
 	bpf_map_charge_finish(&htab->map.memory);
@@ -687,20 +722,23 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 	struct hlist_nulls_node *n;
 	unsigned long flags;
 	struct bucket *b;
+	int ret;
 
 	tgt_l = container_of(node, struct htab_elem, lru_node);
 	b = __select_bucket(htab, tgt_l->hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
+	if (ret)
+		return false;
 
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
 			hlist_nulls_del_rcu(&l->hash_node);
 			break;
 		}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, tgt_l->hash, flags);
 
 	return l == tgt_l;
 }
@@ -972,7 +1010,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		 */
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1013,7 +1053,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
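From this point the patch is largely mechanical: every update and delete path threads its hash into the lock helpers and propagates the new -EBUSY failure up to its caller, so a map operation can now fail transiently instead of deadlocking. User space that drives hash maps under heavy tracing may want to allow for that; a hypothetical retry wrapper around libbpf's bpf_map_update_elem() (map_fd, key and value are assumed to be set up elsewhere; classic libbpf returns -1 and sets errno on failure):

	#include <errno.h>
	#include <bpf/bpf.h>

	/* Hypothetical helper: retry an update that failed with EBUSY.
	 * The unbounded loop is for illustration; real code would cap it. */
	static int update_elem_retry(int map_fd, const void *key, const void *value)
	{
		int err;

		do {
			err = bpf_map_update_elem(map_fd, key, value, BPF_ANY);
		} while (err && errno == EBUSY);

		return err;
	}
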
@@ -1051,7 +1091,9 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -ENOMEM;
 	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1070,7 +1112,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 	ret = 0;
 
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 
 	if (ret)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@ -1105,7 +1147,9 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1128,7 +1172,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1168,7 +1212,9 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 		return -ENOMEM;
 	}
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1190,7 +1236,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	}
 	ret = 0;
 err:
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l_new)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 	return ret;
@@ -1218,7 +1264,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1228,17 +1274,20 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
 	if (l) {
 		hlist_nulls_del_rcu(&l->hash_node);
 		free_htab_elem(htab, l);
-		ret = 0;
+	} else {
+		ret = -ENOENT;
 	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	return ret;
 }
 
@@ -1250,7 +1299,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	struct htab_elem *l;
 	unsigned long flags;
 	u32 hash, key_size;
-	int ret = -ENOENT;
+	int ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
@@ -1260,16 +1309,18 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	flags = htab_lock_bucket(htab, b);
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
-	if (l) {
+	if (l)
 		hlist_nulls_del_rcu(&l->hash_node);
-		ret = 0;
-	}
+	else
+		ret = -ENOENT;
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, hash, flags);
 	if (l)
 		bpf_lru_push_free(&htab->lru, &l->lru_node);
 	return ret;
@@ -1295,6 +1346,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 static void htab_map_free(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	int i;
 
 	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
 	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
@@ -1312,6 +1364,9 @@ static void htab_map_free(struct bpf_map *map)
 
 	free_percpu(htab->extra_elems);
 	bpf_map_area_free(htab->buckets);
+	lockdep_unregister_key(&htab->lockdep_key);
+	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+		free_percpu(htab->map_locked[i]);
 	kfree(htab);
 }
 
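Teardown mirrors setup: the key registered in htab_init_buckets() is unregistered once no bucket lock can be taken any more, and the eight per-CPU counters are freed. The dynamic lockdep-key pattern, reduced to its essentials (the lockdep calls are the real API; struct foo is an illustrative stand-in):

	#include <linux/lockdep.h>
	#include <linux/spinlock.h>

	struct foo {
		spinlock_t lock;
		struct lock_class_key key;	/* one lockdep class per instance */
	};

	static void foo_init(struct foo *f)
	{
		lockdep_register_key(&f->key);
		spin_lock_init(&f->lock);
		lockdep_set_class(&f->lock, &f->key);
	}

	static void foo_free(struct foo *f)
	{
		/* Only after f->lock can no longer be acquired. */
		lockdep_unregister_key(&f->key);
	}

Giving every hashtab its own class means lockdep no longer lumps all hash maps' bucket locks into one class, so a report involving one map does not implicate every other map.
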
@@ -1415,8 +1470,11 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	b = &htab->buckets[batch];
 	head = &b->head;
 	/* do not grab the lock unless need it (bucket_cnt > 0). */
-	if (locked)
-		flags = htab_lock_bucket(htab, b);
+	if (locked) {
+		ret = htab_lock_bucket(htab, b, batch, &flags);
+		if (ret)
+			goto next_batch;
+	}
 
 	bucket_cnt = 0;
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
@@ -1433,7 +1491,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		goto after_loop;
@@ -1444,7 +1502,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		/* Note that since bucket_cnt > 0 here, it is implicit
 		 * that the locked was grabbed, so release it.
 		 */
-		htab_unlock_bucket(htab, b, flags);
+		htab_unlock_bucket(htab, b, batch, flags);
 		rcu_read_unlock();
 		bpf_enable_instrumentation();
 		kvfree(keys);
@@ -1497,7 +1555,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 		dst_val += value_size;
 	}
 
-	htab_unlock_bucket(htab, b, flags);
+	htab_unlock_bucket(htab, b, batch, flags);
 	locked = false;
 
 	while (node_to_free) {