
Commit 6048c64

Andreas Gruenbacher authored and tytso committed
mbcache: add reusable flag to cache entries
To reduce the amount of damage caused by a single bad block, we limit the number of inodes sharing an xattr block to 1024. Thus there can be multiple xattr blocks with the same contents when there are lots of files with the same extended attributes. These xattr blocks naturally result in hash collisions and can form long hash chains, and we unnecessarily check each such block only to find out that we cannot use it because it is already shared by too many inodes.

Add a reusable flag to cache entries which is cleared when a cache entry has reached its maximum refcount. Cache entries which are not marked reusable are skipped by mb_cache_entry_find_{first,next}. This significantly speeds up mbcache when there are many identical xattr blocks. For example, for xattr-bench with 5 values and each process handling 20000 files, the run with 64 processes is 25x faster with this patch, and even with 8 processes the speedup is almost 3x. We have also verified that for situations where there is only one xattr block of each kind, the patch doesn't have a measurable cost.

[JK: Remove handling of setting the same value since it is not needed anymore, check for races in e_reusable setting, improve changelog, add measurements]

Signed-off-by: Andreas Gruenbacher <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 3fd1646 commit 6048c64
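
The pattern the changelog describes, as a filesystem caller would use it, is roughly: create cache entries as reusable while the block's refcount is below the sharing limit, and flip e_reusable back on when a dropped reference takes the block below the limit again. A minimal sketch under those assumptions (MY_FS_XATTR_REFCOUNT_MAX, my_fs_cache_insert() and my_fs_drop_ref() are illustrative names, not part of this patch; the real callers are the ext4 hunks below):

#include <linux/gfp.h>
#include <linux/mbcache.h>

/* Illustrative sharing limit; ext4 uses EXT4_XATTR_REFCOUNT_MAX (1024). */
#define MY_FS_XATTR_REFCOUNT_MAX	1024

/* Cache a block, marking it reusable only while it can take more references. */
static int my_fs_cache_insert(struct mb_cache *cache, u32 hash,
			      sector_t block, u32 refcount)
{
	bool reusable = refcount < MY_FS_XATTR_REFCOUNT_MAX;

	return mb_cache_entry_create(cache, GFP_NOFS, hash, block, reusable);
}

/*
 * After dropping a reference: if the block just fell below the limit,
 * mark its cache entry reusable again so lookups can see it.
 */
static void my_fs_drop_ref(struct mb_cache *cache, u32 hash,
			   sector_t block, u32 new_refcount)
{
	struct mb_cache_entry *ce;

	if (new_refcount != MY_FS_XATTR_REFCOUNT_MAX - 1)
		return;
	ce = mb_cache_entry_get(cache, hash, block);
	if (ce) {
		ce->e_reusable = 1;
		mb_cache_entry_put(cache, ce);
	}
}

Note that the ext4 hunks below do both refcount and e_reusable updates under the xattr block's buffer lock; the sketch leaves that serialization out.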

File tree

4 files changed, +81 -30 lines changed

fs/ext2/xattr.c

Lines changed: 1 addition & 1 deletion
@@ -823,7 +823,7 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
 	__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
 	int error;
 
-	error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr);
+	error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1);
 	if (error) {
 		if (error == -EBUSY) {
 			ea_bdebug(bh, "already in cache (%d cache entries)",

fs/ext4/xattr.c

Lines changed: 42 additions & 24 deletions
@@ -545,6 +545,8 @@ static void
 ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 			 struct buffer_head *bh)
 {
+	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+	u32 hash, ref;
 	int error = 0;
 
 	BUFFER_TRACE(bh, "get_write_access");
@@ -553,23 +555,34 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		goto out;
 
 	lock_buffer(bh);
-	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
-		__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
-
+	hash = le32_to_cpu(BHDR(bh)->h_hash);
+	ref = le32_to_cpu(BHDR(bh)->h_refcount);
+	if (ref == 1) {
 		ea_bdebug(bh, "refcount now=0; freeing");
 		/*
 		 * This must happen under buffer lock for
 		 * ext4_xattr_block_set() to reliably detect freed block
 		 */
-		mb_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash,
-					    bh->b_blocknr);
+		mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
 		get_bh(bh);
 		unlock_buffer(bh);
 		ext4_free_blocks(handle, inode, bh, 0, 1,
 				 EXT4_FREE_BLOCKS_METADATA |
 				 EXT4_FREE_BLOCKS_FORGET);
 	} else {
-		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+		ref--;
+		BHDR(bh)->h_refcount = cpu_to_le32(ref);
+		if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
+			struct mb_cache_entry *ce;
+
+			ce = mb_cache_entry_get(ext4_mb_cache, hash,
+						bh->b_blocknr);
+			if (ce) {
+				ce->e_reusable = 1;
+				mb_cache_entry_put(ext4_mb_cache, ce);
+			}
+		}
+
 		/*
 		 * Beware of this ugliness: Releasing of xattr block references
 		 * from different inodes can race and so we have to protect
@@ -872,6 +885,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			if (new_bh == bs->bh)
 				ea_bdebug(new_bh, "keeping");
 			else {
+				u32 ref;
+
 				/* The old block is released after updating
 				   the inode. */
 				error = dquot_alloc_block(inode,
@@ -886,15 +901,18 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				lock_buffer(new_bh);
 				/*
 				 * We have to be careful about races with
-				 * freeing or rehashing of xattr block. Once we
-				 * hold buffer lock xattr block's state is
-				 * stable so we can check whether the block got
-				 * freed / rehashed or not. Since we unhash
-				 * mbcache entry under buffer lock when freeing
-				 * / rehashing xattr block, checking whether
-				 * entry is still hashed is reliable.
+				 * freeing, rehashing or adding references to
+				 * xattr block. Once we hold buffer lock xattr
+				 * block's state is stable so we can check
+				 * whether the block got freed / rehashed or
+				 * not. Since we unhash mbcache entry under
+				 * buffer lock when freeing / rehashing xattr
+				 * block, checking whether entry is still
+				 * hashed is reliable. Same rules hold for
+				 * e_reusable handling.
 				 */
-				if (hlist_bl_unhashed(&ce->e_hash_list)) {
+				if (hlist_bl_unhashed(&ce->e_hash_list) ||
+				    !ce->e_reusable) {
 					/*
 					 * Undo everything and check mbcache
 					 * again.
@@ -909,9 +927,12 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 					new_bh = NULL;
 					goto inserted;
 				}
-				le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
+				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+				if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+					ce->e_reusable = 0;
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
-					  le32_to_cpu(BHDR(new_bh)->h_refcount));
+					  ref);
 				unlock_buffer(new_bh);
 				error = ext4_handle_dirty_xattr_block(handle,
 								      inode,
@@ -1566,11 +1587,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
 static void
 ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
 {
-	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+	struct ext4_xattr_header *header = BHDR(bh);
+	__u32 hash = le32_to_cpu(header->h_hash);
+	int reusable = le32_to_cpu(header->h_refcount) <
+		       EXT4_XATTR_REFCOUNT_MAX;
 	int error;
 
 	error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
-				      bh->b_blocknr);
+				      bh->b_blocknr, reusable);
 	if (error) {
 		if (error == -EBUSY)
 			ea_bdebug(bh, "already in cache");
@@ -1645,12 +1669,6 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
 		if (!bh) {
 			EXT4_ERROR_INODE(inode, "block %lu read error",
 					 (unsigned long) ce->e_block);
-		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
-			   EXT4_XATTR_REFCOUNT_MAX) {
-			ea_idebug(inode, "block %lu refcount %d>=%d",
-				  (unsigned long) ce->e_block,
-				  le32_to_cpu(BHDR(bh)->h_refcount),
-				  EXT4_XATTR_REFCOUNT_MAX);
 		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
 			*pce = ce;
 			return bh;

fs/mbcache.c

Lines changed: 34 additions & 4 deletions
@@ -63,13 +63,14 @@ static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
  * @mask - gfp mask with which the entry should be allocated
  * @key - key of the entry
  * @block - block that contains data
+ * @reusable - is the block reusable by other inodes?
  *
  * Creates entry in @cache with key @key and records that data is stored in
  * block @block. The function returns -EBUSY if entry with the same key
  * and for the same block already exists in cache. Otherwise 0 is returned.
  */
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
-			  sector_t block)
+			  sector_t block, bool reusable)
 {
 	struct mb_cache_entry *entry, *dup;
 	struct hlist_bl_node *dup_node;
@@ -91,6 +92,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 	atomic_set(&entry->e_refcnt, 1);
 	entry->e_key = key;
 	entry->e_block = block;
+	entry->e_reusable = reusable;
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
@@ -137,7 +139,7 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
 	while (node) {
 		entry = hlist_bl_entry(node, struct mb_cache_entry,
 				       e_hash_list);
-		if (entry->e_key == key) {
+		if (entry->e_key == key && entry->e_reusable) {
 			atomic_inc(&entry->e_refcnt);
 			goto out;
 		}
@@ -184,10 +186,38 @@ struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
 }
 EXPORT_SYMBOL(mb_cache_entry_find_next);
 
+/*
+ * mb_cache_entry_get - get a cache entry by block number (and key)
+ * @cache - cache we work with
+ * @key - key of block number @block
+ * @block - block number
+ */
+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+					  sector_t block)
+{
+	struct hlist_bl_node *node;
+	struct hlist_bl_head *head;
+	struct mb_cache_entry *entry;
+
+	head = mb_cache_entry_head(cache, key);
+	hlist_bl_lock(head);
+	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+		if (entry->e_key == key && entry->e_block == block) {
+			atomic_inc(&entry->e_refcnt);
+			goto out;
+		}
+	}
+	entry = NULL;
+out:
+	hlist_bl_unlock(head);
+	return entry;
+}
+EXPORT_SYMBOL(mb_cache_entry_get);
+
 /* mb_cache_entry_delete_block - remove information about block from cache
  * @cache - cache we work with
- * @key - key of the entry to remove
- * @block - block containing data for @key
+ * @key - key of block @block
+ * @block - block number
  *
  * Remove entry from cache @cache with key @key with data stored in @block.
  */
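
Because __entry_find() now skips entries whose e_reusable flag is clear, a content-based lookup loop no longer needs its own refcount check (the ext4_xattr_cache_find() hunk above drops exactly that check). A rough caller-side sketch, assuming a hypothetical my_fs_block_matches() helper in place of ext4_xattr_cmp():

#include <linux/fs.h>
#include <linux/mbcache.h>

/* Hypothetical helper: does the block at this position match our content? */
bool my_fs_block_matches(struct inode *inode, sector_t block);

/*
 * Find a block with the given content hash that can still be shared.
 * Entries whose e_reusable flag is clear are skipped inside
 * mb_cache_entry_find_first()/_next(), so no refcount check is needed here.
 */
static sector_t my_fs_find_reusable_block(struct inode *inode,
					  struct mb_cache *cache, u32 hash)
{
	struct mb_cache_entry *ce;

	ce = mb_cache_entry_find_first(cache, hash);
	while (ce) {
		if (my_fs_block_matches(inode, ce->e_block)) {
			sector_t block = ce->e_block;

			/* find_first/find_next return with a reference held */
			mb_cache_entry_put(cache, ce);
			return block;
		}
		/* find_next drops the reference on the current entry */
		ce = mb_cache_entry_find_next(cache, ce);
	}
	return 0;	/* 0 stands in for "not found" in this sketch */
}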

include/linux/mbcache.h

Lines changed: 4 additions & 1 deletion
@@ -18,6 +18,7 @@ struct mb_cache_entry {
 	/* Key in hash - stable during lifetime of the entry */
 	u32			e_key;
 	u32			e_referenced:1;
+	u32			e_reusable:1;
 	/* Block number of hashed block - stable during lifetime of the entry */
 	sector_t		e_block;
 };
@@ -26,7 +27,7 @@ struct mb_cache *mb_cache_create(int bucket_bits);
 void mb_cache_destroy(struct mb_cache *cache);
 
 int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
-			  sector_t block);
+			  sector_t block, bool reusable);
 void __mb_cache_entry_free(struct mb_cache_entry *entry);
 static inline int mb_cache_entry_put(struct mb_cache *cache,
 				     struct mb_cache_entry *entry)
@@ -39,6 +40,8 @@ static inline int mb_cache_entry_put(struct mb_cache *cache,
 
 void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
 				 sector_t block);
+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+					  sector_t block);
 struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
 						 u32 key);
 struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
