Skip to content

Commit 65c6c3e

Browse files
committed
Add support for new compat feature "sparse_super2"
In practice, it is **extremely** rare for users to try to use more than the first backup superblock located at the beginning of block group #1. (i.e., at block number 32768 for file systems with a 4k block size). This new compat feature restricts the backup superblock to block group #1 and the last block group in the file system. Aside from reducing the overhead of the file system by a small number of blocks, by eliminating the rest of the backup superblocks, it allows us to have a much more flexible metadata layout. For example, we can force all of the allocation bitmaps and inode table blocks to the beginning of the disk, which allows most of the disk to be exclusively used for contiguous data blocks. This simplifies taking advantage of certain HDD specific features, such as Shingled Magnetic Recording (aka Shingled Drives), and the TCG's OPAL Storage Specification where having a simple mapping between LBA block ranges and the data blocks used by the file system can make life much simpler. Signed-off-by: "Theodore Ts'o" <[email protected]>
1 parent b818205 commit 65c6c3e

16 files changed

+300
-9
lines changed

debugfs/set_fields.c

+2
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ static struct field_set_info super_fields[] = {
150150
{ "usr_quota_inum", &set_sb.s_usr_quota_inum, NULL, 4, parse_uint },
151151
{ "grp_quota_inum", &set_sb.s_grp_quota_inum, NULL, 4, parse_uint },
152152
{ "overhead_blocks", &set_sb.s_overhead_blocks, NULL, 4, parse_uint },
153+
{ "backup_bgs", &set_sb.s_backup_bgs[0], NULL, 4, parse_uint,
154+
FLAG_ARRAY, 2 },
153155
{ "checksum", &set_sb.s_checksum, NULL, 4, parse_uint },
154156
{ 0, 0, 0, 0 }
155157
};

lib/e2p/feature.c

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ static struct feature feature_list[] = {
4343
"lazy_bg" },
4444
{ E2P_FEATURE_COMPAT, EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP,
4545
"snapshot_bitmap" },
46+
{ E2P_FEATURE_COMPAT, EXT4_FEATURE_COMPAT_SPARSE_SUPER2,
47+
"sparse_super2" },
4648

4749
{ E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
4850
"sparse_super" },

lib/e2p/ls.c

+8
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,14 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
368368
fprintf(f, "type %u\n", sb->s_jnl_backup_type);
369369
}
370370
}
371+
if (sb->s_backup_bgs[0] || sb->s_backup_bgs[1]) {
372+
fprintf(f, "Backup block groups: ");
373+
if (sb->s_backup_bgs[0])
374+
fprintf(f, "%u ", sb->s_backup_bgs[0]);
375+
if (sb->s_backup_bgs[1])
376+
fprintf(f, "%u ", sb->s_backup_bgs[1]);
377+
fputc('\n', f);
378+
}
371379
if (sb->s_snapshot_inum) {
372380
fprintf(f, "Snapshot inode: %u\n",
373381
sb->s_snapshot_inum);

lib/ext2fs/closefs.c

+10-2
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,16 @@ static int test_root(unsigned int a, unsigned int b)
3535

3636
int ext2fs_bg_has_super(ext2_filsys fs, dgrp_t group)
3737
{
38-
if (!(fs->super->s_feature_ro_compat &
39-
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) || group <= 1)
38+
if (group == 0)
39+
return 1;
40+
if (fs->super->s_feature_compat & EXT4_FEATURE_COMPAT_SPARSE_SUPER2) {
41+
if (group == fs->super->s_backup_bgs[0] ||
42+
group == fs->super->s_backup_bgs[1])
43+
return 1;
44+
return 0;
45+
}
46+
if ((group <= 1) || !(fs->super->s_feature_ro_compat &
47+
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER))
4048
return 1;
4149
if (!(group & 1))
4250
return 0;

lib/ext2fs/ext2_fs.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,8 @@ struct ext2_super_block {
645645
__u32 s_usr_quota_inum; /* inode number of user quota file */
646646
__u32 s_grp_quota_inum; /* inode number of group quota file */
647647
__u32 s_overhead_blocks; /* overhead blocks/clusters in fs */
648-
__u32 s_reserved[108]; /* Padding to the end of the block */
648+
__u32 s_backup_bgs[2]; /* If sparse_super2 enabled */
649+
__u32 s_reserved[106]; /* Padding to the end of the block */
649650
__u32 s_checksum; /* crc32c(superblock) */
650651
};
651652

@@ -696,6 +697,7 @@ struct ext2_super_block {
696697
#define EXT2_FEATURE_COMPAT_LAZY_BG 0x0040
697698
/* #define EXT2_FEATURE_COMPAT_EXCLUDE_INODE 0x0080 not used, legacy */
698699
#define EXT2_FEATURE_COMPAT_EXCLUDE_BITMAP 0x0100
700+
#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200
699701

700702

701703
#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001

lib/ext2fs/ext2fs.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,8 @@ typedef struct ext2_icount *ext2_icount_t;
550550
EXT3_FEATURE_COMPAT_HAS_JOURNAL|\
551551
EXT2_FEATURE_COMPAT_RESIZE_INODE|\
552552
EXT2_FEATURE_COMPAT_DIR_INDEX|\
553-
EXT2_FEATURE_COMPAT_EXT_ATTR)
553+
EXT2_FEATURE_COMPAT_EXT_ATTR|\
554+
EXT4_FEATURE_COMPAT_SPARSE_SUPER2)
554555

555556
/* This #ifdef is temporary until compression is fully supported */
556557
#ifdef ENABLE_COMPRESSION

lib/ext2fs/initialize.c

+17
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ errcode_t ext2fs_initialize(const char *name, int flags,
173173
set_field(s_raid_stripe_width, 0); /* default stripe width: 0 */
174174
set_field(s_log_groups_per_flex, 0);
175175
set_field(s_flags, 0);
176+
assign_field(s_backup_bgs[0]);
177+
assign_field(s_backup_bgs[1]);
176178
if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
177179
retval = EXT2_ET_UNSUPP_FEATURE;
178180
goto cleanup;
@@ -422,6 +424,21 @@ errcode_t ext2fs_initialize(const char *name, int flags,
422424
* count.
423425
*/
424426

427+
/* Set up the locations of the backup superblocks */
428+
if (super->s_feature_compat & EXT4_FEATURE_COMPAT_SPARSE_SUPER2) {
429+
if (super->s_backup_bgs[0] >= fs->group_desc_count)
430+
super->s_backup_bgs[0] = fs->group_desc_count - 1;
431+
if (super->s_backup_bgs[1] >= fs->group_desc_count)
432+
super->s_backup_bgs[1] = fs->group_desc_count - 1;
433+
if (super->s_backup_bgs[0] == super->s_backup_bgs[1])
434+
super->s_backup_bgs[1] = 0;
435+
if (super->s_backup_bgs[0] > super->s_backup_bgs[1]) {
436+
__u32 t = super->s_backup_bgs[0];
437+
super->s_backup_bgs[0] = super->s_backup_bgs[1];
438+
super->s_backup_bgs[1] = t;
439+
}
440+
}
441+
425442
retval = ext2fs_get_mem(strlen(fs->device_name) + 80, &buf);
426443
if (retval)
427444
goto cleanup;

lib/ext2fs/res_gdt.c

+13
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@ static unsigned int list_backups(ext2_filsys fs, unsigned int *three,
3131
int mult = 3;
3232
unsigned int ret;
3333

34+
if (fs->super->s_feature_compat & EXT4_FEATURE_COMPAT_SPARSE_SUPER2) {
35+
if (*min == 1) {
36+
*min += 1;
37+
if (fs->super->s_backup_bgs[0])
38+
return fs->super->s_backup_bgs[0];
39+
}
40+
if (*min == 2) {
41+
*min += 1;
42+
if (fs->super->s_backup_bgs[1])
43+
return fs->super->s_backup_bgs[1];
44+
}
45+
return fs->group_desc_count;
46+
}
3447
if (!(fs->super->s_feature_ro_compat &
3548
EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
3649
ret = *min;

lib/ext2fs/swapfs.c

+2
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
9999
}
100100
for (; i < 17; i++)
101101
sb->s_jnl_blocks[i] = ext2fs_swab32(sb->s_jnl_blocks[i]);
102+
sb->s_backup_bgs[0] = ext2fs_swab32(sb->s_backup_bgs[0]);
103+
sb->s_backup_bgs[1] = ext2fs_swab32(sb->s_backup_bgs[1]);
102104
}
103105

104106
void ext2fs_swap_group_desc2(ext2_filsys fs, struct ext2_group_desc *gdp)

lib/ext2fs/tst_super_size.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ int main(int argc, char **argv)
135135
check_field(s_usr_quota_inum, 4);
136136
check_field(s_grp_quota_inum, 4);
137137
check_field(s_overhead_blocks, 4);
138-
check_field(s_reserved, 108 * 4);
138+
check_field(s_backup_bgs, 8);
139+
check_field(s_reserved, 106 * 4);
139140
check_field(s_checksum, 4);
140141
do_field("Superblock end", 0, 0, cur_offset, 1024);
141142
#endif

misc/ext4.5.in

+11
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,17 @@ kernels from mounting file systems that they could not understand.
171171
.\" .br
172172
.\" .B Future feature, available in e2fsprogs 1.43-WIP
173173
.TP
174+
.B sparse_super2
175+
.br
176+
This feature indicates that there will be at most two backup
177+
superblocks and block group descriptors. The block groups used to store
178+
the backup superblock and block group descriptors are stored in the
179+
superblock, but typically, one will be located at the beginning of block
180+
group #1, and one in the last block group in the file system. This
181+
feature is essentially a more extreme version of sparse_super and is
182+
designed to allow a much larger percentage of the disk to have
183+
contiguous blocks available for data files.
184+
.TP
174185
.B meta_bg
175186
.br
176187
This ext4 feature allows file systems to be resized on-line without explicitly

misc/mke2fs.8.in

+6
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,12 @@ small risk if the system crashes before the journal has been overwritten
274274
entirely one time. If the option value is omitted, it defaults to 1 to
275275
enable lazy journal inode zeroing.
276276
.TP
277+
.BI num_backup_sb= <0|1|2>
278+
If the
279+
.B sparse_super2
280+
file system feature is enabled, this option controls whether there will
281+
be 0, 1, or 2 backup superblocks created in the file system.
282+
.TP
277283
.BI root_owner [=uid:gid]
278284
Specify the numeric user and group ID of the root directory. If no UID:GID
279285
is specified, use the user and group ID of the user running \fBmke2fs\fR.

misc/mke2fs.c

+29-2
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ static int discard = 1; /* attempt to discard device before fs creation */
8888
static int direct_io;
8989
static int force;
9090
static int noaction;
91+
static int num_backups = 2; /* number of backup bg's for sparse_super2 */
9192
static uid_t root_uid;
9293
static gid_t root_gid;
9394
int journal_size;
@@ -739,6 +740,21 @@ static void parse_extended_opts(struct ext2_super_block *param,
739740
r_usage++;
740741
continue;
741742
}
743+
} else if (strcmp(token, "num_backup_sb") == 0) {
744+
if (!arg) {
745+
r_usage++;
746+
badopt = token;
747+
continue;
748+
}
749+
num_backups = strtoul(arg, &p, 0);
750+
if (*p || num_backups > 2) {
751+
fprintf(stderr,
752+
_("Invalid # of backup "
753+
"superbocks: %s\n"),
754+
arg);
755+
r_usage++;
756+
continue;
757+
}
742758
} else if (strcmp(token, "stride") == 0) {
743759
if (!arg) {
744760
r_usage++;
@@ -895,6 +911,7 @@ static void parse_extended_opts(struct ext2_super_block *param,
895911
"\tis set off by an equals ('=') sign.\n\n"
896912
"Valid extended options are:\n"
897913
"\tmmp_update_interval=<interval>\n"
914+
"\tnum_backup_sb=<0|1|2>\n"
898915
"\tstride=<RAID per-disk data chunk in blocks>\n"
899916
"\tstripe-width=<RAID stride * data disks in blocks>\n"
900917
"\toffset=<offset to create the file system>\n"
@@ -925,7 +942,8 @@ static __u32 ok_features[3] = {
925942
EXT3_FEATURE_COMPAT_HAS_JOURNAL |
926943
EXT2_FEATURE_COMPAT_RESIZE_INODE |
927944
EXT2_FEATURE_COMPAT_DIR_INDEX |
928-
EXT2_FEATURE_COMPAT_EXT_ATTR,
945+
EXT2_FEATURE_COMPAT_EXT_ATTR |
946+
EXT4_FEATURE_COMPAT_SPARSE_SUPER2,
929947
/* Incompat */
930948
EXT2_FEATURE_INCOMPAT_FILETYPE|
931949
EXT3_FEATURE_INCOMPAT_EXTENTS|
@@ -1975,6 +1993,8 @@ static void PRS(int argc, char *argv[])
19751993
}
19761994
#endif
19771995

1996+
num_backups = get_int_from_profile(fs_types, "num_backup_sb", 2);
1997+
19781998
blocksize = EXT2_BLOCK_SIZE(&fs_param);
19791999

19802000
/*
@@ -2171,6 +2191,13 @@ static void PRS(int argc, char *argv[])
21712191
ext2fs_r_blocks_count_set(&fs_param, reserved_ratio *
21722192
ext2fs_blocks_count(&fs_param) / 100.0);
21732193

2194+
if (fs_param.s_feature_compat & EXT4_FEATURE_COMPAT_SPARSE_SUPER2) {
2195+
if (num_backups >= 1)
2196+
fs_param.s_backup_bgs[0] = 1;
2197+
if (num_backups >= 2)
2198+
fs_param.s_backup_bgs[1] = ~0;
2199+
}
2200+
21742201
free(fs_type);
21752202
free(usage_types);
21762203
}
@@ -2603,8 +2630,8 @@ int main (int argc, char *argv[])
26032630
read_bb_file(fs, &bb_list, bad_blocks_filename);
26042631
if (cflag)
26052632
test_disk(fs, &bb_list);
2606-
26072633
handle_bad_blocks(fs, bb_list);
2634+
26082635
fs->stride = fs_stride = fs->super->s_raid_stride;
26092636
if (!quiet)
26102637
printf("%s", _("Allocating group tables: "));

misc/mke2fs.conf.5.in

+5
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ first mounted.
360360
.I journal_location
361361
This relation specifies the location of the journal.
362362
.TP
363+
.I num_backup_sb
364+
This relation indicates whether file systems with the
365+
.B sparse_super2
366+
feature enabled should be created with 0, 1, or 2 backup superblocks.
367+
.TP
363368
.I inode_ratio
364369
This relation specifies the default inode ratio if the user does not
365370
specify one on the command line.

resize/online.c

+8
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@ errcode_t online_resize_fs(ext2_filsys fs, const char *mtpt,
7676
no_resize_ioctl = 1;
7777
}
7878

79+
if (EXT2_HAS_COMPAT_FEATURE(fs->super,
80+
EXT4_FEATURE_COMPAT_SPARSE_SUPER2) &&
81+
(access("/sys/fs/ext4/features/sparse_super2", R_OK) != 0)) {
82+
com_err(program_name, 0, _("kernel does not support online "
83+
"resize with sparse_super2"));
84+
exit(1);
85+
}
86+
7987
printf(_("Filesystem at %s is mounted on %s; "
8088
"on-line resizing required\n"), fs->device_name, mtpt);
8189

0 commit comments

Comments
 (0)