Skip to content

Commit c4703ce

Browse files
committed
libnvdimm/namespace: Fix label tracking error
Users have reported intermittent occurrences of DIMM initialization failures due to duplicate allocations of address capacity detected in the labels, or errors of the form below, both have the same root cause. nd namespace1.4: failed to track label: 0 WARNING: CPU: 17 PID: 1381 at drivers/nvdimm/label.c:863 RIP: 0010:__pmem_label_update+0x56c/0x590 [libnvdimm] Call Trace: ? nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm] nd_pmem_namespace_label_update+0xd6/0x160 [libnvdimm] uuid_store+0x17e/0x190 [libnvdimm] kernfs_fop_write+0xf0/0x1a0 vfs_write+0xb7/0x1b0 ksys_write+0x57/0xd0 do_syscall_64+0x60/0x210 Unfortunately those reports were typically with a busy parallel namespace creation / destruction loop making it difficult to see the components of the bug. However, Jane provided a simple reproducer using the work-in-progress sub-section implementation. When ndctl is reconfiguring a namespace it may take an existing defunct / disabled namespace and reconfigure it with a new uuid and other parameters. Critically namespace_update_uuid() takes existing address resources and renames them for the new namespace to use / reconfigure as it sees fit. The bug is that this rename only happens in the resource tracking tree. Existing labels with the old uuid are not reaped leading to a scenario where multiple active labels reference the same span of address range. Teach namespace_update_uuid() to flag any references to the old uuid for reaping at the next label update attempt. Cc: <[email protected]> Fixes: bf9bccc ("libnvdimm: pmem label sets and namespace instantiation") Link: pmem/ndctl#91 Reported-by: Jane Chu <[email protected]> Reported-by: Jeff Moyer <[email protected]> Reported-by: Erwin Tsaur <[email protected]> Cc: Johannes Thumshirn <[email protected]> Signed-off-by: Dan Williams <[email protected]>
1 parent 92f6f2d commit c4703ce

File tree

3 files changed

+35
-13
lines changed

3 files changed

+35
-13
lines changed

drivers/nvdimm/label.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -756,16 +756,27 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
756756
return &guid_null;
757757
}
758758

759+
static void reap_victim(struct nd_mapping *nd_mapping,
760+
struct nd_label_ent *victim)
761+
{
762+
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
763+
u32 slot = to_slot(ndd, victim->label);
764+
765+
dev_dbg(ndd->dev, "free: %d\n", slot);
766+
nd_label_free_slot(ndd, slot);
767+
victim->label = NULL;
768+
}
769+
759770
static int __pmem_label_update(struct nd_region *nd_region,
760771
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
761772
int pos, unsigned long flags)
762773
{
763774
struct nd_namespace_common *ndns = &nspm->nsio.common;
764775
struct nd_interleave_set *nd_set = nd_region->nd_set;
765776
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
766-
struct nd_label_ent *label_ent, *victim = NULL;
767777
struct nd_namespace_label *nd_label;
768778
struct nd_namespace_index *nsindex;
779+
struct nd_label_ent *label_ent;
769780
struct nd_label_id label_id;
770781
struct resource *res;
771782
unsigned long *free;
@@ -834,18 +845,10 @@ static int __pmem_label_update(struct nd_region *nd_region,
834845
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
835846
if (!label_ent->label)
836847
continue;
837-
if (memcmp(nspm->uuid, label_ent->label->uuid,
838-
NSLABEL_UUID_LEN) != 0)
839-
continue;
840-
victim = label_ent;
841-
list_move_tail(&victim->list, &nd_mapping->labels);
842-
break;
843-
}
844-
if (victim) {
845-
dev_dbg(ndd->dev, "free: %d\n", slot);
846-
slot = to_slot(ndd, victim->label);
847-
nd_label_free_slot(ndd, slot);
848-
victim->label = NULL;
848+
if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)
849+
|| memcmp(nspm->uuid, label_ent->label->uuid,
850+
NSLABEL_UUID_LEN) == 0)
851+
reap_victim(nd_mapping, label_ent);
849852
}
850853

851854
/* update index */

drivers/nvdimm/namespace_devs.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,12 +1247,27 @@ static int namespace_update_uuid(struct nd_region *nd_region,
12471247
for (i = 0; i < nd_region->ndr_mappings; i++) {
12481248
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
12491249
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
1250+
struct nd_label_ent *label_ent;
12501251
struct resource *res;
12511252

12521253
for_each_dpa_resource(ndd, res)
12531254
if (strcmp(res->name, old_label_id.id) == 0)
12541255
sprintf((void *) res->name, "%s",
12551256
new_label_id.id);
1257+
1258+
mutex_lock(&nd_mapping->lock);
1259+
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
1260+
struct nd_namespace_label *nd_label = label_ent->label;
1261+
struct nd_label_id label_id;
1262+
1263+
if (!nd_label)
1264+
continue;
1265+
nd_label_gen_id(&label_id, nd_label->uuid,
1266+
__le32_to_cpu(nd_label->flags));
1267+
if (strcmp(old_label_id.id, label_id.id) == 0)
1268+
set_bit(ND_LABEL_REAP, &label_ent->flags);
1269+
}
1270+
mutex_unlock(&nd_mapping->lock);
12561271
}
12571272
kfree(*old_uuid);
12581273
out:

drivers/nvdimm/nd.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,12 @@ struct nd_percpu_lane {
113113
spinlock_t lock;
114114
};
115115

116+
enum nd_label_flags {
117+
ND_LABEL_REAP,
118+
};
116119
struct nd_label_ent {
117120
struct list_head list;
121+
unsigned long flags;
118122
struct nd_namespace_label *label;
119123
};
120124

0 commit comments

Comments
 (0)