Skip to content

Commit dc73290

Browse files
Bob Peterson (AstralBob)
authored and
Andreas Gruenbacher
committed
gfs2: Introduce flag for glock holder auto-demotion
This patch introduces a new HIF_MAY_DEMOTE flag and infrastructure that will allow glocks to be demoted automatically on locking conflicts. When a locking request comes in that isn't compatible with the locking state of an active holder and that holder has the HIF_MAY_DEMOTE flag set, the holder will be demoted before the incoming locking request is granted.

Note that this mechanism demotes active holders (with the HIF_HOLDER flag set), while before we were only demoting glocks without any active holders. This allows processes to keep hold of locks that may form a cyclic locking dependency; the core glock logic will then break those dependencies in case a conflicting locking request occurs. We'll use this to avoid giving up the inode glock proactively before faulting in pages.

Processes that allow a glock holder to be taken away indicate this by calling gfs2_holder_allow_demote(), which sets the HIF_MAY_DEMOTE flag. Later, they call gfs2_holder_disallow_demote() to clear the flag again, and then they check if their holder is still queued: if it is, they are still holding the glock; if it isn't, they can re-acquire the glock (or abort).

Signed-off-by: Bob Peterson <[email protected]>
Signed-off-by: Andreas Gruenbacher <[email protected]>
1 parent 6144464 commit dc73290

File tree

3 files changed

+200
-36
lines changed

3 files changed

+200
-36
lines changed

Diff for: fs/gfs2/glock.c

+179-36
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ struct gfs2_glock_iter {
5858
typedef void (*glock_examiner) (struct gfs2_glock * gl);
5959

6060
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
61+
static void __gfs2_glock_dq(struct gfs2_holder *gh);
6162

6263
static struct dentry *gfs2_root;
6364
static struct workqueue_struct *glock_workqueue;
@@ -197,6 +198,12 @@ static int demote_ok(const struct gfs2_glock *gl)
197198

198199
if (gl->gl_state == LM_ST_UNLOCKED)
199200
return 0;
201+
/*
202+
* Note that demote_ok is used for the lru process of disposing of
203+
* glocks. For this purpose, we don't care if the glock's holders
204+
* have the HIF_MAY_DEMOTE flag set or not. If someone is using
205+
* them, don't demote.
206+
*/
200207
if (!list_empty(&gl->gl_holders))
201208
return 0;
202209
if (glops->go_demote_ok)
@@ -379,7 +386,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
379386
struct gfs2_holder *gh, *tmp;
380387

381388
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
382-
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
389+
if (!test_bit(HIF_WAIT, &gh->gh_iflags))
383390
continue;
384391
if (ret & LM_OUT_ERROR)
385392
gh->gh_error = -EIO;
@@ -393,6 +400,40 @@ static void do_error(struct gfs2_glock *gl, const int ret)
393400
}
394401
}
395402

403+
/**
404+
* demote_incompat_holders - demote incompatible demoteable holders
405+
* @gl: the glock we want to promote
406+
* @new_gh: the new holder to be promoted
407+
*/
408+
static void demote_incompat_holders(struct gfs2_glock *gl,
409+
struct gfs2_holder *new_gh)
410+
{
411+
struct gfs2_holder *gh;
412+
413+
/*
414+
* Demote incompatible holders before we make ourselves eligible.
415+
* (This holder may or may not allow auto-demoting, but we don't want
416+
* to demote the new holder before it's even granted.)
417+
*/
418+
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
419+
/*
420+
* Since holders are at the front of the list, we stop when we
421+
* find the first non-holder.
422+
*/
423+
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
424+
return;
425+
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
426+
!may_grant(gl, new_gh, gh)) {
427+
/*
428+
* We should not recurse into do_promote because
429+
* __gfs2_glock_dq only calls handle_callback,
430+
* gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
431+
*/
432+
__gfs2_glock_dq(gh);
433+
}
434+
}
435+
}
436+
396437
/**
397438
* find_first_holder - find the first "holder" gh
398439
* @gl: the glock
@@ -411,6 +452,26 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
411452
return NULL;
412453
}
413454

455+
/**
456+
* find_first_strong_holder - find the first non-demoteable holder
457+
* @gl: the glock
458+
*
459+
* Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
460+
*/
461+
static inline struct gfs2_holder *
462+
find_first_strong_holder(struct gfs2_glock *gl)
463+
{
464+
struct gfs2_holder *gh;
465+
466+
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
467+
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
468+
return NULL;
469+
if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
470+
return gh;
471+
}
472+
return NULL;
473+
}
474+
414475
/**
415476
* do_promote - promote as many requests as possible on the current queue
416477
* @gl: The glock
@@ -425,14 +486,20 @@ __acquires(&gl->gl_lockref.lock)
425486
{
426487
const struct gfs2_glock_operations *glops = gl->gl_ops;
427488
struct gfs2_holder *gh, *tmp, *first_gh;
489+
bool incompat_holders_demoted = false;
428490
int ret;
429491

430492
restart:
431-
first_gh = find_first_holder(gl);
493+
first_gh = find_first_strong_holder(gl);
432494
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
433-
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
495+
if (!test_bit(HIF_WAIT, &gh->gh_iflags))
434496
continue;
435497
if (may_grant(gl, first_gh, gh)) {
498+
if (!incompat_holders_demoted) {
499+
demote_incompat_holders(gl, first_gh);
500+
incompat_holders_demoted = true;
501+
first_gh = gh;
502+
}
436503
if (gh->gh_list.prev == &gl->gl_holders &&
437504
glops->go_lock) {
438505
spin_unlock(&gl->gl_lockref.lock);
@@ -458,6 +525,11 @@ __acquires(&gl->gl_lockref.lock)
458525
gfs2_holder_wake(gh);
459526
continue;
460527
}
528+
/*
529+
* If we get here, it means we may not grant this holder for
530+
* some reason. If this holder is the head of the list, it
531+
* means we have a blocked holder at the head, so return 1.
532+
*/
461533
if (gh->gh_list.prev == &gl->gl_holders)
462534
return 1;
463535
do_error(gl, 0);
@@ -1372,7 +1444,7 @@ __acquires(&gl->gl_lockref.lock)
13721444
if (test_bit(GLF_LOCK, &gl->gl_flags)) {
13731445
struct gfs2_holder *first_gh;
13741446

1375-
first_gh = find_first_holder(gl);
1447+
first_gh = find_first_strong_holder(gl);
13761448
try_futile = !may_grant(gl, first_gh, gh);
13771449
}
13781450
if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
@@ -1381,7 +1453,8 @@ __acquires(&gl->gl_lockref.lock)
13811453

13821454
list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
13831455
if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
1384-
(gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
1456+
(gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
1457+
!test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
13851458
goto trap_recursive;
13861459
if (try_futile &&
13871460
!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
@@ -1477,51 +1550,83 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
14771550
return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
14781551
}
14791552

1480-
/**
1481-
* gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1482-
* @gh: the glock holder
1483-
*
1484-
*/
1553+
static inline bool needs_demote(struct gfs2_glock *gl)
1554+
{
1555+
return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
1556+
test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
1557+
}
14851558

1486-
void gfs2_glock_dq(struct gfs2_holder *gh)
1559+
static void __gfs2_glock_dq(struct gfs2_holder *gh)
14871560
{
14881561
struct gfs2_glock *gl = gh->gh_gl;
14891562
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
14901563
unsigned delay = 0;
14911564
int fast_path = 0;
14921565

1493-
spin_lock(&gl->gl_lockref.lock);
14941566
/*
1495-
* If we're in the process of file system withdraw, we cannot just
1496-
* dequeue any glocks until our journal is recovered, lest we
1497-
* introduce file system corruption. We need two exceptions to this
1498-
* rule: We need to allow unlocking of nondisk glocks and the glock
1499-
* for our own journal that needs recovery.
1567+
* This while loop is similar to function demote_incompat_holders:
1568+
* If the glock is due to be demoted (which may be from another node
1569+
* or even if this holder is GL_NOCACHE), the weak holders are
1570+
* demoted as well, allowing the glock to be demoted.
15001571
*/
1501-
if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
1502-
glock_blocked_by_withdraw(gl) &&
1503-
gh->gh_gl != sdp->sd_jinode_gl) {
1504-
sdp->sd_glock_dqs_held++;
1505-
spin_unlock(&gl->gl_lockref.lock);
1506-
might_sleep();
1507-
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
1508-
TASK_UNINTERRUPTIBLE);
1509-
spin_lock(&gl->gl_lockref.lock);
1510-
}
1511-
if (gh->gh_flags & GL_NOCACHE)
1512-
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1572+
while (gh) {
1573+
/*
1574+
* If we're in the process of file system withdraw, we cannot
1575+
* just dequeue any glocks until our journal is recovered, lest
1576+
* we introduce file system corruption. We need two exceptions
1577+
* to this rule: We need to allow unlocking of nondisk glocks
1578+
* and the glock for our own journal that needs recovery.
1579+
*/
1580+
if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
1581+
glock_blocked_by_withdraw(gl) &&
1582+
gh->gh_gl != sdp->sd_jinode_gl) {
1583+
sdp->sd_glock_dqs_held++;
1584+
spin_unlock(&gl->gl_lockref.lock);
1585+
might_sleep();
1586+
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
1587+
TASK_UNINTERRUPTIBLE);
1588+
spin_lock(&gl->gl_lockref.lock);
1589+
}
1590+
1591+
/*
1592+
* This holder should not be cached, so mark it for demote.
1593+
* Note: this should be done before the check for needs_demote
1594+
* below.
1595+
*/
1596+
if (gh->gh_flags & GL_NOCACHE)
1597+
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1598+
1599+
list_del_init(&gh->gh_list);
1600+
clear_bit(HIF_HOLDER, &gh->gh_iflags);
1601+
trace_gfs2_glock_queue(gh, 0);
1602+
1603+
/*
1604+
* If there hasn't been a demote request we are done.
1605+
* (Let the remaining holders, if any, keep holding it.)
1606+
*/
1607+
if (!needs_demote(gl)) {
1608+
if (list_empty(&gl->gl_holders))
1609+
fast_path = 1;
1610+
break;
1611+
}
1612+
/*
1613+
* If we have another strong holder (we cannot auto-demote)
1614+
* we are done. It keeps holding it until it is done.
1615+
*/
1616+
if (find_first_strong_holder(gl))
1617+
break;
15131618

1514-
list_del_init(&gh->gh_list);
1515-
clear_bit(HIF_HOLDER, &gh->gh_iflags);
1516-
if (list_empty(&gl->gl_holders) &&
1517-
!test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1518-
!test_bit(GLF_DEMOTE, &gl->gl_flags))
1519-
fast_path = 1;
1619+
/*
1620+
* If we have a weak holder at the head of the list, it
1621+
* (and all others like it) must be auto-demoted. If there
1622+
* are no more weak holders, we exit the while loop.
1623+
*/
1624+
gh = find_first_holder(gl);
1625+
}
15201626

15211627
if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
15221628
gfs2_glock_add_to_lru(gl);
15231629

1524-
trace_gfs2_glock_queue(gh, 0);
15251630
if (unlikely(!fast_path)) {
15261631
gl->gl_lockref.count++;
15271632
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@@ -1530,6 +1635,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
15301635
delay = gl->gl_hold_time;
15311636
__gfs2_glock_queue_work(gl, delay);
15321637
}
1638+
}
1639+
1640+
/**
1641+
* gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1642+
* @gh: the glock holder
1643+
*
1644+
*/
1645+
void gfs2_glock_dq(struct gfs2_holder *gh)
1646+
{
1647+
struct gfs2_glock *gl = gh->gh_gl;
1648+
1649+
spin_lock(&gl->gl_lockref.lock);
1650+
__gfs2_glock_dq(gh);
15331651
spin_unlock(&gl->gl_lockref.lock);
15341652
}
15351653

@@ -1692,6 +1810,7 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
16921810

16931811
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
16941812
{
1813+
struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, };
16951814
unsigned long delay = 0;
16961815
unsigned long holdtime;
16971816
unsigned long now = jiffies;
@@ -1706,6 +1825,28 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
17061825
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
17071826
delay = gl->gl_hold_time;
17081827
}
1828+
/*
1829+
* Note 1: We cannot call demote_incompat_holders from handle_callback
1830+
* or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
1831+
* handle_callback -> demote_incompat_holders -> gfs2_glock_dq
1832+
* Plus, we only want to demote the holders if the request comes from
1833+
* a remote cluster node because local holder conflicts are resolved
1834+
* elsewhere.
1835+
*
1836+
* Note 2: if a remote node wants this glock in EX mode, lock_dlm will
1837+
* request that we set our state to UNLOCKED. Here we mock up a holder
1838+
* to make it look like someone wants the lock EX locally. Any SH
1839+
* and DF requests should be able to share the lock without demoting.
1840+
*
1841+
* Note 3: We only want to demote the demoteable holders when there
1842+
* are no more strong holders. The demoteable holders might as well
1843+
* keep the glock until the last strong holder is done with it.
1844+
*/
1845+
if (!find_first_strong_holder(gl)) {
1846+
if (state == LM_ST_UNLOCKED)
1847+
mock_gh.gh_state = LM_ST_EXCLUSIVE;
1848+
demote_incompat_holders(gl, &mock_gh);
1849+
}
17091850
handle_callback(gl, state, delay, true);
17101851
__gfs2_glock_queue_work(gl, delay);
17111852
spin_unlock(&gl->gl_lockref.lock);
@@ -2095,6 +2236,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
20952236
*p++ = 'H';
20962237
if (test_bit(HIF_WAIT, &iflags))
20972238
*p++ = 'W';
2239+
if (test_bit(HIF_MAY_DEMOTE, &iflags))
2240+
*p++ = 'D';
20982241
*p = 0;
20992242
return buf;
21002243
}

Diff for: fs/gfs2/glock.h

+20
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
150150
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
151151
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
152152
break;
153+
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
154+
continue;
153155
if (gh->gh_owner_pid == pid)
154156
goto out;
155157
}
@@ -325,6 +327,24 @@ static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
325327
spin_unlock(&gl->gl_lockref.lock);
326328
}
327329

330+
static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
331+
{
332+
struct gfs2_glock *gl = gh->gh_gl;
333+
334+
spin_lock(&gl->gl_lockref.lock);
335+
set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
336+
spin_unlock(&gl->gl_lockref.lock);
337+
}
338+
339+
static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
340+
{
341+
struct gfs2_glock *gl = gh->gh_gl;
342+
343+
spin_lock(&gl->gl_lockref.lock);
344+
clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
345+
spin_unlock(&gl->gl_lockref.lock);
346+
}
347+
328348
extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
329349
extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
330350

Diff for: fs/gfs2/incore.h

+1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ struct gfs2_lkstats {
252252

253253
enum {
254254
/* States */
255+
HIF_MAY_DEMOTE = 1,
255256
HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
256257
HIF_WAIT = 10,
257258
};

0 commit comments

Comments (0)