Skip to content

Commit 488dc8a

Browse files
committed
opal: poller re-entrancy try open-power#3
So my great attempt at avoiding all re-entencies fails due to HBRT... at least until we have some kind of way to thread things, it will have to re-enter so let's bite the bullet, make the poller list walking lockless (we'll handle removal when we have to, ie, not yet) and slightly extend the coverage of the PSI lock while at it. All the other pollers already have their own locks anyway so we are actually removing some overhead. Signed-off-by: Benjamin Herrenschmidt <[email protected]>
1 parent 21175cd commit 488dc8a

File tree

4 files changed

+21
-60
lines changed

4 files changed

+21
-60
lines changed

core/opal.c

+14-31
Original file line numberDiff line numberDiff line change
@@ -201,50 +201,33 @@ void opal_del_poller(void (*poller)(void *data))
201201
{
202202
struct opal_poll_entry *ent;
203203

204+
/* XXX This is currently unused. To solve various "interesting"
205+
* locking issues, the pollers are run locklessly, so if we were
206+
* to free them, we would have to be careful, using something
207+
* akin to RCU to synchronize with other OPAL entries. For now
208+
* if anybody uses it, print a warning and leak the entry, don't
209+
* free it.
210+
*/
211+
prerror("WARNING: Unsupported opal_del_poller\n");
212+
204213
lock(&opal_poll_lock);
205214
list_for_each(&opal_pollers, ent, link) {
206215
if (ent->poller == poller) {
207216
list_del(&ent->link);
208-
free(ent);
217+
/* free(ent); */
209218
break;
210219
}
211220
}
212221
unlock(&opal_poll_lock);
213222
}
214223

215-
bool __opal_check_poll_recursion(const char *caller)
216-
{
217-
if (!lock_held_by_me(&opal_poll_lock))
218-
return false;
219-
prerror("OPAL: poller recursion caught in %s !\n", caller);
220-
backtrace();
221-
222-
return true;
223-
}
224-
225-
void __opal_run_pollers(const char *caller)
224+
void opal_run_pollers(void)
226225
{
227226
struct opal_poll_entry *poll_ent;
228227

229-
/* Debug path. Warn if we recursed */
230-
if (__opal_check_poll_recursion(caller)) {
231-
/* This shouldn't happen. However, if it does, we are goin
232-
* to end up warning a *LOT* so let's introduce an arbitrary
233-
* delay here.
234-
*/
235-
time_wait_ms_nopoll(10);
236-
return;
237-
}
238-
239-
/*
240-
* Only run the pollers if they aren't already running
241-
* on another CPU and we aren't re-entering.
242-
*/
243-
if (try_lock(&opal_poll_lock)) {
244-
list_for_each(&opal_pollers, poll_ent, link)
245-
poll_ent->poller(poll_ent->data);
246-
unlock(&opal_poll_lock);
247-
}
228+
/* The pollers are run lokelessly, see comment in opal_del_poller */
229+
list_for_each(&opal_pollers, poll_ent, link)
230+
poll_ent->poller(poll_ent->data);
248231
}
249232

250233
static int64_t opal_poll_events(uint64_t *outstanding_event_mask)

hw/fsp/fsp.c

+2-16
Original file line numberDiff line numberDiff line change
@@ -1544,22 +1544,8 @@ int fsp_sync_msg(struct fsp_msg *msg, bool autofree)
15441544
if (rc)
15451545
goto bail;
15461546

1547-
/* Debug .. make sure we aren't recursing on the poll lock
1548-
* or the FSP lock
1549-
*/
1550-
if (opal_check_poll_recursion()) {
1551-
/* Until we're confident we found all culprits, fallback
1552-
* to the old way of just polling the FSP
1553-
*/
1554-
while(fsp_msg_busy(msg)) {
1555-
lock(&fsp_lock);
1556-
__fsp_poll(false);
1557-
unlock(&fsp_lock);
1558-
}
1559-
} else {
1560-
while(fsp_msg_busy(msg))
1561-
opal_run_pollers();
1562-
}
1547+
while(fsp_msg_busy(msg))
1548+
opal_run_pollers();
15631549

15641550
switch(msg->state) {
15651551
case fsp_msg_done:

hw/psi.c

+4-8
Original file line numberDiff line numberDiff line change
@@ -122,18 +122,14 @@ static void psi_link_poll(void *data __unused)
122122
(tb_compare(now, psi_link_timer) == TB_AAFTERB) ||
123123
(tb_compare(now, psi_link_timer) == TB_AEQUALB)) {
124124

125+
lock(&psi_lock);
126+
125127
list_for_each(&psis, psi, list) {
126128
u64 val;
127129

128130
if (psi->active || !psi->working)
129131
continue;
130132

131-
lock(&psi_lock);
132-
if (psi->active || !psi->working) {
133-
unlock(&psi_lock);
134-
continue;
135-
}
136-
137133
val = in_be64(psi->regs + PSIHB_CR);
138134

139135
printf("PSI[0x%03x]: Poll CR=0x%016llx\n",
@@ -150,9 +146,7 @@ static void psi_link_poll(void *data __unused)
150146
fsp_reinit_fsp();
151147
return;
152148
}
153-
unlock(&psi_lock);
154149
}
155-
156150
if (!psi_link_timeout)
157151
psi_link_timeout =
158152
now + secs_to_tb(PSI_LINK_RECOVERY_TIMEOUT);
@@ -165,6 +159,8 @@ static void psi_link_poll(void *data __unused)
165159

166160
/* Poll every 10 seconds */
167161
psi_link_timer = now + secs_to_tb(PSI_LINK_CHECK_INTERVAL);
162+
163+
unlock(&psi_lock);
168164
}
169165
}
170166

include/opal.h

+1-5
Original file line numberDiff line numberDiff line change
@@ -900,11 +900,7 @@ extern void __opal_register(uint64_t token, void *func, unsigned num_args);
900900
*/
901901
extern void opal_add_poller(void (*poller)(void *data), void *data);
902902
extern void opal_del_poller(void (*poller)(void *data));
903-
extern void __opal_run_pollers(const char *caller);
904-
extern bool __opal_check_poll_recursion(const char *caller);
905-
906-
#define opal_run_pollers() __opal_run_pollers(__func__)
907-
#define opal_check_poll_recursion() __opal_check_poll_recursion(__func__)
903+
extern void opal_run_pollers(void);
908904

909905
/*
910906
* Warning: no locking, only call that from the init processor

0 commit comments

Comments
 (0)