Skip to content

Commit 8eee1d3

Browse files
committed
libata: fix sff host state machine locking while polling
The bulk of ATA host state machine is implemented by ata_sff_hsm_move(). The function is called from either the interrupt handler or, if polling, a work item. Unlike from the interrupt path, the polling path calls the function without holding the host lock and ata_sff_hsm_move() selectively grabs the lock. This is completely broken. If an IRQ triggers while polling is in progress, the two can easily race and end up accessing the hardware and updating state machine state at the same time. This can put the state machine in an illegal state and lead to a crash like the following. kernel BUG at drivers/ata/libata-sff.c:1302! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Modules linked in: CPU: 1 PID: 10679 Comm: syz-executor Not tainted 4.5.0-rc1+ torvalds#300 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88002bd00000 ti: ffff88002e048000 task.ti: ffff88002e048000 RIP: 0010:[<ffffffff83a83409>] [<ffffffff83a83409>] ata_sff_hsm_move+0x619/0x1c60 ... Call Trace: <IRQ> [<ffffffff83a84c31>] __ata_sff_port_intr+0x1e1/0x3a0 drivers/ata/libata-sff.c:1584 [<ffffffff83a85611>] ata_bmdma_port_intr+0x71/0x400 drivers/ata/libata-sff.c:2877 [< inline >] __ata_sff_interrupt drivers/ata/libata-sff.c:1629 [<ffffffff83a85bf3>] ata_bmdma_interrupt+0x253/0x580 drivers/ata/libata-sff.c:2902 [<ffffffff81479f98>] handle_irq_event_percpu+0x108/0x7e0 kernel/irq/handle.c:157 [<ffffffff8147a717>] handle_irq_event+0xa7/0x140 kernel/irq/handle.c:205 [<ffffffff81484573>] handle_edge_irq+0x1e3/0x8d0 kernel/irq/chip.c:623 [< inline >] generic_handle_irq_desc include/linux/irqdesc.h:146 [<ffffffff811a92bc>] handle_irq+0x10c/0x2a0 arch/x86/kernel/irq_64.c:78 [<ffffffff811a7e4d>] do_IRQ+0x7d/0x1a0 arch/x86/kernel/irq.c:240 [<ffffffff86653d4c>] common_interrupt+0x8c/0x8c arch/x86/entry/entry_64.S:520 <EOI> [< inline >] rcu_lock_acquire include/linux/rcupdate.h:490 [< inline >] rcu_read_lock include/linux/rcupdate.h:874 [<ffffffff8164b4a1>] filemap_map_pages+0x131/0xba0 mm/filemap.c:2145 [< inline >] do_fault_around mm/memory.c:2943 [< inline >] do_read_fault mm/memory.c:2962 [< inline >] do_fault mm/memory.c:3133 [< inline >] handle_pte_fault mm/memory.c:3308 [< inline >] __handle_mm_fault mm/memory.c:3418 [<ffffffff816efb16>] handle_mm_fault+0x2516/0x49a0 mm/memory.c:3447 [<ffffffff8127dc16>] __do_page_fault+0x376/0x960 arch/x86/mm/fault.c:1238 [<ffffffff8127e358>] trace_do_page_fault+0xe8/0x420 arch/x86/mm/fault.c:1331 [<ffffffff8126f514>] do_async_page_fault+0x14/0xd0 arch/x86/kernel/kvm.c:264 [<ffffffff86655578>] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:986 Fix it by ensuring that the polling path is holding the host lock before entering ata_sff_hsm_move() so that all hardware accesses and state updates are performed under the host lock. Signed-off-by: Tejun Heo <[email protected]> Reported-and-tested-by: Dmitry Vyukov <[email protected]> Link: http://lkml.kernel.org/g/CACT4Y+b_JsOxJu2EZyEf+mOXORc_zid5V1-pLZSroJVxyWdSpw@mail.gmail.com Cc: [email protected]
1 parent a588afc commit 8eee1d3

File tree

1 file changed

+11
-21
lines changed

1 file changed

+11
-21
lines changed

drivers/ata/libata-sff.c

+11-21
Original file line numberDiff line numberDiff line change
@@ -997,12 +997,9 @@ static inline int ata_hsm_ok_in_wq(struct ata_port *ap,
997997
static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
998998
{
999999
struct ata_port *ap = qc->ap;
1000-
unsigned long flags;
10011000

10021001
if (ap->ops->error_handler) {
10031002
if (in_wq) {
1004-
spin_lock_irqsave(ap->lock, flags);
1005-
10061003
/* EH might have kicked in while host lock is
10071004
* released.
10081005
*/
@@ -1014,8 +1011,6 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
10141011
} else
10151012
ata_port_freeze(ap);
10161013
}
1017-
1018-
spin_unlock_irqrestore(ap->lock, flags);
10191014
} else {
10201015
if (likely(!(qc->err_mask & AC_ERR_HSM)))
10211016
ata_qc_complete(qc);
@@ -1024,10 +1019,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
10241019
}
10251020
} else {
10261021
if (in_wq) {
1027-
spin_lock_irqsave(ap->lock, flags);
10281022
ata_sff_irq_on(ap);
10291023
ata_qc_complete(qc);
1030-
spin_unlock_irqrestore(ap->lock, flags);
10311024
} else
10321025
ata_qc_complete(qc);
10331026
}
@@ -1048,9 +1041,10 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
10481041
{
10491042
struct ata_link *link = qc->dev->link;
10501043
struct ata_eh_info *ehi = &link->eh_info;
1051-
unsigned long flags = 0;
10521044
int poll_next;
10531045

1046+
lockdep_assert_held(ap->lock);
1047+
10541048
WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0);
10551049

10561050
/* Make sure ata_sff_qc_issue() does not throw things
@@ -1112,14 +1106,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
11121106
}
11131107
}
11141108

1115-
/* Send the CDB (atapi) or the first data block (ata pio out).
1116-
* During the state transition, interrupt handler shouldn't
1117-
* be invoked before the data transfer is complete and
1118-
* hsm_task_state is changed. Hence, the following locking.
1119-
*/
1120-
if (in_wq)
1121-
spin_lock_irqsave(ap->lock, flags);
1122-
11231109
if (qc->tf.protocol == ATA_PROT_PIO) {
11241110
/* PIO data out protocol.
11251111
* send first data block.
@@ -1135,9 +1121,6 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
11351121
/* send CDB */
11361122
atapi_send_cdb(ap, qc);
11371123

1138-
if (in_wq)
1139-
spin_unlock_irqrestore(ap->lock, flags);
1140-
11411124
/* if polling, ata_sff_pio_task() handles the rest.
11421125
* otherwise, interrupt handler takes over from here.
11431126
*/
@@ -1362,12 +1345,14 @@ static void ata_sff_pio_task(struct work_struct *work)
13621345
u8 status;
13631346
int poll_next;
13641347

1348+
spin_lock_irq(ap->lock);
1349+
13651350
BUG_ON(ap->sff_pio_task_link == NULL);
13661351
/* qc can be NULL if timeout occurred */
13671352
qc = ata_qc_from_tag(ap, link->active_tag);
13681353
if (!qc) {
13691354
ap->sff_pio_task_link = NULL;
1370-
return;
1355+
goto out_unlock;
13711356
}
13721357

13731358
fsm_start:
@@ -1382,11 +1367,14 @@ static void ata_sff_pio_task(struct work_struct *work)
13821367
*/
13831368
status = ata_sff_busy_wait(ap, ATA_BUSY, 5);
13841369
if (status & ATA_BUSY) {
1370+
spin_unlock_irq(ap->lock);
13851371
ata_msleep(ap, 2);
1372+
spin_lock_irq(ap->lock);
1373+
13861374
status = ata_sff_busy_wait(ap, ATA_BUSY, 10);
13871375
if (status & ATA_BUSY) {
13881376
ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE);
1389-
return;
1377+
goto out_unlock;
13901378
}
13911379
}
13921380

@@ -1403,6 +1391,8 @@ static void ata_sff_pio_task(struct work_struct *work)
14031391
*/
14041392
if (poll_next)
14051393
goto fsm_start;
1394+
out_unlock:
1395+
spin_unlock_irq(ap->lock);
14061396
}
14071397

14081398
/**

0 commit comments

Comments
 (0)