Skip to content

Commit 62fe99c

Browse files
Ming Lei authored and axboe committed
ublk: add read()/write() support for ublk char device
Support pread()/pwrite() on the ublk char device for reading/writing the request io buffer, so that the data copy between the io request buffer and the userspace buffer can be moved from the ublk driver to the ublk server. UBLK_F_NEED_GET_DATA then becomes unnecessary, and the ublk server can allocate its buffer without one extra round of uring command communication for userspace to provide the buffer. The IO buffer is located by iocb->ki_pos, which encodes the buffer offset, io tag and queue id; ki_pos is 64 bits wide, so it is big enough to hold a reasonable queue depth, nr_queues and max io buffer size. Signed-off-by: Ming Lei <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe <[email protected]>
1 parent 38f2dd3 commit 62fe99c

File tree

2 files changed

+172
-1
lines changed

2 files changed

+172
-1
lines changed

drivers/block/ublk_drv.c

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,23 @@ static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
207207

208208
static struct miscdevice ublk_misc;
209209

210+
/*
 * Extract the hardware queue id encoded in a ublk char device file position.
 *
 * @pos: file position (callers pass iocb->ki_pos).
 *
 * The position is first rebased by UBLKSRV_IO_BUF_OFFSET; the queue id is the
 * field selected by UBLK_QID_BITS_MASK starting at bit UBLK_QID_OFF.
 *
 * The arithmetic is done on an unsigned 64-bit value so that a position below
 * UBLKSRV_IO_BUF_OFFSET wraps instead of right-shifting a negative signed
 * value, which is implementation-defined in C. The extracted bits are the
 * same as with an arithmetic shift because the mask only keeps bits that lie
 * inside the original 64-bit value.
 */
static inline unsigned ublk_pos_to_hwq(loff_t pos)
{
	unsigned long long rel = (unsigned long long)pos - UBLKSRV_IO_BUF_OFFSET;

	return (rel >> UBLK_QID_OFF) & UBLK_QID_BITS_MASK;
}
215+
216+
/*
 * Extract the byte offset inside the addressed IO buffer from a ublk char
 * device file position: the bits selected by UBLK_IO_BUF_BITS_MASK after the
 * position has been rebased by UBLKSRV_IO_BUF_OFFSET.
 */
static inline unsigned ublk_pos_to_buf_off(loff_t pos)
{
	loff_t rel = pos - UBLKSRV_IO_BUF_OFFSET;

	return rel & UBLK_IO_BUF_BITS_MASK;
}
220+
221+
/*
 * Extract the io tag encoded in a ublk char device file position.
 *
 * @pos: file position (callers pass iocb->ki_pos).
 *
 * The position is first rebased by UBLKSRV_IO_BUF_OFFSET; the tag is the
 * field selected by UBLK_TAG_BITS_MASK starting at bit UBLK_TAG_OFF.
 *
 * The arithmetic is done on an unsigned 64-bit value so that a position below
 * UBLKSRV_IO_BUF_OFFSET wraps instead of right-shifting a negative signed
 * value, which is implementation-defined in C. The extracted bits are the
 * same as with an arithmetic shift because the mask only keeps bits that lie
 * inside the original 64-bit value.
 */
static inline unsigned ublk_pos_to_tag(loff_t pos)
{
	unsigned long long rel = (unsigned long long)pos - UBLKSRV_IO_BUF_OFFSET;

	return (rel >> UBLK_TAG_OFF) & UBLK_TAG_BITS_MASK;
}
226+
210227
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
211228
{
212229
struct request_queue *q = ub->ub_disk->queue;
@@ -1429,6 +1446,36 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
14291446
return -EIOCBQUEUED;
14301447
}
14311448

1449+
/*
 * Look up the request mapped to @tag on @ubq and take a reference on it.
 *
 * Returns the request with a reference held (released by the caller through
 * ublk_put_req_ref()), or NULL when:
 *  - ublk_need_req_ref() is false for @ubq,
 *  - no request is mapped to @tag,
 *  - the reference cannot be taken,
 *  - the request is not started or carries a different tag,
 *  - the request has no data, or
 *  - @offset is larger than the request's byte count.
 */
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
		struct ublk_queue *ubq, int tag, size_t offset)
{
	struct request *rq;

	if (!ublk_need_req_ref(ubq))
		return NULL;

	rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
	if (!rq || !ublk_get_req_ref(ubq, rq))
		return NULL;

	/* the request is only inspected once the reference is held */
	if (unlikely(!blk_mq_request_started(rq) || rq->tag != tag) ||
	    !ublk_rq_has_data(rq) ||
	    offset > blk_rq_bytes(rq)) {
		ublk_put_req_ref(ubq, rq);
		return NULL;
	}

	return rq;
}
1478+
14321479
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
14331480
{
14341481
/*
@@ -1446,11 +1493,112 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
14461493
return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
14471494
}
14481495

1496+
static inline bool ublk_check_ubuf_dir(const struct request *req,
1497+
int ubuf_dir)
1498+
{
1499+
/* copy ubuf to request pages */
1500+
if (req_op(req) == REQ_OP_READ && ubuf_dir == ITER_SOURCE)
1501+
return true;
1502+
1503+
/* copy request pages to ubuf */
1504+
if (req_op(req) == REQ_OP_WRITE && ubuf_dir == ITER_DEST)
1505+
return true;
1506+
1507+
return false;
1508+
}
1509+
1510+
/*
 * Resolve the request addressed by iocb->ki_pos for a read()/write() on the
 * ublk char device and take a reference on it.
 *
 * @iocb: kiocb of the read/write; ki_pos encodes queue id, tag and buffer
 *        offset, ki_filp->private_data holds the ublk device.
 * @iter: iterator of the caller's buffer; must be user backed.
 * @off:  set to the decoded buffer offset on success.
 * @dir:  ITER_DEST or ITER_SOURCE, checked against the request operation.
 *
 * Returns the referenced request on success; the caller must drop the
 * reference with ublk_put_req_ref(). On failure returns
 * ERR_PTR(-EACCES) (no device, non-user iterator, dead device, request
 * without hctx/driver data, or direction mismatch) or ERR_PTR(-EINVAL)
 * (queue id or tag out of range, or no usable request for the tag).
 */
static struct request *ublk_check_and_get_req(struct kiocb *iocb,
		struct iov_iter *iter, size_t *off, int dir)
{
	struct ublk_device *ub = iocb->ki_filp->private_data;
	const loff_t pos = iocb->ki_pos;
	struct ublk_queue *ubq;
	struct request *req;
	size_t buf_off;
	u16 tag, q_id;

	if (!ub || !user_backed_iter(iter) ||
	    ub->dev_info.state == UBLK_S_DEV_DEAD)
		return ERR_PTR(-EACCES);

	/* decode the three fields packed into the file position */
	tag = ublk_pos_to_tag(pos);
	q_id = ublk_pos_to_hwq(pos);
	buf_off = ublk_pos_to_buf_off(pos);

	if (q_id >= ub->dev_info.nr_hw_queues)
		return ERR_PTR(-EINVAL);

	ubq = ublk_get_queue(ub, q_id);
	if (!ubq || tag >= ubq->q_depth)
		return ERR_PTR(-EINVAL);

	req = __ublk_check_and_get_req(ub, ubq, tag, buf_off);
	if (!req)
		return ERR_PTR(-EINVAL);

	if (req->mq_hctx && req->mq_hctx->driver_data &&
	    ublk_check_ubuf_dir(req, dir)) {
		*off = buf_off;
		return req;
	}

	/* the reference was taken above, so it has to be dropped on failure */
	ublk_put_req_ref(ubq, req);
	return ERR_PTR(-EACCES);
}
1558+
1559+
static ssize_t ublk_ch_read_iter(struct kiocb *iocb, struct iov_iter *to)
1560+
{
1561+
struct ublk_queue *ubq;
1562+
struct request *req;
1563+
size_t buf_off;
1564+
size_t ret;
1565+
1566+
req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST);
1567+
if (IS_ERR(req))
1568+
return PTR_ERR(req);
1569+
1570+
ret = ublk_copy_user_pages(req, buf_off, to, ITER_DEST);
1571+
ubq = req->mq_hctx->driver_data;
1572+
ublk_put_req_ref(ubq, req);
1573+
1574+
return ret;
1575+
}
1576+
1577+
static ssize_t ublk_ch_write_iter(struct kiocb *iocb, struct iov_iter *from)
1578+
{
1579+
struct ublk_queue *ubq;
1580+
struct request *req;
1581+
size_t buf_off;
1582+
size_t ret;
1583+
1584+
req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE);
1585+
if (IS_ERR(req))
1586+
return PTR_ERR(req);
1587+
1588+
ret = ublk_copy_user_pages(req, buf_off, from, ITER_SOURCE);
1589+
ubq = req->mq_hctx->driver_data;
1590+
ublk_put_req_ref(ubq, req);
1591+
1592+
return ret;
1593+
}
1594+
14491595
static const struct file_operations ublk_ch_fops = {
14501596
.owner = THIS_MODULE,
14511597
.open = ublk_ch_open,
14521598
.release = ublk_ch_release,
14531599
.llseek = no_llseek,
1600+
.read_iter = ublk_ch_read_iter,
1601+
.write_iter = ublk_ch_write_iter,
14541602
.uring_cmd = ublk_ch_uring_cmd,
14551603
.mmap = ublk_ch_mmap,
14561604
};
@@ -2362,6 +2510,9 @@ static int __init ublk_init(void)
23622510
{
23632511
int ret;
23642512

2513+
BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
2514+
UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
2515+
23652516
init_waitqueue_head(&ublk_idr_wq);
23662517

23672518
ret = misc_register(&ublk_misc);

include/uapi/linux/ublk_cmd.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,29 @@
9393
#define UBLKSRV_CMD_BUF_OFFSET 0
9494
#define UBLKSRV_IO_BUF_OFFSET 0x80000000
9595

96-
/* tag bit is 12bit, so at most 4096 IOs for each queue */
96+
/* the tag field is 16 bits wide, but queue depth is currently limited to 4096 IOs per queue */
9797
#define UBLK_MAX_QUEUE_DEPTH 4096
9898

99+
/*
 * Bit layout of an IO buffer position, counted from bit 0 upwards:
 *
 *   UBLK_IO_BUF_BITS (25)  byte offset inside a single IO buffer
 *   UBLK_TAG_BITS    (16)  io tag
 *   UBLK_QID_BITS    (12)  queue id
 */

/* 25 offset bits: a single IO buffer is at most 32MB */
#define UBLK_IO_BUF_OFF 0
#define UBLK_IO_BUF_BITS 25
#define UBLK_IO_BUF_BITS_MASK ((1ULL << UBLK_IO_BUF_BITS) - 1)

/* 16 tag bits: at most 64K IOs can be addressed in each queue */
#define UBLK_TAG_OFF UBLK_IO_BUF_BITS
#define UBLK_TAG_BITS 16
#define UBLK_TAG_BITS_MASK ((1ULL << UBLK_TAG_BITS) - 1)

/* 12 queue id bits: at most 4096 queues */
#define UBLK_QID_OFF (UBLK_TAG_OFF + UBLK_TAG_BITS)
#define UBLK_QID_BITS 12
#define UBLK_QID_BITS_MASK ((1ULL << UBLK_QID_BITS) - 1)

#define UBLK_MAX_NR_QUEUES (1U << UBLK_QID_BITS)

/* total number of position bits, and the size of the range they span */
#define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS)
#define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)
118+
99119
/*
100120
* zero copy requires 4k block size, and can remap ublk driver's io
101121
* request into ublksrv's vm space

0 commit comments

Comments
 (0)