Skip to content

Commit

Permalink
xen/block: add multi-page ring support
Browse files Browse the repository at this point in the history
Extend xen/block to support a multi-page ring, so that more requests can be
issued by using more than one page as the request ring between blkfront
and the backend.
As a result, performance can be improved significantly.

We saw some impressive improvements on our high-end iSCSI storage cluster
backend. When using 64 pages as the ring, IOPS increased about 15 times
in the throughput testing and more than doubled in the latency testing.

The reason is that the limit on outstanding requests is 32 when only a
one-page ring is used, but in our case the iSCSI LUN was spread across about
100 physical drives, so 32 was really not enough to keep them busy.

Changes in v2:
 - Rebased to 4.0-rc6.
 - Document how the multi-page ring feature works in linux io/blkif.h.

Changes in v3:
 - Remove changes to linux io/blkif.h and follow the protocol defined
   in io/blkif.h of XEN tree.
 - Rebased to 4.1-rc3

Changes in v4:
 - Switch to using 'ring-page-order' and 'max-ring-page-order'.
 - Address a few comments from Roger.

Changes in v5:
 - Clarify in the comment that 4k granularity is used
 - Address more comments from Roger

Signed-off-by: Bob Liu <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
  • Loading branch information
Bob Liu authored and konradwilk committed Jun 6, 2015
1 parent 8ab0144 commit 86839c5
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 59 deletions.
13 changes: 13 additions & 0 deletions drivers/block/xen-blkback/blkback.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");

/*
 * Maximum order of pages to be used for the shared ring between front and
 * backend, 4KB page granularity is used.
 *
 * The value is an order, not a page count: a frontend that requests order N
 * supplies (1 << N) ring-ref grants. It defaults to
 * XENBUS_MAX_RING_PAGE_ORDER and is exposed as the "max_ring_page_order"
 * module parameter. xen_blkif_init() resets it to the default when it
 * exceeds XENBUS_MAX_RING_PAGE_ORDER.
 *
 * NOTE(review): the variable is unsigned int but the parameter is registered
 * with type "int" -- confirm whether "uint" was intended.
 */
unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
/*
* The LRU mechanism to clean the lists of persistent grants needs to
* be executed periodically. The time interval between consecutive executions
Expand Down Expand Up @@ -1451,6 +1458,12 @@ static int __init xen_blkif_init(void)
if (!xen_domain())
return -ENODEV;

if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
}

rc = xen_blkif_interface_init();
if (rc)
goto failed_init;
Expand Down
2 changes: 2 additions & 0 deletions drivers/block/xen-blkback/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

/*
 * Upper bound on the ring page order a frontend may request; defined in
 * blkback.c and consulted by the xenbus connection code.
 */
extern unsigned int xen_blkif_max_ring_order;
/*
* This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend.
Expand Down Expand Up @@ -320,6 +321,7 @@ struct xen_blkif {
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
unsigned int nr_ring_pages;
};

struct seg_buf {
Expand Down
89 changes: 69 additions & 20 deletions drivers/block/xen-blkback/xenbus.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
#define RINGREF_NAME_LEN (20)

struct backend_info {
struct xenbus_device *dev;
Expand Down Expand Up @@ -156,16 +157,16 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
return blkif;
}

static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
unsigned int evtchn)
static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
unsigned int nr_grefs, unsigned int evtchn)
{
int err;

/* Already connected through? */
if (blkif->irq)
return 0;

err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
&blkif->blk_ring);
if (err < 0)
return err;
Expand All @@ -175,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
{
struct blkif_sring *sring;
sring = (struct blkif_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
break;
}
default:
Expand Down Expand Up @@ -270,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
i++;
}

WARN_ON(i != XEN_BLKIF_REQS_PER_PAGE);
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));

kmem_cache_free(xen_blkif_cachep, blkif);
}
Expand Down Expand Up @@ -555,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
if (err)
goto fail;

err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
xen_blkif_max_ring_order);
if (err)
pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);

err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
Expand Down Expand Up @@ -818,23 +824,66 @@ static void connect(struct backend_info *be)
static int connect_ring(struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
unsigned long ring_ref;
unsigned int evtchn;
unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
unsigned int evtchn, nr_grefs, ring_page_order;
unsigned int pers_grants;
char protocol[64] = "";
struct pending_req *req, *n;
int err, i, j;

pr_debug("%s %s\n", __func__, dev->otherend);

err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
&ring_ref, "event-channel", "%u", &evtchn, NULL);
if (err) {
xenbus_dev_fatal(dev, err,
"reading %s/ring-ref and event-channel",
err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
&evtchn);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/event-channel",
dev->otherend);
return err;
}
pr_info("event-channel %u\n", evtchn);

err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
&ring_page_order);
if (err != 1) {
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
"%u", &ring_ref[0]);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
dev->otherend);
return err;
}
nr_grefs = 1;
pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
ring_ref[0]);
} else {
unsigned int i;

if (ring_page_order > xen_blkif_max_ring_order) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
dev->otherend, ring_page_order,
xen_blkif_max_ring_order);
return err;
}

nr_grefs = 1 << ring_page_order;
for (i = 0; i < nr_grefs; i++) {
char ring_ref_name[RINGREF_NAME_LEN];

snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
"%u", &ring_ref[i]);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/%s",
dev->otherend, ring_ref_name);
return err;
}
pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
}
}

be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
Expand All @@ -859,12 +908,13 @@ static int connect_ring(struct backend_info *be)

be->blkif->vbd.feature_gnt_persistent = pers_grants;
be->blkif->vbd.overflow_max_grants = 0;
be->blkif->nr_ring_pages = nr_grefs;

pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
ring_ref, evtchn, be->blkif->blk_protocol, protocol,
pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
pers_grants ? "persistent grants" : "");

for (i = 0; i < XEN_BLKIF_REQS_PER_PAGE; i++) {
for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
goto fail;
Expand All @@ -883,10 +933,9 @@ static int connect_ring(struct backend_info *be)
}

/* Map the shared frame, irq etc. */
err = xen_blkif_map(be->blkif, ring_ref, evtchn);
err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
if (err) {
xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
ring_ref, evtchn);
xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
}

Expand Down
Loading

0 comments on commit 86839c5

Please sign in to comment.