[BLK] Add support for barrier writes to blkfront/blkback.
author kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Thu, 9 Nov 2006 16:49:59 +0000 (16:49 +0000)
committer kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Thu, 9 Nov 2006 16:49:59 +0000 (16:49 +0000)
Protocol changes:
 * There is a new operation (BLKIF_OP_WRITE_BARRIER)
   to pass on barrier requests.
 * There is a new response status (BLKIF_RSP_EOPNOTSUPP) to indicate
   unsupported operations (barrier writes may fail depending
   on the underlying block device).
 * A new xenstore node named "feature-barrier" indicates the
   backend is able to handle barrier writes.  The value can
   be 1 (all is fine) or 0 (underlying block device doesn't
   support barriers).

blkback changes:  Add "feature-barrier" node to indicate barrier
support, pass incoming barrier requests to the block layer using
submit_bio(WRITE_BARRIER, bio).  Some error handling fixes to
properly pass through barrier write failures, so that the frontend
can then turn off barriers.

blkfront changes:  Check whether the backend sets "feature-barrier"; if
it does, switch to QUEUE_ORDERED_DRAIN mode.  Send off barrier
requests to the backend using the new BLKIF_OP_WRITE_BARRIER
operation.  Also some error handling for the EOPNOTSUPP case.

Background:  Barriers are needed to make journaling filesystems work
reliably.  For some requests they need ordering constraints to make the
transactions work correctly.  Barrier requests are used to pass that
ordering information to the block layer and/or to the device, so the
constraints are obeyed when reordering requests for better write
performance.

Signed-off-by: Gerd Hoffmann <kraxel@suse.de>
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
linux-2.6-xen-sparse/drivers/xen/blkback/common.h
linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c

index e9a7e7d0703a0bf3c363c2ad8c3b341d8ce49320..e8df9e034648567cb09ee49ddbdd0bb506293da2 100644 (file)
@@ -189,9 +189,9 @@ static void fast_flush_area(pending_req_t *req)
 
 static void print_stats(blkif_t *blkif)
 {
-       printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
+       printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d  |  br %4d\n",
               current->comm, blkif->st_oo_req,
-              blkif->st_rd_req, blkif->st_wr_req);
+              blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
@@ -241,11 +241,17 @@ int blkif_schedule(void *arg)
  * COMPLETION CALLBACK -- Called as bh->b_end_io()
  */
 
-static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
+static void __end_block_io_op(pending_req_t *pending_req, int error)
 {
        /* An error fails the entire request. */
-       if (!uptodate) {
-               DPRINTK("Buffer not up-to-date at end of operation\n");
+       if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+           (error == -EOPNOTSUPP)) {
+               DPRINTK("blkback: write barrier op failed, not supported\n");
+               blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
+               pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+       } else if (error) {
+               DPRINTK("Buffer not up-to-date at end of operation, "
+                       "error=%d\n", error);
                pending_req->status = BLKIF_RSP_ERROR;
        }
 
@@ -262,7 +268,7 @@ static int end_block_io_op(struct bio *bio, unsigned int done, int error)
 {
        if (bio->bi_size != 0)
                return 1;
-       __end_block_io_op(bio->bi_private, !error);
+       __end_block_io_op(bio->bi_private, error);
        bio_put(bio);
        return error;
 }
@@ -319,6 +325,9 @@ static int do_block_io_op(blkif_t *blkif)
                        blkif->st_rd_req++;
                        dispatch_rw_block_io(blkif, &req, pending_req);
                        break;
+               case BLKIF_OP_WRITE_BARRIER:
+                       blkif->st_br_req++;
+                       /* fall through */
                case BLKIF_OP_WRITE:
                        blkif->st_wr_req++;
                        dispatch_rw_block_io(blkif, &req, pending_req);
@@ -340,7 +349,6 @@ static void dispatch_rw_block_io(blkif_t *blkif,
                                 pending_req_t *pending_req)
 {
        extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
-       int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct phys_req preq;
        struct { 
@@ -349,6 +357,22 @@ static void dispatch_rw_block_io(blkif_t *blkif,
        unsigned int nseg;
        struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        int ret, i, nbio = 0;
+       int operation;
+
+       switch (req->operation) {
+       case BLKIF_OP_READ:
+               operation = READ;
+               break;
+       case BLKIF_OP_WRITE:
+               operation = WRITE;
+               break;
+       case BLKIF_OP_WRITE_BARRIER:
+               operation = WRITE_BARRIER;
+               break;
+       default:
+               operation = 0; /* make gcc happy */
+               BUG();
+       }
 
        /* Check that number of segments is sane. */
        nseg = req->nr_segments;
@@ -364,7 +388,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
 
        pending_req->blkif     = blkif;
        pending_req->id        = req->id;
-       pending_req->operation = operation;
+       pending_req->operation = req->operation;
        pending_req->status    = BLKIF_RSP_OKAY;
        pending_req->nr_pages  = nseg;
 
@@ -380,7 +404,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
                preq.nr_sects += seg[i].nsec;
 
                flags = GNTMAP_host_map;
-               if ( operation == WRITE )
+               if (operation != READ)
                        flags |= GNTMAP_readonly;
                gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
                                  req->seg[i].gref, blkif->domid);
index 38cb756964f84c49f8ec74949485c1fdf4a019d6..d55e388107ab52cf178c158411d79bcc0e6046d2 100644 (file)
@@ -44,6 +44,7 @@
 #include <xen/interface/io/ring.h>
 #include <xen/gnttab.h>
 #include <xen/driver_util.h>
+#include <xen/xenbus.h>
 
 #define DPRINTK(_f, _a...)                     \
        pr_debug("(file=%s, line=%d) " _f,      \
@@ -87,6 +88,7 @@ typedef struct blkif_st {
        int                 st_rd_req;
        int                 st_wr_req;
        int                 st_oo_req;
+       int                 st_br_req;
 
        wait_queue_head_t waiting_to_free;
 
@@ -131,4 +133,7 @@ void blkif_xenbus_init(void);
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 int blkif_schedule(void *arg);
 
+int blkback_barrier(struct xenbus_transaction xbt,
+                   struct backend_info *be, int state);
+
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
index a809b04cd1a1b019aef8e02a899b1ae4ec8b1df9..0abd23a5217ee729a581f20f654b4a7d2a5c5c57 100644 (file)
@@ -31,7 +31,6 @@
  */
 
 #include "common.h"
-#include <xen/xenbus.h>
 
 #define vbd_sz(_v)   ((_v)->bdev->bd_part ?                            \
        (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
@@ -104,7 +103,7 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
        struct vbd *vbd = &blkif->vbd;
        int rc = -EACCES;
 
-       if ((operation == WRITE) && vbd->readonly)
+       if ((operation != READ) && vbd->readonly)
                goto out;
 
        if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
index 02f90a68039233c6a31d5a8eca13f4f9736a5a5f..4d23434b714f0d04bdf845b51124ca1fbf026a3a 100644 (file)
@@ -20,7 +20,6 @@
 #include <stdarg.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
-#include <xen/xenbus.h>
 #include "common.h"
 
 #undef DPRINTK
@@ -91,11 +90,13 @@ static void update_blkif_status(blkif_t *blkif)
 VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
 VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
 VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
 
 static struct attribute *vbdstat_attrs[] = {
        &dev_attr_oo_req.attr,
        &dev_attr_rd_req.attr,
        &dev_attr_wr_req.attr,
+       &dev_attr_br_req.attr,
        NULL
 };
 
@@ -165,6 +166,19 @@ static int blkback_remove(struct xenbus_device *dev)
        return 0;
 }
 
+/* Write "feature-barrier" = state under this backend's xenstore node. */
+int blkback_barrier(struct xenbus_transaction xbt,
+                   struct backend_info *be, int state)
+{
+       struct xenbus_device *xbdev = be->dev;
+       int rc = xenbus_printf(xbt, xbdev->nodename, "feature-barrier",
+                              "%d", state);
+
+       if (rc != 0)    /* report the failed write, then hand rc to the caller */
+               xenbus_dev_fatal(xbdev, rc, "writing feature-barrier");
+
+       return rc;
+}
 
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
@@ -366,12 +380,15 @@ static void connect(struct backend_info *be)
        /* Supply the information about the device the frontend needs */
 again:
        err = xenbus_transaction_start(&xbt);
-
        if (err) {
                xenbus_dev_fatal(dev, err, "starting transaction");
                return;
        }
 
+       err = blkback_barrier(xbt, be, 1);
+       if (err)
+               goto abort;
+
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%lu",
                            vbd_size(&be->blkif->vbd));
        if (err) {
index 63ebf8ed93071211d15adcff07015d87606c71aa..557288b45a0d6fc054f8f6620fb06bcdb0fb2f49 100644 (file)
@@ -320,6 +320,12 @@ static void connect(struct blkfront_info *info)
                return;
        }
 
+       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                           "feature-barrier", "%d", &info->feature_barrier,
+                           NULL);      /* "%d": feature_barrier is an int */
+       if (err)                        /* read failed: run without barriers */
+               info->feature_barrier = 0;
+
        err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -569,11 +575,14 @@ static int blkif_queue_request(struct request *req)
        info->shadow[id].request = (unsigned long)req;
 
        ring_req->id = id;
-       ring_req->operation = rq_data_dir(req) ?
-               BLKIF_OP_WRITE : BLKIF_OP_READ;
        ring_req->sector_number = (blkif_sector_t)req->sector;
        ring_req->handle = info->handle;
 
+       ring_req->operation = rq_data_dir(req) ?
+               BLKIF_OP_WRITE : BLKIF_OP_READ;
+       if (blk_barrier_rq(req))
+               ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+
        ring_req->nr_segments = 0;
        rq_for_each_bio (bio, req) {
                bio_for_each_segment (bvec, bio, idx) {
@@ -670,6 +679,7 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
        RING_IDX i, rp;
        unsigned long flags;
        struct blkfront_info *info = (struct blkfront_info *)dev_id;
+       int uptodate;
 
        spin_lock_irqsave(&blkif_io_lock, flags);
 
@@ -694,19 +704,27 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 
                ADD_ID_TO_FREELIST(info, id);
 
+               uptodate = (bret->status == BLKIF_RSP_OKAY);
                switch (bret->operation) {
+               case BLKIF_OP_WRITE_BARRIER:
+                       if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+                               printk("blkfront: %s: write barrier op failed\n",
+                                      info->gd->disk_name);
+                               uptodate = -EOPNOTSUPP;
+                               info->feature_barrier = 0;
+                               xlvbd_barrier(info);
+                       }
+                       /* fall through */
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
                                DPRINTK("Bad return from blkdev data "
                                        "request: %x\n", bret->status);
 
-                       ret = end_that_request_first(
-                               req, (bret->status == BLKIF_RSP_OKAY),
+                       ret = end_that_request_first(req, uptodate,
                                req->hard_nr_sectors);
                        BUG_ON(ret);
-                       end_that_request_last(
-                               req, (bret->status == BLKIF_RSP_OKAY));
+                       end_that_request_last(req, uptodate);
                        break;
                default:
                        BUG();
index 5ba3d1ebc36dcbd76d699cbcc174014e162b3e99..b86360f405316872a0123b8ccf970239659576bf 100644 (file)
@@ -126,6 +126,7 @@ struct blkfront_info
        struct gnttab_free_callback callback;
        struct blk_shadow shadow[BLK_RING_SIZE];
        unsigned long shadow_free;
+       int feature_barrier;
 
        /**
         * The number of people holding this device open.  We won't allow a
@@ -152,5 +153,6 @@ extern void do_blkif_request (request_queue_t *rq);
 int xlvbd_add(blkif_sector_t capacity, int device,
              u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
 void xlvbd_del(struct blkfront_info *info);
+int xlvbd_barrier(struct blkfront_info *info);
 
 #endif /* __XEN_DRIVERS_BLOCK_H__ */
index bbb16a956e7cd4ac21638ca07c6ea7bd440c7aef..7d8e11d1ec180e254753bf149fc0e839a34c74cf 100644 (file)
@@ -289,6 +289,10 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
        }
 
        info->rq = gd->queue;
+       info->gd = gd;
+
+       if (info->feature_barrier)
+               xlvbd_barrier(info);
 
        if (vdisk_info & VDISK_READONLY)
                set_disk_ro(gd, 1);
@@ -299,8 +303,6 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
        if (vdisk_info & VDISK_CDROM)
                gd->flags |= GENHD_FL_CD;
 
-       info->gd = gd;
-
        return 0;
 
  out:
@@ -348,3 +350,17 @@ xlvbd_del(struct blkfront_info *info)
        blk_cleanup_queue(info->rq);
        info->rq = NULL;
 }
+
+/* Apply info->feature_barrier to the request queue's ordered mode and log it. */
+int
+xlvbd_barrier(struct blkfront_info *info)
+{
+       int err = blk_queue_ordered(info->rq,
+               info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
+
+       if (err)
+               return err;     /* propagate the failure without logging */
+       printk(KERN_INFO "blkfront: %s: barriers %s\n",
+              info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled");
+       return 0;
+}