--- /dev/null
+/******************************************************************************
+ * blktap_datapath.c
+ *
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ * -- see full header in blktap.c
+ */
+
+#include "blktap.h"
+#include <asm-xen/evtchn.h>
+
+/*-----[ The data paths ]-------------------------------------------------*/
+
+/* Connection to a single backend domain. */
+blkif_front_ring_t blktap_be_ring;
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* this must be the same as MAX_PENDING_REQS in blkback.c */
+#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
+
+active_req_t active_reqs[MAX_ACTIVE_REQS];
+ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
+ACTIVE_RING_IDX active_prod, active_cons;
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
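+
+/*
+ * active_req_ring[] is a free-list of indices into active_reqs[]:
+ * get_active_req() pops a free index at active_cons, free_active_req()
+ * pushes it back at active_prod, so (active_prod - active_cons) slots are
+ * free and NR_ACTIVE_REQS counts the requests currently in flight.
+ */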
+
+inline active_req_t *get_active_req(void)
+{
+ ACTIVE_RING_IDX idx;
+ active_req_t *ar;
+ unsigned long flags;
+
+ ASSERT(active_cons != active_prod);
+
+ spin_lock_irqsave(&active_req_lock, flags);
+ idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+ ar = &active_reqs[idx];
+ spin_unlock_irqrestore(&active_req_lock, flags);
+
+ return ar;
+}
+
+inline void free_active_req(active_req_t *ar)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&active_req_lock, flags);
+ active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
+ spin_unlock_irqrestore(&active_req_lock, flags);
+}
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+{
+ return &active_reqs[idx];
+}
+
+void active_reqs_init(void)
+{
+ ACTIVE_RING_IDX i;
+
+ active_cons = 0;
+ active_prod = MAX_ACTIVE_REQS;
+ memset(active_reqs, 0, sizeof(active_reqs));
+ for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
+ active_req_ring[i] = i;
+}
+
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message. In it we put the AR index _AND_ the fe domid.
+ * the domid is used by the backend to map the pages properly.
+ */
+
+static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+ return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
+}
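+
+/*
+ * The matching accessors are assumed to live in blktap.h and simply undo
+ * MAKE_ID(); conceptually:
+ *
+ * ID_TO_DOM(_id) -> ((_id) >> 16) (originating frontend domid)
+ * ID_TO_IDX(_id) -> ((_id) & 0xffff) (index into active_reqs[])
+ */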
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+{
+ blkif_response_t *resp_d;
+ active_req_t *ar;
+
+ ar = &active_reqs[ID_TO_IDX(rsp->id)];
+ rsp->id = ar->id;
+
+ resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
+ blkif->blk_ring.rsp_prod_pvt);
+ memcpy(resp_d, rsp, sizeof(blkif_response_t));
+ wmb();
+ blkif->blk_ring.rsp_prod_pvt++;
+
+ blkif_put(ar->blkif);
+ free_active_req(ar);
+
+ return 0;
+}
+
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+ blkif_request_t *req_d;
+
+ if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
+ WPRINTK("Tap trying to access an unconnected backend!\n");
+ return 0;
+ }
+
+ req_d = RING_GET_REQUEST(&blktap_be_ring,
+ blktap_be_ring.req_prod_pvt);
+ memcpy(req_d, req, sizeof(blkif_request_t));
+ wmb();
+ blktap_be_ring.req_prod_pvt++;
+
+ return 0;
+}
+
+void kick_fe_domain(blkif_t *blkif)
+{
+ RING_PUSH_RESPONSES(&blkif->blk_ring);
+ notify_via_evtchn(blkif->evtchn);
+ DPRINTK("notified FE(dom %u)\n", blkif->domid);
+}
+
+void kick_be_domain(void)
+{
+ if ( blktap_be_state != BLKIF_STATE_CONNECTED )
+ return;
+
+ wmb(); /* Ensure that the backend can see the requests. */
+ RING_PUSH_REQUESTS(&blktap_be_ring);
+ notify_via_evtchn(blktap_be_evtchn);
+ DPRINTK("notified BE\n");
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*-----[ Scheduler list maint -from blkback ]--- */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+ return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( !__on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( __on_blkdev_list(blkif) )
+ {
+ list_del(&blkif->blkdev_list);
+ blkif->blkdev_list.next = NULL;
+ blkif_put(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( __on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+ {
+ list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+ blkif_get(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/*-----[ Scheduler functions - from blkback ]--- */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+
+static int blkio_schedule(void *arg)
+{
+ DECLARE_WAITQUEUE(wq, current);
+
+ blkif_t *blkif;
+ struct list_head *ent;
+
+ daemonize("xentapd");
+
+ for ( ; ; )
+ {
+ /* Wait for work to do. */
+ add_wait_queue(&blkio_schedule_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
+ list_empty(&blkio_schedule_list) )
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&blkio_schedule_wait, &wq);
+
+ /* Queue up a batch of requests. */
+ while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+ !list_empty(&blkio_schedule_list) )
+ {
+ ent = blkio_schedule_list.next;
+ blkif = list_entry(ent, blkif_t, blkdev_list);
+ blkif_get(blkif);
+ remove_from_blkdev_list(blkif);
+ if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+ add_to_blkdev_list_tail(blkif);
+ blkif_put(blkif);
+ }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ /* Push the batch through to disc. */
+ run_task_queue(&tq_disk);
+#endif
+ }
+}
+
+static void maybe_trigger_blkio_schedule(void)
+{
+ /*
+ * Needed so that two processes, who together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
+ if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/
+ !list_empty(&blkio_schedule_list) )
+ wake_up(&blkio_schedule_wait);
+}
+
+void blkif_deschedule(blkif_t *blkif)
+{
+ remove_from_blkdev_list(blkif);
+}
+
+void __init blkdev_schedule_init(void)
+{
+ spin_lock_init(&blkio_schedule_list_lock);
+ INIT_LIST_HEAD(&blkio_schedule_list);
+
+ if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+ BUG();
+}
+
+/*-----[ Interrupt entry from a frontend ]------ */
+
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ blkif_t *blkif = dev_id;
+
+ add_to_blkdev_list_tail(blkif);
+ maybe_trigger_blkio_schedule();
+ return IRQ_HANDLED;
+}
+
+/*-----[ Other Frontend Ring functions ]-------- */
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+ /* we have pending messages from the real frontend. */
+
+ blkif_request_t *req_s;
+ RING_IDX i, rp;
+ unsigned long flags;
+ active_req_t *ar;
+ int more_to_do = 0;
+ int notify_be = 0, notify_user = 0;
+
+ DPRINTK("PT got FE interrupt.\n");
+
+ if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
+
+ /* lock both rings */
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ rp = blkif->blk_ring.sring->req_prod;
+ rmb();
+
+ for ( i = blkif->blk_ring.req_cons;
+ (i != rp) &&
+ !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
+ i++ )
+ {
+
+ if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
+ {
+ more_to_do = 1;
+ break;
+ }
+
+ req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
+ /* This is a new request:
+ * Assign an active request record, and remap the id.
+ */
+ ar = get_active_req();
+ ar->id = req_s->id;
+ ar->nr_pages = req_s->nr_segments;
+ blkif_get(blkif);
+ ar->blkif = blkif;
+ req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+ /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
+
+ /* FE -> BE interposition point is here. */
+
+ /* ------------------------------------------------------------- */
+ /* BLKIF_OP_PROBE_HACK: */
+ /* Signal to the backend that we are a tap domain. */
+
+ if (req_s->operation == BLKIF_OP_PROBE) {
+ DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+ req_s->frame_and_sects[1] = BLKTAP_COOKIE;
+ }
+
+ /* ------------------------------------------------------------- */
+
+ /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+ /* Copy the response message to UFERing */
+ /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+ /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+ DPRINTK("req->UFERing\n");
+ blktap_write_fe_ring(req_s);
+ notify_user = 1;
+ }
+
+ /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+
+ /* be included to prevent noise from the fe when its off */
+ /* copy the request message to the BERing */
+
+ DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
+ (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
+ (unsigned)blktap_be_ring.req_prod_pvt &
+ (RING_SIZE((&blktap_be_ring)-1)));
+
+ write_req_to_be_ring(req_s);
+ notify_be = 1;
+ }
+ }
+
+ blkif->blk_ring.req_cons = i;
+
+ /* unlock rings */
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ if (notify_user)
+ blktap_kick_user();
+ if (notify_be)
+ kick_be_domain();
+
+ return more_to_do;
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+ struct pt_regs *ptregs)
+{
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ RING_IDX rp, i;
+ unsigned long flags;
+
+ DPRINTK("PT got BE interrupt.\n");
+
+ /* lock both rings */
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ rp = blktap_be_ring.sring->rsp_prod;
+ rmb();
+
+ for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
+ {
+ resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
+
+ /* BE -> FE interposition point is here. */
+
+ blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+
+ /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+ /* Copy the response message to UBERing */
+ /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+ /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+ DPRINTK("rsp->UBERing\n");
+ blktap_write_be_ring(resp_s);
+ blktap_kick_user();
+
+ }
+
+ /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+
+ /* (fe included to prevent random interference from the BE) */
+ /* Copy the response message to FERing */
+
+ DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
+ (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
+ (unsigned)blkif->blk_ring.rsp_prod_pvt &
+ (RING_SIZE((&blkif->blk_ring)-1)));
+
+ write_resp_to_fe_ring(blkif, resp_s);
+ kick_fe_domain(blkif);
+
+ }
+ }
+
+ blktap_be_ring.rsp_cons = i;
+
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+/* Debug : print the current ring indices. */
+
+void print_vm_ring_idxs(void)
+{
+ int i;
+ blkif_t *blkif;
+
+ WPRINTK("FE Rings: \n---------\n");
+ for ( i = 0; i < 50; i++) {
+ blkif = blkif_find_by_handle((domid_t)i, 0);
+ if (blkif != NULL) {
+ if (blkif->blk_ring.sring != NULL) {
+ WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n", i,
+ blkif->blk_ring.req_cons,
+ blkif->blk_ring.rsp_prod_pvt,
+ blkif->blk_ring.sring->req_prod,
+ blkif->blk_ring.sring->rsp_prod);
+ } else {
+ WPRINTK("%2d: [no device channel yet]\n", i);
+ }
+ }
+ }
+ if (blktap_be_ring.sring != NULL) {
+ WPRINTK("BE Ring: \n--------\n");
+ WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_be_ring.rsp_cons,
+ blktap_be_ring.req_prod_pvt,
+ blktap_be_ring.sring->req_prod,
+ blktap_be_ring.sring->rsp_prod);
+ }
+}
--- /dev/null
+/******************************************************************************
+ * blktap_userdev.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interface between the driver and a character device.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+
+#include "blktap.h"
+
+
+unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
+
+/* Only one process may open /dev/xen/blktap at any time. */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* make this ring->state */
+
+/* for poll: */
+static wait_queue_head_t blktap_wait;
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma;
+unsigned long mmap_vstart;
+unsigned long rings_vstart;
+
+/* Rings up to user space. */
+static blkif_front_ring_t blktap_ufe_ring;
+static blkif_back_ring_t blktap_ube_ring;
+static ctrl_front_ring_t blktap_uctrl_ring;
+
+/* local prototypes */
+static int blktap_read_fe_ring(void);
+static int blktap_read_be_ring(void);
+
+/* -------[ blktap vm ops ]------------------------------------------- */
+
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ /*
+ * if the page has not been mapped in by the driver then generate
+ * a SIGBUS to the domain.
+ */
+
+ force_sig(SIGBUS, current);
+
+ return 0;
+}
+
+struct vm_operations_struct blktap_vm_ops = {
+ nopage: blktap_nopage,
+};
+
+/* -------[ blktap file ops ]----------------------------------------- */
+
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+ blkif_sring_t *sring;
+ ctrl_sring_t *csring;
+
+ if ( test_and_set_bit(0, &blktap_dev_inuse) )
+ return -EBUSY;
+
+ printk(KERN_ALERT "blktap open.\n");
+
+ /* Allocate the ctrl ring. */
+ csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (csring == NULL)
+ goto fail_nomem;
+
+ SetPageReserved(virt_to_page(csring));
+
+ SHARED_RING_INIT(csring);
+ FRONT_RING_INIT(&blktap_uctrl_ring, csring);
+
+
+ /* Allocate the fe ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
+ goto fail_free_ctrl;
+
+ SetPageReserved(virt_to_page(sring));
+
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blktap_ufe_ring, sring);
+
+ /* Allocate the be ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
+ goto fail_free_fe;
+
+ SetPageReserved(virt_to_page(sring));
+
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blktap_ube_ring, sring);
+
+ DPRINTK(KERN_ALERT "blktap open.\n");
+
+ return 0;
+
+ fail_free_fe:
+ free_page( (unsigned long) blktap_ufe_ring.sring);
+
+ fail_free_ctrl:
+ free_page( (unsigned long) blktap_uctrl_ring.sring);
+
+ fail_nomem:
+ return -ENOMEM;
+}
+
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+ blktap_dev_inuse = 0;
+ blktap_ring_ok = 0;
+
+ printk(KERN_ALERT "blktap closed.\n");
+
+ /* Free the ring page. */
+ ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+ free_page((unsigned long) blktap_uctrl_ring.sring);
+
+ ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+ free_page((unsigned long) blktap_ufe_ring.sring);
+
+ ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
+ free_page((unsigned long) blktap_ube_ring.sring);
+
+ return 0;
+}
+
+/* Note on mmap:
+ * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio
+ * work to do direct page access from userspace, this ended up being a
+ * problem. The bigger issue seems to be that there is no way to map
+ * a foreign page in to user space and have the virtual address of that
+ * page map sanely down to a mfn.
+ * Removing the VM_IO flag results in a loop in get_user_pages, as
+ * pfn_valid() always fails on a foreign page.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ int size;
+
+ printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+ vma->vm_start, vma->vm_end);
+
+ vma->vm_ops = &blktap_vm_ops;
+
+ size = vma->vm_end - vma->vm_start;
+ if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+ printk(KERN_INFO
+ "blktap: you _must_ map exactly %d pages!\n",
+ MMAP_PAGES + RING_PAGES);
+ return -EAGAIN;
+ }
+
+ size >>= PAGE_SHIFT;
+ printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+
+ rings_vstart = vma->vm_start;
+ mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+
+ /* Map the ring pages to the start of the region and reserve it. */
+
+ /* not sure if I really need to do this... */
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start,
+ __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
+ }
+
+
+ DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE,
+ __pa(blktap_ube_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("be_ring: remap_pfn_range failure!\n");
+ }
+
+ DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ),
+ __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("fe_ring: remap_pfn_range failure!\n");
+ }
+
+ blktap_vma = vma;
+ blktap_ring_ok = 1;
+
+ return 0;
+}
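+
+/*
+ * Sketch of the mapping expected from the user-space tap process (assuming
+ * tap_fd is the open blktap device and RING_PAGES/MMAP_PAGES match the
+ * values in blktap.h):
+ *
+ * size_t len = (RING_PAGES + MMAP_PAGES) * getpagesize();
+ * char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, tap_fd, 0);
+ *
+ * Page 0 of the mapping is the ctrl ring, page 1 the be ring, page 2 the
+ * fe ring; the data pages follow after the first RING_PAGES pages.
+ */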
+
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ switch(cmd) {
+ case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+ return blktap_read_fe_ring();
+
+ case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
+ return blktap_read_be_ring();
+
+ case BLKTAP_IOCTL_SETMODE:
+ if (BLKTAP_MODE_VALID(arg)) {
+ blktap_mode = arg;
+ /* XXX: may need to flush rings here. */
+ printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+ return 0;
+ }
+ break; /* invalid mode */
+
+ case BLKTAP_IOCTL_PRINT_IDXS:
+ {
+ print_vm_ring_idxs();
+ WPRINTK("User Rings: \n-----------\n");
+ WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ufe_ring.rsp_cons,
+ blktap_ufe_ring.req_prod_pvt,
+ blktap_ufe_ring.sring->req_prod,
+ blktap_ufe_ring.sring->rsp_prod);
+ WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ube_ring.req_cons,
+ blktap_ube_ring.rsp_prod_pvt,
+ blktap_ube_ring.sring->req_prod,
+ blktap_ube_ring.sring->rsp_prod);
+
+ }
+ }
+ return -ENOIOCTLCMD;
+}
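+
+/*
+ * Illustrative use of the ioctl interface from the tap process (assuming
+ * the BLKTAP_IOCTL_* and BLKTAP_MODE_* constants are shared via blktap.h):
+ *
+ * ioctl(tap_fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERCEPT_FE);
+ * ioctl(tap_fd, BLKTAP_IOCTL_PRINT_IDXS, 0);
+ */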
+
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+ poll_wait(file, &blktap_wait, wait);
+
+ if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
+ RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ||
+ RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+
+ RING_PUSH_REQUESTS(&blktap_uctrl_ring);
+ RING_PUSH_REQUESTS(&blktap_ufe_ring);
+ RING_PUSH_RESPONSES(&blktap_ube_ring);
+ return POLLIN | POLLRDNORM;
+ }
+
+ return 0;
+}
+
+void blktap_kick_user(void)
+{
+ /* blktap_ring->req_prod = blktap_req_prod; */
+ wake_up_interruptible(&blktap_wait);
+}
+
+static struct file_operations blktap_fops = {
+ owner: THIS_MODULE,
+ poll: blktap_poll,
+ ioctl: blktap_ioctl,
+ open: blktap_open,
+ release: blktap_release,
+ mmap: blktap_mmap,
+};
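+
+/*
+ * A minimal tap-process event loop might look roughly like this (sketch
+ * only, error handling omitted): poll() wakes when the driver has pushed
+ * messages to the user rings, and the KICK ioctls push the user's queued
+ * messages back down.
+ *
+ * struct pollfd pfd = { .fd = tap_fd, .events = POLLIN | POLLRDNORM };
+ * while (poll(&pfd, 1, -1) >= 0) {
+ * ... consume the user rings, write replies/requests back ...
+ * ioctl(tap_fd, BLKTAP_IOCTL_KICK_FE, 0);
+ * ioctl(tap_fd, BLKTAP_IOCTL_KICK_BE, 0);
+ * }
+ */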
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+ blkif_request_t *target;
+ int error, i;
+
+ /*
+ * This is called to pass a request from the real frontend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: ufe_ring not ready for a request!\n");
+ return 0;
+ }
+
+ if ( RING_FULL(&blktap_ufe_ring) ) {
+ DPRINTK("blktap: fe_ring is full, can't add.\n");
+ return 0;
+ }
+
+ target = RING_GET_REQUEST(&blktap_ufe_ring,
+ blktap_ufe_ring.req_prod_pvt);
+ memcpy(target, req, sizeof(*req));
+
+ /* Attempt to map the foreign pages directly in to the application */
+ for (i=0; i<target->nr_segments; i++) {
+
+ error = direct_remap_area_pages(blktap_vma->vm_mm,
+ MMAP_VADDR(ID_TO_IDX(req->id), i),
+ target->frame_and_sects[i] & PAGE_MASK,
+ PAGE_SIZE,
+ blktap_vma->vm_page_prot,
+ ID_TO_DOM(req->id));
+ if ( error != 0 ) {
+ printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+ /* the request is now dropped on the floor. */
+ return 0;
+ }
+ }
+
+ blktap_ufe_ring.req_prod_pvt++;
+
+ return 0;
+}
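+
+/*
+ * MMAP_VADDR(idx, seg) (from blktap.h) is assumed to place segment "seg" of
+ * active request "idx" at a fixed offset in the data area, roughly
+ * mmap_vstart + ((idx * MMAP_PAGES_PER_REQUEST) + seg) * PAGE_SIZE, so the
+ * tap process can locate a request's pages from ID_TO_IDX(req->id) alone.
+ */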
+
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+ blkif_response_t *target;
+
+ /*
+ * This is called to pass a request from the real backend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: be_ring not ready for a request!\n");
+ return 0;
+ }
+
+ /* No test for fullness in the response direction. */
+
+ target = RING_GET_RESPONSE(&blktap_ube_ring,
+ blktap_ube_ring.rsp_prod_pvt);
+ memcpy(target, rsp, sizeof(*rsp));
+
+ /* no mapping -- pages were mapped in blktap_write_fe_ring() */
+
+ blktap_ube_ring.rsp_prod_pvt++;
+
+ return 0;
+}
+
+static int blktap_read_fe_ring(void)
+{
+ /* This is called to read responses from the UFE ring. */
+
+ RING_IDX i, rp;
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ active_req_t *ar;
+
+ DPRINTK("blktap_read_fe_ring()\n");
+
+ /* if we are forwarding from UFERing to FERing */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+ /* for each outstanding message on the UFEring */
+ rp = blktap_ufe_ring.sring->rsp_prod;
+ rmb();
+
+ for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+ {
+ resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+
+ DPRINTK("resp->fe_ring\n");
+ ar = lookup_active_req(ID_TO_IDX(resp_s->id));
+ blkif = ar->blkif;
+ zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0),
+ ar->nr_pages << PAGE_SHIFT, NULL);
+ write_resp_to_fe_ring(blkif, resp_s);
+ kick_fe_domain(blkif);
+ }
+
+ blktap_ufe_ring.rsp_cons = i;
+ }
+ return 0;
+}
+
+static int blktap_read_be_ring(void)
+{
+ /* This is called to read requests from the UBE ring. */
+
+ RING_IDX i, rp;
+ blkif_request_t *req_s;
+
+ DPRINTK("blktap_read_be_ring()\n");
+
+ /* if we are forwarding from UBERing to BERing */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+
+ /* for each outstanding message on the UBEring */
+ rp = blktap_ube_ring.sring->req_prod;
+ rmb();
+ for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
+ {
+ req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
+
+ DPRINTK("req->be_ring\n");
+ write_req_to_be_ring(req_s);
+ kick_be_domain();
+ }
+
+ blktap_ube_ring.req_cons = i;
+ }
+
+ return 0;
+}
+
+int blktap_write_ctrl_ring(ctrl_msg_t *msg)
+{
+ ctrl_msg_t *target;
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: be_ring not ready for a request!\n");
+ return 0;
+ }
+
+ /* No test for fullness in the response direction. */
+
+ target = RING_GET_REQUEST(&blktap_uctrl_ring,
+ blktap_uctrl_ring.req_prod_pvt);
+ memcpy(target, msg, sizeof(*msg));
+
+ blktap_uctrl_ring.req_prod_pvt++;
+
+ /* currently treat the ring as unidirectional. */
+ blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
+
+ return 0;
+
+}
+
+/* -------[ blktap module setup ]------------------------------------- */
+
+static struct miscdevice blktap_miscdev = {
+ .minor = BLKTAP_MINOR,
+ .name = "blktap",
+ .fops = &blktap_fops,
+ .devfs_name = "misc/blktap",
+};
+
+int blktap_init(void)
+{
+ int err;
+
+ err = misc_register(&blktap_miscdev);
+ if ( err != 0 )
+ {
+ printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+ return err;
+ }
+
+ init_waitqueue_head(&blktap_wait);
+
+
+ return 0;
+}