From: akw27@arcadians.cl.cam.ac.uk
Date: Mon, 14 Mar 2005 09:04:55 +0000 (+0000)
Subject: bitkeeper revision 1.1236.25.19 (423553b79iCQL1CPX5dZyBiY8j8pnQ)
X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~17857^2~26^2~2
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=5a17dded4df2ee00dafe6bf8dd44b3143c2e4ee6;p=xen.git

bitkeeper revision 1.1236.25.19 (423553b79iCQL1CPX5dZyBiY8j8pnQ)

Merge arcadians.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into  arcadians.cl.cam.ac.uk:/auto/anfs/nos1/akw27/xeno-clone/xeno.bk
---

5a17dded4df2ee00dafe6bf8dd44b3143c2e4ee6
diff --cc linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
index e88c5629a6,0000000000..a58e49fa09
mode 100644,000000..100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
@@@ -1,472 -1,0 +1,472 @@@
+/******************************************************************************
+ * blktap_datapath.c
+ *
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ * -- see full header in blktap.c
+ */
+
+#include "blktap.h"
+#include
+
+/*-----[ The data paths ]-------------------------------------------------*/
+
+/* Connection to a single backend domain. */
+blkif_front_ring_t blktap_be_ring;
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* This must be the same as MAX_PENDING_REQS in blkback.c. */
+#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
+
+active_req_t active_reqs[MAX_ACTIVE_REQS];
+ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
+ACTIVE_RING_IDX active_prod, active_cons;
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
+
+inline active_req_t *get_active_req(void)
+{
+    ACTIVE_RING_IDX idx;
+    active_req_t *ar;
+    unsigned long flags;
+
+    ASSERT(active_cons != active_prod);
+
+    spin_lock_irqsave(&active_req_lock, flags);
+    idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+    ar = &active_reqs[idx];
+    spin_unlock_irqrestore(&active_req_lock, flags);
+
+    return ar;
+}
+
+inline void free_active_req(active_req_t *ar)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&active_req_lock, flags);
+    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
+    spin_unlock_irqrestore(&active_req_lock, flags);
+}
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+{
+    return &active_reqs[idx];
+}
+
+void active_reqs_init(void)
+{
+    ACTIVE_RING_IDX i;
+
+    active_cons = 0;
+    active_prod = MAX_ACTIVE_REQS;
+    memset(active_reqs, 0, sizeof(active_reqs));
+    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
+        active_req_ring[i] = i;
+}
+
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message. In it we put the AR index _AND_ the fe domid.
+ * The domid is used by the backend to map the pages properly.
+ */
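+/* Example: MAKE_ID(5, 3) below packs to 0x00050003; ID_TO_DOM and
+ * ID_TO_IDX (defined in blktap.h) recover the domid (5) and the AR
+ * index (3) on the way back. */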
+
+static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
-     return ( (fe_dom << 16) | idx );
++    return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
+}
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+{
+    blkif_response_t *resp_d;
+    active_req_t *ar;
+
+    ar = &active_reqs[ID_TO_IDX(rsp->id)];
+    rsp->id = ar->id;
+
+    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
+                               blkif->blk_ring.rsp_prod_pvt);
+    memcpy(resp_d, rsp, sizeof(blkif_response_t));
+    wmb();
+    blkif->blk_ring.rsp_prod_pvt++;
+
+    blkif_put(ar->blkif);
+    free_active_req(ar);
+
+    return 0;
+}
+
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+    blkif_request_t *req_d;
+
+    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
+        WPRINTK("Tap trying to access an unconnected backend!\n");
+        return 0;
+    }
+
+    req_d = RING_GET_REQUEST(&blktap_be_ring,
+                             blktap_be_ring.req_prod_pvt);
+    memcpy(req_d, req, sizeof(blkif_request_t));
+    wmb();
+    blktap_be_ring.req_prod_pvt++;
+
+    return 0;
+}
+
+void kick_fe_domain(blkif_t *blkif)
+{
+    RING_PUSH_RESPONSES(&blkif->blk_ring);
+    notify_via_evtchn(blkif->evtchn);
+    DPRINTK("notified FE(dom %u)\n", blkif->domid);
+
+}
+
+void kick_be_domain(void)
+{
+    if ( blktap_be_state != BLKIF_STATE_CONNECTED )
+        return;
+
+    wmb(); /* Ensure that the backend can see the requests. */
+    RING_PUSH_REQUESTS(&blktap_be_ring);
+    notify_via_evtchn(blktap_be_evtchn);
+    DPRINTK("notified BE\n");
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*-----[ Scheduler list maintenance - from blkback ]--- */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+    return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+    {
+        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/*-----[ Scheduler functions - from blkback ]--- */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+
+static int blkio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    blkif_t *blkif;
+    struct list_head *ent;
+
+    daemonize("xentapd");
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&blkio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
+             list_empty(&blkio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&blkio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
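+        /* (Each pass pulls one blkif off the scheduler list, lets it
+         *  issue up to BATCH_PER_DOMAIN requests, and requeues it at the
+         *  tail if it still has work outstanding.) */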
+        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+                !list_empty(&blkio_schedule_list) )
+        {
+            ent = blkio_schedule_list.next;
+            blkif = list_entry(ent, blkif_t, blkdev_list);
+            blkif_get(blkif);
+            remove_from_blkdev_list(blkif);
+            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(blkif);
+            blkif_put(blkif);
+        }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+        /* Push the batch through to disc. */
+        run_task_queue(&tq_disk);
+#endif
+    }
+}
+
+static void maybe_trigger_blkio_schedule(void)
+{
+    /*
+     * Needed so that two processes, which together make the following
+     * predicate true, don't both read stale values and evaluate the
+     * predicate incorrectly. Incredibly unlikely to stall the scheduler
+     * on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2 */
+         !list_empty(&blkio_schedule_list) )
+        wake_up(&blkio_schedule_wait);
+}
+
+void blkif_deschedule(blkif_t *blkif)
+{
+    remove_from_blkdev_list(blkif);
+}
+
+void __init blkdev_schedule_init(void)
+{
+    spin_lock_init(&blkio_schedule_list_lock);
+    INIT_LIST_HEAD(&blkio_schedule_list);
+
+    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+}
+
+/*-----[ Interrupt entry from a frontend ]------ */
+
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    blkif_t *blkif = dev_id;
+
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_blkio_schedule();
+    return IRQ_HANDLED;
+}
+
+/*-----[ Other Frontend Ring functions ]-------- */
+
+/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+    /* We have pending messages from the real frontend. */
+
+    blkif_request_t *req_s;
+    RING_IDX i, rp;
+    unsigned long flags;
+    active_req_t *ar;
+    int more_to_do = 0;
+    int notify_be = 0, notify_user = 0;
+
+    DPRINTK("PT got FE interrupt.\n");
+
+    if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
+
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    rp = blkif->blk_ring.sring->req_prod;
+    rmb();
+
+    for ( i = blkif->blk_ring.req_cons;
+          (i != rp) &&
+              !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
+          i++ )
+    {
+
+        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
+        {
+            more_to_do = 1;
+            break;
+        }
+
+        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
+        /* This is a new request:
+         * Assign an active request record, and remap the id.
+         */
+        ar = get_active_req();
+        ar->id = req_s->id;
+        ar->nr_pages = req_s->nr_segments;
+        blkif_get(blkif);
+        ar->blkif = blkif;
+        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+        /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
+
+        /* FE -> BE interposition point is here. */
+
+        /* ------------------------------------------------------------- */
+        /* BLKIF_OP_PROBE_HACK:                                           */
+        /* Signal to the backend that we are a tap domain.                */
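+        /* (PROBE requests carry data only in the first segment slot, so
+         *  frame_and_sects[1] is free to hold the cookie the backend
+         *  tests for.)                                                   */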
+
+        if (req_s->operation == BLKIF_OP_PROBE) {
+            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
+        }
+
+        /* ------------------------------------------------------------- */
+
+        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+            /* Copy the response message to UFERing */
+            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+            DPRINTK("req->UFERing\n");
+            blktap_write_fe_ring(req_s);
+            notify_user = 1;
+        }
+
+        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+
+            /* (BE included to prevent noise from the FE when it's off) */
+            /* copy the request message to the BERing */
+
+            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
+                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
+                    (unsigned)blktap_be_ring.req_prod_pvt &
+                    (RING_SIZE(&blktap_be_ring)-1));
+
+            write_req_to_be_ring(req_s);
+            notify_be = 1;
+        }
+    }
+
+    blkif->blk_ring.req_cons = i;
+
+    /* unlock rings */
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+    if (notify_user)
+        blktap_kick_user();
+    if (notify_be)
+        kick_be_domain();
+
+    return more_to_do;
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+                           struct pt_regs *ptregs)
+{
+    blkif_response_t *resp_s;
+    blkif_t *blkif;
+    RING_IDX rp, i;
+    unsigned long flags;
+
+    DPRINTK("PT got BE interrupt.\n");
+
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    rp = blktap_be_ring.sring->rsp_prod;
+    rmb();
+
+    for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
+    {
+        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
+
+        /* BE -> FE interposition point is here. */
+
+        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+
+        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+            /* Copy the response message to UBERing */
+            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+            DPRINTK("rsp->UBERing\n");
+            blktap_write_be_ring(resp_s);
+            blktap_kick_user();
+
+        }
+
+        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+
+            /* (FE included to prevent random interference from the BE) */
+            /* Copy the response message to FERing */
+
+            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
+                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
+                    (unsigned)blkif->blk_ring.rsp_prod_pvt &
+                    (RING_SIZE(&blkif->blk_ring)-1));
+
+            write_resp_to_fe_ring(blkif, resp_s);
+            kick_fe_domain(blkif);
+
+        }
+    }
+
+    blktap_be_ring.rsp_cons = i;
+
+
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+    return IRQ_HANDLED;
+}
+
+/* Debug: print the current ring indices. */
+
+void print_vm_ring_idxs(void)
+{
+    int i;
+    blkif_t *blkif;
+
+    WPRINTK("FE Rings: \n---------\n");
+    for ( i = 0; i < 50; i++) {
+        blkif = blkif_find_by_handle((domid_t)i, 0);
+        if (blkif != NULL) {
+            if (blkif->blk_ring.sring != NULL) {
+                WPRINTK("%2d: req_cons: %2d, rsp_prod_pvt: %2d "
+                        "| req_prod: %2d, rsp_prod: %2d\n", i,
+                        blkif->blk_ring.req_cons,
+                        blkif->blk_ring.rsp_prod_pvt,
+                        blkif->blk_ring.sring->req_prod,
+                        blkif->blk_ring.sring->rsp_prod);
+            } else {
+                WPRINTK("%2d: [no device channel yet]\n", i);
+            }
+        }
+    }
+    if (blktap_be_ring.sring != NULL) {
+        WPRINTK("BE Ring: \n--------\n");
+        WPRINTK("BE: rsp_cons: %2d, req_prod_pvt: %2d "
+                "| req_prod: %2d, rsp_prod: %2d\n",
+                blktap_be_ring.rsp_cons,
+                blktap_be_ring.req_prod_pvt,
+                blktap_be_ring.sring->req_prod,
+                blktap_be_ring.sring->rsp_prod);
+    }
+}
diff --cc linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
index 3cc307fddf,0000000000..b503b1ec13
mode 100644,000000..100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
@@@ -1,489 -1,0 +1,472 @@@
+/******************************************************************************
+ * blktap_userdev.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interface between the driver and a character device.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/ctrl_if.h> /* for control ring. */
+
+#include "blktap.h"
+
+
+unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
+
+/* Only one process may open /dev/xen/blktap at any time. */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* make this ring->state */
+
+/* for poll: */
+static wait_queue_head_t blktap_wait;
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma;
+unsigned long mmap_vstart;
+unsigned long rings_vstart;
+
+/* Rings up to user space. */
+static blkif_front_ring_t blktap_ufe_ring;
+static blkif_back_ring_t blktap_ube_ring;
+static ctrl_front_ring_t blktap_uctrl_ring;
+
+/* local prototypes */
+static int blktap_read_fe_ring(void);
+static int blktap_read_be_ring(void);
+
+/* -------[ blktap vm ops ]------------------------------------------- */
+
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+                                  unsigned long address,
+                                  int *type)
+{
+    /*
+     * If the page has not been mapped in by the driver then generate
+     * a SIGBUS to the domain.
+     */
+
+    force_sig(SIGBUS, current);
+
+    return 0;
+}
+
+struct vm_operations_struct blktap_vm_ops = {
+    nopage:   blktap_nopage,
+};
+
+/* -------[ blktap file ops ]----------------------------------------- */
+
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+    blkif_sring_t *sring;
+    ctrl_sring_t *csring;
+
+    if ( test_and_set_bit(0, &blktap_dev_inuse) )
+        return -EBUSY;
+
+    printk(KERN_ALERT "blktap open.\n");
+
+    /* Allocate the ctrl ring. */
+    csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
+    if (csring == NULL)
+        goto fail_nomem;
+
+    SetPageReserved(virt_to_page(csring));
+
+    SHARED_RING_INIT(csring);
+    FRONT_RING_INIT(&blktap_uctrl_ring, csring);
+
+
+    /* Allocate the fe ring. */
+    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+    if (sring == NULL)
+        goto fail_free_ctrl;
+
+    SetPageReserved(virt_to_page(sring));
+
+    SHARED_RING_INIT(sring);
+    FRONT_RING_INIT(&blktap_ufe_ring, sring);
+
+    /* Allocate the be ring. */
+    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+    if (sring == NULL)
+        goto fail_free_fe;
+
+    SetPageReserved(virt_to_page(sring));
+
+    SHARED_RING_INIT(sring);
+    BACK_RING_INIT(&blktap_ube_ring, sring);
+
+    DPRINTK(KERN_ALERT "blktap open.\n");
+
+    return 0;
+
+ fail_free_fe:
+    free_page( (unsigned long) blktap_ufe_ring.sring);
+
+ fail_free_ctrl:
+    free_page( (unsigned long) blktap_uctrl_ring.sring);
+
+ fail_nomem:
+    return -ENOMEM;
+}
+
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+    blktap_dev_inuse = 0;
+    blktap_ring_ok = 0;
+
+    printk(KERN_ALERT "blktap closed.\n");
+
+    /* Free the ring page. */
+    ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+    free_page((unsigned long) blktap_uctrl_ring.sring);
+
+    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+    free_page((unsigned long) blktap_ufe_ring.sring);
+
+    ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
+    free_page((unsigned long) blktap_ube_ring.sring);
+
+    return 0;
+}
+
+/* Note on mmap:
+ * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio
+ * work to do direct page access from userspace, this ended up being a
+ * problem. The bigger issue seems to be that there is no way to map
+ * a foreign page into user space and have the virtual address of that
+ * page map sanely down to an mfn.
+ * Removing the VM_IO flag results in a loop in get_user_pages, as
+ * pfn_valid() always fails on a foreign page.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+    int size;
+
+    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+           vma->vm_start, vma->vm_end);
+
+    vma->vm_ops = &blktap_vm_ops;
+
+    size = vma->vm_end - vma->vm_start;
+    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+        printk(KERN_INFO
+               "blktap: you _must_ map exactly %d pages!\n",
+               MMAP_PAGES + RING_PAGES);
+        return -EAGAIN;
+    }
+
+    size >>= PAGE_SHIFT;
+    printk(KERN_INFO "blktap: 3 rings + %d pages.\n", size - RING_PAGES);
+
+    rings_vstart = vma->vm_start;
+    mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+
+    /* Map the ring pages to the start of the region and reserve it. */
+
+    /* not sure if I really need to do this... */
+    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+    DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
+    if (remap_pfn_range(vma, vma->vm_start,
+                        __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT,
+                        PAGE_SIZE, vma->vm_page_prot)) {
+        WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
+    }
+
+
+    DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
+    if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE,
+                        __pa(blktap_ube_ring.sring) >> PAGE_SHIFT,
+                        PAGE_SIZE, vma->vm_page_prot)) {
+        WPRINTK("be_ring: remap_pfn_range failure!\n");
+    }
+
+    DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
+    if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ),
+                        __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
+                        PAGE_SIZE, vma->vm_page_prot)) {
+        WPRINTK("fe_ring: remap_pfn_range failure!\n");
+    }
+
+    blktap_vma = vma;
+    blktap_ring_ok = 1;
+
+    return 0;
+}
+
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+    switch(cmd) {
+    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+        return blktap_read_fe_ring();
+
+    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
+        return blktap_read_be_ring();
+
+    case BLKTAP_IOCTL_SETMODE:
+        if (BLKTAP_MODE_VALID(arg)) {
+            blktap_mode = arg;
+            /* XXX: may need to flush rings here. */
+            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+            return 0;
+        }
+    case BLKTAP_IOCTL_PRINT_IDXS:
+        {
+            print_vm_ring_idxs();
+            WPRINTK("User Rings: \n-----------\n");
+            WPRINTK("UF: rsp_cons: %2d, req_prod_pvt: %2d "
+                    "| req_prod: %2d, rsp_prod: %2d\n",
+                    blktap_ufe_ring.rsp_cons,
+                    blktap_ufe_ring.req_prod_pvt,
+                    blktap_ufe_ring.sring->req_prod,
+                    blktap_ufe_ring.sring->rsp_prod);
+            WPRINTK("UB: req_cons: %2d, rsp_prod_pvt: %2d "
+                    "| req_prod: %2d, rsp_prod: %2d\n",
+                    blktap_ube_ring.req_cons,
+                    blktap_ube_ring.rsp_prod_pvt,
+                    blktap_ube_ring.sring->req_prod,
+                    blktap_ube_ring.sring->rsp_prod);
+
+        }
+    }
+    return -ENOIOCTLCMD;
+}
+
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+    poll_wait(file, &blktap_wait, wait);
+
+    if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
+         RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
+         RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+
+        RING_PUSH_REQUESTS(&blktap_uctrl_ring);
+        RING_PUSH_REQUESTS(&blktap_ufe_ring);
+        RING_PUSH_RESPONSES(&blktap_ube_ring);
+        return POLLIN | POLLRDNORM;
+    }
+
+    return 0;
+}
+
+void blktap_kick_user(void)
+{
+    /* blktap_ring->req_prod = blktap_req_prod; */
+    wake_up_interruptible(&blktap_wait);
+}
+
+static struct file_operations blktap_fops = {
+    owner:    THIS_MODULE,
+    poll:     blktap_poll,
+    ioctl:    blktap_ioctl,
+    open:     blktap_open,
+    release:  blktap_release,
+    mmap:     blktap_mmap,
+};
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+    blkif_request_t *target;
+    int error, i;
+
+    /*
+     * This is called to pass a request from the real frontend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: ufe_ring not ready for a request!\n");
+        return 0;
+    }
+
+    if ( RING_FULL(&blktap_ufe_ring) ) {
+        DPRINTK("blktap: fe_ring is full, can't add.\n");
+        return 0;
+    }
+
+    target = RING_GET_REQUEST(&blktap_ufe_ring,
+                              blktap_ufe_ring.req_prod_pvt);
+    memcpy(target, req, sizeof(*req));
+
+    /* Attempt to map the foreign pages directly in to the application */
+    for (i=0; i<req->nr_segments; i++) {
+
+        error = direct_remap_area_pages(blktap_vma->vm_mm,
+                                        MMAP_VADDR(ID_TO_IDX(req->id), i),
+                                        target->frame_and_sects[i] & PAGE_MASK,
+                                        PAGE_SIZE,
+                                        blktap_vma->vm_page_prot,
+                                        ID_TO_DOM(req->id));
+        if ( error != 0 ) {
+            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+            /* the request is now dropped on the floor. */
+            return 0;
+        }
+    }
+
+    blktap_ufe_ring.req_prod_pvt++;
+
+    return 0;
+}
+
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+    blkif_response_t *target;
+
+    /*
+     * This is called to pass a response from the real backend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: be_ring not ready for a request!\n");
+        return 0;
+    }
+
+    /* No test for fullness in the response direction. */
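+    /* (Every response reuses the slot of a request the backend has
+     *  already consumed, so the ring cannot overflow here.) */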
+
+    target = RING_GET_RESPONSE(&blktap_ube_ring,
+                               blktap_ube_ring.rsp_prod_pvt);
+    memcpy(target, rsp, sizeof(*rsp));
+
+    /* no mapping -- pages were mapped in blktap_write_fe_ring() */
+
+    blktap_ube_ring.rsp_prod_pvt++;
+
+    return 0;
+}
+
- static void blktap_fast_flush_area(int idx, int nr_pages)
- {
-     multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-     int i;
- 
-     for ( i = 0; i < nr_pages; i++ )
-     {
-         mcl[i].op = __HYPERVISOR_update_va_mapping;
-         mcl[i].args[0] = MMAP_VADDR(idx, i);
-         mcl[i].args[1] = 0;
-         mcl[i].args[2] = 0;
-     }
- 
-     mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
-     if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
-         BUG();
- }
- 
+static int blktap_read_fe_ring(void)
+{
+    /* This is called to read responses from the UFE ring. */
+
+    RING_IDX i, rp;
+    blkif_response_t *resp_s;
+    blkif_t *blkif;
+    active_req_t *ar;
+
+    DPRINTK("blktap_read_fe_ring()\n");
+
+    /* if we are forwarding from the UFE ring to the FE ring */
+    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+        /* for each outstanding message on the UFE ring */
+        rp = blktap_ufe_ring.sring->rsp_prod;
+        rmb();
+
+        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+        {
+            resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+
+            DPRINTK("resp->fe_ring\n");
+            ar = lookup_active_req(ID_TO_IDX(resp_s->id));
+            blkif = ar->blkif;
++            zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0),
++                           ar->nr_pages << PAGE_SHIFT, NULL);
+            write_resp_to_fe_ring(blkif, resp_s);
+            kick_fe_domain(blkif);
+        }
+
+        blktap_ufe_ring.rsp_cons = i;
+    }
+    return 0;
+}
+
+static int blktap_read_be_ring(void)
+{
+    /* This is called to read requests from the UBE ring. */
+
+    RING_IDX i, rp;
+    blkif_request_t *req_s;
+
+    DPRINTK("blktap_read_be_ring()\n");
+
+    /* if we are forwarding from the UBE ring to the BE ring */
+    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+
+        /* for each outstanding message on the UBE ring */
+        rp = blktap_ube_ring.sring->req_prod;
+        rmb();
+        for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
+        {
+            req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
+
+            DPRINTK("req->be_ring\n");
+            write_req_to_be_ring(req_s);
+            kick_be_domain();
+        }
+
+        blktap_ube_ring.req_cons = i;
+    }
+
+    return 0;
+}
+
+int blktap_write_ctrl_ring(ctrl_msg_t *msg)
+{
+    ctrl_msg_t *target;
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: ctrl_ring not ready for a request!\n");
+        return 0;
+    }
+
+    /* No test for fullness in the request direction. */
+
+    target = RING_GET_REQUEST(&blktap_uctrl_ring,
+                              blktap_uctrl_ring.req_prod_pvt);
+    memcpy(target, msg, sizeof(*msg));
+
+    blktap_uctrl_ring.req_prod_pvt++;
+
+    /* currently treat the ring as unidirectional. */
+    blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
+
+    return 0;
+
+}
+
+/* -------[ blktap module setup ]------------------------------------- */
+
+static struct miscdevice blktap_miscdev = {
+    .minor        = BLKTAP_MINOR,
+    .name         = "blktap",
+    .fops         = &blktap_fops,
+    .devfs_name   = "misc/blktap",
+};
+
+int blktap_init(void)
+{
+    int err;
+
+    err = misc_register(&blktap_miscdev);
+    if ( err != 0 )
+    {
+        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+        return err;
+    }
+
+    init_waitqueue_head(&blktap_wait);
+
+
+    return 0;
+}
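---

Note on the id remapping in blktap_datapath.c above: blktap borrows the id
field of each in-flight block request to carry both the active-request (AR)
index and the originating frontend's domid, so one integer is enough to route
a response back to the right domain and request slot. Below is a minimal
standalone sketch of that round trip; the real ID_TO_IDX/ID_TO_DOM macros
live in blktap.h (not shown in this diff), so the 16-bit shift and mask here
are assumptions read off MAKE_ID, and make_id/id_to_dom/id_to_idx are
illustrative stand-ins, not the driver's functions.

#include <assert.h>
#include <stdio.h>

#define MAX_ACTIVE_REQS     64U
#define MASK_ACTIVE_IDX(i)  ((i) & (MAX_ACTIVE_REQS - 1))

/* Pack a frontend domid and an AR index into one id, mirroring MAKE_ID(). */
static unsigned long make_id(unsigned int fe_dom, unsigned int idx)
{
    return ((unsigned long)fe_dom << 16) | MASK_ACTIVE_IDX(idx);
}

/* Assumed inverses of the packing (the real macros are in blktap.h). */
static unsigned int id_to_dom(unsigned long id) { return id >> 16;     }
static unsigned int id_to_idx(unsigned long id) { return id & 0xffff;  }

int main(void)
{
    unsigned long id = make_id(5, 3);

    printf("id = %#lx, dom = %u, idx = %u\n",
           id, id_to_dom(id), id_to_idx(id));
    assert(id_to_dom(id) == 5 && id_to_idx(id) == 3);

    /* The merge's one functional change is to mask the index before
     * packing, so an out-of-range index can never escape the 0..63
     * range used to address active_reqs[]. */
    assert(id_to_idx(make_id(5, MAX_ACTIVE_REQS + 3)) == 3);
    return 0;
}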