bitkeeper revision 1.1236.25.19 (423553b79iCQL1CPX5dZyBiY8j8pnQ)

author akw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>

Mon, 14 Mar 2005 09:04:55 +0000 (09:04 +0000)

committer akw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>

Mon, 14 Mar 2005 09:04:55 +0000 (09:04 +0000)
author akw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>
Mon, 14 Mar 2005 09:04:55 +0000 (09:04 +0000)
committer akw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>
Mon, 14 Mar 2005 09:04:55 +0000 (09:04 +0000)
diff --cc linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c

index e88c5629a6980196ee7faebe5172f6cdcf6cc090,0000000000000000000000000000000000000000..a58e49fa09dc70c079805826b182bea1612ccecd

mode 100644,000000..100644
--- 1/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
@@@ -1,472 -1,0 +1,472 @@@
-     return ( (fe_dom << 16) | idx );
+ +/******************************************************************************
+ + * blktap_datapath.c
+ + * 
+ + * XenLinux virtual block-device tap.
+ + * Block request routing data path.
+ + * 
+ + * Copyright (c) 2004, Andrew Warfield
+ + * -- see full header in blktap.c
+ + */
+ + 
+ +#include "blktap.h"
+ +#include <asm-xen/evtchn.h>
+ +
+ +/*-----[ The data paths ]-------------------------------------------------*/
+ +
+ +/* Connection to a single backend domain. */
+ +blkif_front_ring_t blktap_be_ring;
+ +
+ +/*-----[ Tracking active requests ]---------------------------------------*/
+ +
+ +/* this must be the same as MAX_PENDING_REQS in blkback.c */
+ +#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
+ +
+ +active_req_t     active_reqs[MAX_ACTIVE_REQS];
+ +ACTIVE_RING_IDX  active_req_ring[MAX_ACTIVE_REQS];
+ +spinlock_t       active_req_lock = SPIN_LOCK_UNLOCKED;
+ +ACTIVE_RING_IDX  active_prod, active_cons;
+ +#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+ +#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+ +#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
+ +/* Take one free slot from the active request pool.
+ + * Pops the index stored at active_cons in active_req_ring and returns a
+ + * pointer to the matching active_reqs[] entry.  The pop itself is done
+ + * under active_req_lock with interrupts disabled.
+ + * NOTE(review): the emptiness ASSERT is evaluated before the lock is
+ + * taken, so it cannot by itself rule out a concurrent consumer.
+ + */
+ +inline active_req_t *get_active_req(void) 
+ +{
+ +    ACTIVE_RING_IDX idx;
+ +    active_req_t *ar;
+ +    unsigned long flags;
+ +        
+ +    ASSERT(active_cons != active_prod);   
+ +    
+ +    spin_lock_irqsave(&active_req_lock, flags);
+ +    idx =  active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+ +    ar = &active_reqs[idx];
+ +    spin_unlock_irqrestore(&active_req_lock, flags);
+ +    
+ +    return ar;
+ +}
+ +/* Return an active request slot to the free pool.
+ + * The slot's index within active_reqs[] is stored in active_req_ring at
+ + * active_prod, which is then advanced; done under active_req_lock with
+ + * interrupts disabled.
+ + */
+ +inline void free_active_req(active_req_t *ar) 
+ +{
+ +    unsigned long flags;
+ +        
+ +    spin_lock_irqsave(&active_req_lock, flags);
+ +    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
+ +    spin_unlock_irqrestore(&active_req_lock, flags);
+ +}
+ +
+ +/* Map an active-request index (as carried in a message id) back to its
+ + * active_reqs[] entry.
+ + * The index is reduced with MASK_ACTIVE_IDX, the same way MAKE_ID encodes
+ + * it, so an out-of-range value taken from a ring message cannot address
+ + * memory outside active_reqs[].  In-range indices are unaffected.
+ + */
+ +active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+ +{
+ +    return &active_reqs[MASK_ACTIVE_IDX(idx)];
+ +}
+ +/* Reset the active request pool to "everything free".
+ + * active_cons is set to 0 and active_prod to MAX_ACTIVE_REQS, active_reqs[]
+ + * is zeroed, and active_req_ring[] is filled so that entry i holds index i.
+ + * No locking is done here.
+ + */
+ +void active_reqs_init(void)
+ +{
+ +    ACTIVE_RING_IDX i;
+ +    
+ +    active_cons = 0;
+ +    active_prod = MAX_ACTIVE_REQS;
+ +    memset(active_reqs, 0, sizeof(active_reqs));
+ +    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
+ +        active_req_ring[i] = i;
+ +}
+ +
+ +/* Requests passing through the tap to the backend hijack the id field
+ + * in the request message.  In it we put the AR index _AND_ the fe domid.
+ + * the domid is used by the backend to map the pages properly.
+ + */
+ +
+ +static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+ +{
+ +    /* Widen the domid before shifting.  If domid_t is narrower than int
+ +     * the shift would otherwise be done in signed int, where a domid with
+ +     * its top bit set overflows into the sign bit and is then
+ +     * sign-extended when converted to unsigned long.
+ +     */
++    return ( ((unsigned long)fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
+ +}
+ +
+ +/*-----[ Ring helpers ]---------------------------------------------------*/
+ +/* Queue a response on a frontend's ring and retire its active request.
+ + * The active request is found from the index encoded in rsp->id; rsp->id
+ + * is then overwritten in place with the id saved in that record.  The
+ + * response is copied into the slot at rsp_prod_pvt, which is advanced
+ + * after a write barrier.  Finally the reference on ar->blkif is dropped
+ + * and the active request slot is freed.  Always returns 0.
+ + * The ring is not pushed here; see kick_fe_domain().
+ + * NOTE(review): the index taken from rsp->id is not range-checked here.
+ + */
+ +inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+ +{
+ +    blkif_response_t *resp_d;
+ +    active_req_t *ar;
+ +    
+ +    ar = &active_reqs[ID_TO_IDX(rsp->id)];
+ +    rsp->id = ar->id;
+ +            
+ +    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
+ +            blkif->blk_ring.rsp_prod_pvt);
+ +    memcpy(resp_d, rsp, sizeof(blkif_response_t));
+ +    wmb();
+ +    blkif->blk_ring.rsp_prod_pvt++;
+ +            
+ +    blkif_put(ar->blkif);
+ +    free_active_req(ar);
+ +    
+ +    return 0;
+ +}
+ +/* Queue a request on the backend ring.
+ + * If the backend is not in BLKIF_STATE_CONNECTED a warning is printed and
+ + * nothing is queued.  Otherwise the request is copied into the slot at
+ + * req_prod_pvt, which is advanced after a write barrier.  Returns 0 in
+ + * both cases, so callers cannot tell whether the request was queued.
+ + * NOTE(review): no ring-full check is made before writing the slot.
+ + */
+ +inline int write_req_to_be_ring(blkif_request_t *req)
+ +{
+ +    blkif_request_t *req_d;
+ +
+ +    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
+ +        WPRINTK("Tap trying to access an unconnected backend!\n");
+ +        return 0;
+ +    }
+ +    
+ +    req_d = RING_GET_REQUEST(&blktap_be_ring,
+ +            blktap_be_ring.req_prod_pvt);
+ +    memcpy(req_d, req, sizeof(blkif_request_t));
+ +    wmb();
+ +    blktap_be_ring.req_prod_pvt++;
+ +            
+ +    return 0;
+ +}
+ +/* Publish queued responses on blkif's ring and notify the frontend
+ + * through blkif->evtchn.
+ + */
+ +void kick_fe_domain(blkif_t *blkif) 
+ +{
+ +    RING_PUSH_RESPONSES(&blkif->blk_ring);
+ +    notify_via_evtchn(blkif->evtchn);
+ +    DPRINTK("notified FE(dom %u)\n", blkif->domid);
+ +    
+ +}
+ +/* Publish queued requests on the backend ring and notify the backend
+ + * through blktap_be_evtchn.  Does nothing unless the backend is in
+ + * BLKIF_STATE_CONNECTED.
+ + */
+ +void kick_be_domain(void)
+ +{
+ +    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) 
+ +        return;
+ +    
+ +    wmb(); /* Ensure that the backend can see the requests. */
+ +    RING_PUSH_REQUESTS(&blktap_be_ring);
+ +    notify_via_evtchn(blktap_be_evtchn);
+ +    DPRINTK("notified BE\n");
+ +}
+ +
+ +/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+ +
+ +/*-----[ Scheduler list maint -from blkback ]--- */
+ +
+ +static struct list_head blkio_schedule_list;
+ +static spinlock_t blkio_schedule_list_lock;
+ +/* Non-zero when blkif is linked on the schedule list.  Membership is
+ + * tracked by blkdev_list.next being non-NULL; remove_from_blkdev_list()
+ + * resets it to NULL after unlinking.
+ + */
+ +static int __on_blkdev_list(blkif_t *blkif)
+ +{
+ +    return blkif->blkdev_list.next != NULL;
+ +}
+ +/* Unlink blkif from the schedule list, if it is on it, and drop the
+ + * reference that add_to_blkdev_list_tail() took.  Membership is tested
+ + * once without the lock as a fast path and again under
+ + * blkio_schedule_list_lock before the list is modified.
+ + */
+ +static void remove_from_blkdev_list(blkif_t *blkif)
+ +{
+ +    unsigned long flags;
+ +    if ( !__on_blkdev_list(blkif) ) return;
+ +    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ +    if ( __on_blkdev_list(blkif) )
+ +    {
+ +        list_del(&blkif->blkdev_list);
+ +        blkif->blkdev_list.next = NULL;
+ +        blkif_put(blkif);
+ +    }
+ +    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+ +}
+ +/* Append blkif to the tail of the schedule list and take a reference on
+ + * it.  Nothing is done if it is already listed or if its status is not
+ + * CONNECTED.  Membership is tested once without the lock as a fast path
+ + * and again under blkio_schedule_list_lock.
+ + */
+ +static void add_to_blkdev_list_tail(blkif_t *blkif)
+ +{
+ +    unsigned long flags;
+ +    if ( __on_blkdev_list(blkif) ) return;
+ +    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ +    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+ +    {
+ +        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+ +        blkif_get(blkif);
+ +    }
+ +    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+ +}
+ +
+ +
+ +/*-----[ Scheduler functions - from blkback ]--- */
+ +
+ +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+ +
+ +static int do_block_io_op(blkif_t *blkif, int max_to_do);
+ +/* Body of the "xentapd" kernel thread; never returns.
+ + * Each iteration sleeps on blkio_schedule_wait while every active request
+ + * slot is in use or the schedule list is empty.  It then services
+ + * interfaces from the head of the list for as long as slots remain and
+ + * the list is non-empty: each one is taken off the list, handed to
+ + * do_block_io_op() with a budget of BATCH_PER_DOMAIN, and put back at the
+ + * tail if do_block_io_op() reports more work.
+ + */
+ +static int blkio_schedule(void *arg)
+ +{
+ +    DECLARE_WAITQUEUE(wq, current);
+ +
+ +    blkif_t          *blkif;
+ +    struct list_head *ent;
+ +
+ +    daemonize(
+ +        "xentapd"
+ +        );
+ +
+ +    for ( ; ; )
+ +    {
+ +        /* Wait for work to do. */
+ +        add_wait_queue(&blkio_schedule_wait, &wq);
+ +        set_current_state(TASK_INTERRUPTIBLE);
+ +        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || 
+ +             list_empty(&blkio_schedule_list) )
+ +            schedule();
+ +        __set_current_state(TASK_RUNNING);
+ +        remove_wait_queue(&blkio_schedule_wait, &wq);
+ +
+ +        /* Queue up a batch of requests. */
+ +        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+ +                !list_empty(&blkio_schedule_list) )
+ +        {
+ +            ent = blkio_schedule_list.next;
+ +            blkif = list_entry(ent, blkif_t, blkdev_list);
+ +            /* Hold our own reference: removal drops the list's one. */
+ +            blkif_get(blkif);
+ +            remove_from_blkdev_list(blkif);
+ +            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+ +                add_to_blkdev_list_tail(blkif);
+ +            blkif_put(blkif);
+ +        }
+ +
+ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ +        /* Push the batch through to disc. */
+ +        run_task_queue(&tq_disk);
+ +#endif
+ +    }
+ +}
+ +/* Wake the scheduler thread if there is something it can do, i.e. at
+ + * least one active request slot is free and the schedule list is not
+ + * empty.
+ + */
+ +static void maybe_trigger_blkio_schedule(void)
+ +{
+ +    /*
+ +     * Needed so that two processes, who together make the following predicate
+ +     * true, don't both read stale values and evaluate the predicate
+ +     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ +     */
+ +    smp_mb();
+ +
+ +    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/
+ +         !list_empty(&blkio_schedule_list) ) 
+ +        wake_up(&blkio_schedule_wait);
+ +}
+ +/* Public wrapper: take blkif off the schedule list if it is on it. */
+ +void blkif_deschedule(blkif_t *blkif)
+ +{
+ +    remove_from_blkdev_list(blkif);
+ +}
+ +/* Initialise the schedule list and its lock, then start the
+ + * blkio_schedule kernel thread.  Failure to start the thread is treated
+ + * as fatal (BUG()).
+ + */
+ +void __init blkdev_schedule_init(void)
+ +{
+ +    spin_lock_init(&blkio_schedule_list_lock);
+ +    INIT_LIST_HEAD(&blkio_schedule_list);
+ +
+ +    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+ +        BUG();
+ +}
+ +    
+ +/*-----[ Interrupt entry from a frontend ]------ */
+ +/* Interrupt handler for a frontend's event channel.  dev_id is the
+ + * blkif_t of that frontend.  No ring work is done here: the interface is
+ + * queued on the schedule list and the scheduler thread is woken if it
+ + * can make progress.
+ + */
+ +irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+ +{
+ +    blkif_t *blkif = dev_id;
+ +
+ +    add_to_blkdev_list_tail(blkif);
+ +    maybe_trigger_blkio_schedule();
+ +    return IRQ_HANDLED;
+ +}
+ +
+ +/*-----[ Other Frontend Ring functions ]-------- */
+ +
+ +/* Consume pending requests from one frontend ring.
+ + *
+ + * For each request an active request record is allocated, the original id
+ + * and segment count are saved in it, a reference on blkif is taken, and
+ + * the request id is rewritten in place to carry the frontend domid and
+ + * the record index.  Depending on blktap_mode the request is then copied
+ + * to the user-space FE ring and/or forwarded to the backend ring.
+ + *
+ + * blkif     - frontend interface whose ring is drained.
+ + * max_to_do - batch budget for this call.
+ + *
+ + * Returns 1 if the loop stopped because the budget or the active request
+ + * pool ran out (the caller should reschedule this interface), 0 otherwise.
+ + * Runs with blkif_io_lock held around the ring walk; user space and the
+ + * backend are notified after the lock is dropped.
+ + *
+ + * NOTE(review): the budget test pre-decrements, so at most max_to_do - 1
+ + * requests are handled per call -- confirm this is intended.
+ + */
+ +static int do_block_io_op(blkif_t *blkif, int max_to_do)
+ +{
+ +    /* we have pending messages from the real frontend. */
+ +
+ +    blkif_request_t *req_s;
+ +    RING_IDX i, rp;
+ +    unsigned long flags;
+ +    active_req_t *ar;
+ +    int more_to_do = 0;
+ +    int notify_be = 0, notify_user = 0;
+ +    
+ +    DPRINTK("PT got FE interrupt.\n");
+ +
+ +    if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
+ +    
+ +    /* lock both rings */
+ +    spin_lock_irqsave(&blkif_io_lock, flags);
+ +
+ +    rp = blkif->blk_ring.sring->req_prod;
+ +    rmb();
+ +    
+ +    for ( i = blkif->blk_ring.req_cons; 
+ +         (i != rp) && 
+ +            !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
+ +          i++ )
+ +    {
+ +        
+ +        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) 
+ +        {
+ +            more_to_do = 1;
+ +            break;
+ +        }
+ +        
+ +        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
+ +        /* This is a new request:  
+ +         * Assign an active request record, and remap the id. 
+ +         */
+ +        ar = get_active_req();
+ +        ar->id = req_s->id;
+ +        ar->nr_pages = req_s->nr_segments; 
+ +        blkif_get(blkif);
+ +        ar->blkif = blkif;
+ +        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+ +        /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
+ +
+ +        /* FE -> BE interposition point is here. */
+ +        
+ +        /* ------------------------------------------------------------- */
+ +        /* BLKIF_OP_PROBE_HACK:                                          */
+ +        /* Signal to the backend that we are a tap domain.               */
+ +
+ +        if (req_s->operation == BLKIF_OP_PROBE) {
+ +            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+ +            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
+ +        }
+ +
+ +        /* ------------------------------------------------------------- */
+ +
+ +        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+ +        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ +             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+ +            
+ +            /* Copy the request message to UFERing */
+ +            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+ +            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+ +
+ +            DPRINTK("req->UFERing\n"); 
+ +            blktap_write_fe_ring(req_s);
+ +            notify_user = 1;
+ +        }
+ +
+ +        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+ +        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ +               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+ +            
+ +            /* be included to prevent noise from the fe when its off */
+ +            /* copy the request message to the BERing */
+ +
+ +            /* Both indices are reduced modulo the backend ring size. */
+ +            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
+ +                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
+ +                    (unsigned)blktap_be_ring.req_prod_pvt & 
+ +                    (RING_SIZE(&blktap_be_ring)-1));
+ +            
+ +            write_req_to_be_ring(req_s);
+ +            notify_be = 1;
+ +        }
+ +    }
+ +
+ +    blkif->blk_ring.req_cons = i;
+ +    
+ +    /* unlock rings */
+ +    spin_unlock_irqrestore(&blkif_io_lock, flags);
+ +    
+ +    if (notify_user)
+ +        blktap_kick_user();
+ +    if (notify_be)
+ +        kick_be_domain();
+ +    
+ +    return more_to_do;
+ +}
+ +
+ +/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+ +
+ +
+ +/* Interrupt handler for the backend's event channel.
+ + *
+ + * Walks every new response on the backend ring under blkif_io_lock.  The
+ + * owning frontend interface is looked up from the active request index
+ + * encoded in the response id.  Depending on blktap_mode the response is
+ + * copied to the user-space BE ring and/or written straight through to the
+ + * frontend ring, with the corresponding party notified per response.
+ + *
+ + * NOTE(review): the index taken from resp_s->id is used without a range
+ + * check.
+ + */
+ +irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
+ +                                  struct pt_regs *ptregs)
+ +{
+ +    blkif_response_t  *resp_s;
+ +    blkif_t *blkif;
+ +    RING_IDX rp, i;
+ +    unsigned long flags;
+ +
+ +    DPRINTK("PT got BE interrupt.\n");
+ +
+ +    /* lock both rings */
+ +    spin_lock_irqsave(&blkif_io_lock, flags);
+ +    
+ +    rp = blktap_be_ring.sring->rsp_prod;
+ +    rmb();
+ +      
+ +    for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
+ +    {
+ +        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
+ +        
+ +        /* BE -> FE interposition point is here. */
+ +    
+ +        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+ +        
+ +        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+ +        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ +             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+ +
+ +            /* Copy the response message to UBERing */
+ +            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+ +            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+ +
+ +            DPRINTK("rsp->UBERing\n"); 
+ +            blktap_write_be_ring(resp_s);
+ +            blktap_kick_user();
+ +
+ +        }
+ +       
+ +        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+ +        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ +               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+ +            
+ +            /* (fe included to prevent random interference from the BE) */
+ +            /* Copy the response message to FERing */
+ +         
+ +            /* Both indices are reduced modulo the frontend ring size. */
+ +            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
+ +                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
+ +                    (unsigned)blkif->blk_ring.rsp_prod_pvt & 
+ +                    (RING_SIZE(&blkif->blk_ring)-1));
+ +
+ +            write_resp_to_fe_ring(blkif, resp_s);
+ +            kick_fe_domain(blkif);
+ +
+ +        }
+ +    }
+ +    
+ +    blktap_be_ring.rsp_cons = i;
+ +    
+ +
+ +    spin_unlock_irqrestore(&blkif_io_lock, flags);
+ +    
+ +    return IRQ_HANDLED;
+ +}
+ +
+ +/* Debug : print the current ring indices. */
+ +/* For handles 0..49 (with second argument 0) looks up a frontend
+ + * interface via blkif_find_by_handle() and, when one exists, prints its
+ + * private and shared ring indices, or a placeholder if no shared ring is
+ + * attached yet.  The backend ring indices are printed last if its shared
+ + * ring exists.  Output goes through WPRINTK.
+ + * NOTE(review): the upper bound of 50 is hard-coded; indices are printed
+ + * with %d -- confirm against the type of RING_IDX.
+ + */
+ +void print_vm_ring_idxs(void)
+ +{
+ +    int i;
+ +    blkif_t *blkif;
+ +            
+ +    WPRINTK("FE Rings: \n---------\n");
+ +    for ( i = 0; i < 50; i++) { 
+ +        blkif = blkif_find_by_handle((domid_t)i, 0);
+ +        if (blkif != NULL) {
+ +            if (blkif->blk_ring.sring != NULL) {
+ +                WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d "
+ +                    "| req_prod: %2d, rsp_prod: %2d\n", i, 
+ +                    blkif->blk_ring.req_cons,
+ +                    blkif->blk_ring.rsp_prod_pvt,
+ +                    blkif->blk_ring.sring->req_prod,
+ +                    blkif->blk_ring.sring->rsp_prod);
+ +            } else {
+ +                WPRINTK("%2d: [no device channel yet]\n", i);
+ +            }
+ +        }
+ +    }
+ +    if (blktap_be_ring.sring != NULL) {
+ +        WPRINTK("BE Ring: \n--------\n");
+ +        WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
+ +            "| req_prod: %2d, rsp_prod: %2d\n",
+ +            blktap_be_ring.rsp_cons,
+ +            blktap_be_ring.req_prod_pvt,
+ +            blktap_be_ring.sring->req_prod,
+ +            blktap_be_ring.sring->rsp_prod);
+ +    }
+ +}        
diff --cc linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c

index 3cc307fddf1fe16a562f34708f6f364847da3ac2,0000000000000000000000000000000000000000..b503b1ec13ceb0d9933c6016e1fde05498e843a6

mode 100644,000000..100644
--- 1/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
--- /dev/null
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
@@@ -1,489 -1,0 +1,472 @@@
- static void blktap_fast_flush_area(int idx, int nr_pages)
- {
-     multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-     int               i;
- 
-     for ( i = 0; i < nr_pages; i++ )
-     {
-         mcl[i].op = __HYPERVISOR_update_va_mapping;
-         mcl[i].args[0] = MMAP_VADDR(idx, i);
-         mcl[i].args[1] = 0;
-         mcl[i].args[2] = 0;
-     }
- 
-     mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
-     if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
-         BUG();
- }
- 
+ +/******************************************************************************
+ + * blktap_userdev.c
+ + * 
+ + * XenLinux virtual block-device tap.
+ + * Control interface between the driver and a character device.
+ + * 
+ + * Copyright (c) 2004, Andrew Warfield
+ + *
+ + */
+ +
+ +#include <linux/config.h>
+ +#include <linux/module.h>
+ +#include <linux/kernel.h>
+ +#include <linux/fs.h>
+ +#include <linux/mm.h>
+ +#include <linux/miscdevice.h>
+ +#include <linux/errno.h>
+ +#include <linux/major.h>
+ +#include <linux/gfp.h>
+ +#include <linux/poll.h>
+ +#include <asm/pgalloc.h>
+ +#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+ +
+ +#include "blktap.h"
+ +
+ +
+ +unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
+ +
+ +/* Only one process may open /dev/xen/blktap at any time. */
+ +static unsigned long blktap_dev_inuse;
+ +unsigned long blktap_ring_ok; /* make this ring->state */
+ +
+ +/* for poll: */
+ +static wait_queue_head_t blktap_wait;
+ +
+ +/* Where things are inside the device mapping. */
+ +struct vm_area_struct *blktap_vma;
+ +unsigned long mmap_vstart;
+ +unsigned long rings_vstart;
+ +
+ +/* Rings up to user space. */
+ +static blkif_front_ring_t blktap_ufe_ring;
+ +static blkif_back_ring_t  blktap_ube_ring;
+ +static ctrl_front_ring_t  blktap_uctrl_ring;
+ +
+ +/* local prototypes */
+ +static int blktap_read_fe_ring(void);
+ +static int blktap_read_be_ring(void);
+ +
+ +/* -------[ blktap vm ops ]------------------------------------------- */
+ +/* nopage handler installed through blktap_vm_ops.  It never supplies a
+ + * page: the current task is sent SIGBUS and 0 is returned.
+ + */
+ +static struct page *blktap_nopage(struct vm_area_struct *vma,
+ +                                             unsigned long address,
+ +                                             int *type)
+ +{
+ +    /*
+ +     * if the page has not been mapped in by the driver then generate
+ +     * a SIGBUS to the domain.
+ +     */
+ +
+ +    force_sig(SIGBUS, current);
+ +
+ +    return 0;
+ +}
+ +
+ +struct vm_operations_struct blktap_vm_ops = {
+ +    nopage:   blktap_nopage,
+ +};
+ +
+ +/* -------[ blktap file ops ]----------------------------------------- */
+ +
+ +/* open() handler for the blktap device.
+ + *
+ + * Only one opener is allowed at a time; a second open returns -EBUSY.
+ + * Three zeroed pages are allocated and initialised as the shared rings
+ + * towards user space: the control ring, the FE ring and the BE ring.
+ + * Each page is marked reserved.
+ + *
+ + * Returns 0 on success or -ENOMEM if a page cannot be allocated.  On
+ + * failure every page allocated so far is un-reserved and freed, and the
+ + * in-use flag is released so that a later open can succeed.
+ + */
+ +static int blktap_open(struct inode *inode, struct file *filp)
+ +{
+ +    blkif_sring_t *sring;
+ +    ctrl_sring_t *csring;
+ +    
+ +    if ( test_and_set_bit(0, &blktap_dev_inuse) )
+ +        return -EBUSY;
+ +
+ +    printk(KERN_ALERT "blktap open.\n");
+ +    
+ +    /* Allocate the ctrl ring. */
+ +    csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
+ +    if (csring == NULL)
+ +        goto fail_nomem;
+ +
+ +    SetPageReserved(virt_to_page(csring));
+ +    
+ +    SHARED_RING_INIT(csring);
+ +    FRONT_RING_INIT(&blktap_uctrl_ring, csring);
+ +
+ +
+ +    /* Allocate the fe ring. */
+ +    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ +    if (sring == NULL)
+ +        goto fail_free_ctrl;
+ +
+ +    SetPageReserved(virt_to_page(sring));
+ +    
+ +    SHARED_RING_INIT(sring);
+ +    FRONT_RING_INIT(&blktap_ufe_ring, sring);
+ +
+ +    /* Allocate the be ring. */
+ +    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ +    if (sring == NULL)
+ +        goto fail_free_fe;
+ +
+ +    SetPageReserved(virt_to_page(sring));
+ +    
+ +    SHARED_RING_INIT(sring);
+ +    BACK_RING_INIT(&blktap_ube_ring, sring);
+ +
+ +    DPRINTK(KERN_ALERT "blktap open.\n");
+ +
+ +    return 0;
+ +    
+ +    /* Unwind in reverse order of allocation: each label releases one
+ +     * ring and falls through to release the ones allocated before it.
+ +     */
+ + fail_free_fe:
+ +    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+ +    free_page( (unsigned long) blktap_ufe_ring.sring);
+ +
+ + fail_free_ctrl:
+ +    ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+ +    free_page( (unsigned long) blktap_uctrl_ring.sring);
+ +
+ + fail_nomem:
+ +    /* Let a later open() try again instead of returning -EBUSY forever. */
+ +    clear_bit(0, &blktap_dev_inuse);
+ +    return -ENOMEM;
+ +}
+ +
+ +/* release() handler for the blktap device.
+ + *
+ + * Marks the user rings as unusable, un-reserves and frees the three ring
+ + * pages allocated by blktap_open(), and only then releases the in-use
+ + * flag.  Releasing the flag last keeps a concurrent open() from
+ + * installing fresh rings while the old ones are still being torn down.
+ + * Always returns 0.
+ + */
+ +static int blktap_release(struct inode *inode, struct file *filp)
+ +{
+ +    /* Stop the data path from touching the user rings first. */
+ +    blktap_ring_ok = 0;
+ +
+ +    printk(KERN_ALERT "blktap closed.\n");
+ +
+ +    /* Free the ring page. */
+ +    ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+ +    free_page((unsigned long) blktap_uctrl_ring.sring);
+ +
+ +    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+ +    free_page((unsigned long) blktap_ufe_ring.sring);
+ +
+ +    ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
+ +    free_page((unsigned long) blktap_ube_ring.sring);
+ +    
+ +    /* The device may be opened again from here on. */
+ +    clear_bit(0, &blktap_dev_inuse);
+ +
+ +    return 0;
+ +}
+ +
+ +/* Note on mmap:
+ + * remap_pfn_range sets VM_IO on vma->vm_flags.  In trying to make libaio
+ + * work to do direct page access from userspace, this ended up being a
+ + * problem.  The bigger issue seems to be that there is no way to map
+ + * a foreign page in to user space and have the virtual address of that 
+ + * page map sanely down to a mfn.
+ + * Removing the VM_IO flag results in a loop in get_user_pages, as 
+ + * pfn_valid() always fails on a foreign page.
+ + */
+ +/* mmap() handler for the blktap device.
+ + *
+ + * The mapping must cover exactly MMAP_PAGES + RING_PAGES pages.  The
+ + * first three pages are backed by the ctrl, BE and FE ring pages (in that
+ + * order); the remainder, starting at mmap_vstart, is left for request
+ + * data pages.  On success the vma is recorded in blktap_vma and
+ + * blktap_ring_ok is set so the data path may use the user rings.
+ + *
+ + * Returns 0 on success, or -EAGAIN if the size is wrong or a ring page
+ + * cannot be mapped.  In the failure case blktap_ring_ok is left unset.
+ + */
+ +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+ +{
+ +    unsigned long size;
+ +
+ +    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+ +           vma->vm_start, vma->vm_end);
+ +
+ +    size = vma->vm_end - vma->vm_start;
+ +    if ( size != ( (unsigned long)(MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+ +        printk(KERN_INFO 
+ +               "blktap: you _must_ map exactly %d pages!\n",
+ +               MMAP_PAGES + RING_PAGES);
+ +        return -EAGAIN;
+ +    }
+ +
+ +    /* Only take over the vma once the request is known to be valid. */
+ +    vma->vm_ops = &blktap_vm_ops;
+ +
+ +    printk(KERN_INFO "blktap: %d ring pages + %d data pages.\n",
+ +           RING_PAGES, MMAP_PAGES);
+ +    
+ +    rings_vstart = vma->vm_start;
+ +    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+ +    
+ +    /* Map the ring pages to the start of the region and reserve it. */
+ +
+ +    /* not sure if I really need to do this... */
+ +    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ +
+ +    DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
+ +    if (remap_pfn_range(vma, vma->vm_start, 
+ +                         __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, 
+ +                         PAGE_SIZE, vma->vm_page_prot)) {
+ +        WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
+ +        return -EAGAIN;
+ +    }
+ +
+ +
+ +    DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
+ +    if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, 
+ +                         __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, 
+ +                         PAGE_SIZE, vma->vm_page_prot)) {
+ +        WPRINTK("be_ring: remap_pfn_range failure!\n");
+ +        return -EAGAIN;
+ +    }
+ +
+ +    DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
+ +    if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), 
+ +                         __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
+ +                         PAGE_SIZE, vma->vm_page_prot)) {
+ +        WPRINTK("fe_ring: remap_pfn_range failure!\n");
+ +        return -EAGAIN;
+ +    }
+ +            
+ +    /* All three rings are visible to user space: enable the data path. */
+ +    blktap_vma = vma;
+ +    blktap_ring_ok = 1;
+ +
+ +    return 0;
+ +}
+ +
+ +/* ioctl() handler for the blktap device.
+ + *
+ + *   BLKTAP_IOCTL_KICK_FE    - process responses user space put on the
+ + *                             FE ring; returns blktap_read_fe_ring().
+ + *   BLKTAP_IOCTL_KICK_BE    - process requests user space put on the
+ + *                             BE ring; returns blktap_read_be_ring().
+ + *   BLKTAP_IOCTL_SETMODE    - set blktap_mode to arg; returns 0, or
+ + *                             -EINVAL if arg is not a valid mode.
+ + *   BLKTAP_IOCTL_PRINT_IDXS - dump all ring indices to the log;
+ + *                             returns 0.
+ + *
+ + * Any other command returns -ENOIOCTLCMD.
+ + */
+ +static int blktap_ioctl(struct inode *inode, struct file *filp,
+ +                        unsigned int cmd, unsigned long arg)
+ +{
+ +    switch(cmd) {
+ +    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+ +        return blktap_read_fe_ring();
+ +
+ +    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
+ +        return blktap_read_be_ring();
+ +
+ +    case BLKTAP_IOCTL_SETMODE:
+ +        /* Reject bad modes here rather than falling into the next case. */
+ +        if (!BLKTAP_MODE_VALID(arg))
+ +            return -EINVAL;
+ +        blktap_mode = arg;
+ +        /* XXX: may need to flush rings here. */
+ +        printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+ +        return 0;
+ +
+ +    case BLKTAP_IOCTL_PRINT_IDXS:
+ +        {
+ +            print_vm_ring_idxs();
+ +            WPRINTK("User Rings: \n-----------\n");
+ +            WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+ +                            "| req_prod: %2d, rsp_prod: %2d\n",
+ +                            blktap_ufe_ring.rsp_cons,
+ +                            blktap_ufe_ring.req_prod_pvt,
+ +                            blktap_ufe_ring.sring->req_prod,
+ +                            blktap_ufe_ring.sring->rsp_prod);
+ +            WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
+ +                            "| req_prod: %2d, rsp_prod: %2d\n",
+ +                            blktap_ube_ring.req_cons,
+ +                            blktap_ube_ring.rsp_prod_pvt,
+ +                            blktap_ube_ring.sring->req_prod,
+ +                            blktap_ube_ring.sring->rsp_prod);
+ +            return 0;
+ +        }
+ +
+ +    default:
+ +        break;
+ +    }
+ +    return -ENOIOCTLCMD;
+ +}
+ +/* poll() handler.  Registers the caller on blktap_wait.  If any of the
+ + * three user rings has entries that were written but not yet pushed, all
+ + * three rings are pushed and the device is reported readable
+ + * (POLLIN | POLLRDNORM); otherwise 0 is returned.
+ + */
+ +static unsigned int blktap_poll(struct file *file, poll_table *wait)
+ +{
+ +        poll_wait(file, &blktap_wait, wait);
+ +
+ +        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
+ +             RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
+ +             RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+ +
+ +            RING_PUSH_REQUESTS(&blktap_uctrl_ring);
+ +            RING_PUSH_REQUESTS(&blktap_ufe_ring);
+ +            RING_PUSH_RESPONSES(&blktap_ube_ring);
+ +            return POLLIN | POLLRDNORM;
+ +        }
+ +
+ +        return 0;
+ +}
+ +/* Wake any task sleeping on blktap_wait (the queue blktap_poll() waits
+ + * on).  The rings themselves are pushed from blktap_poll(), not here.
+ + */
+ +void blktap_kick_user(void)
+ +{
+ +    /* blktap_ring->req_prod = blktap_req_prod; */
+ +    wake_up_interruptible(&blktap_wait);
+ +}
+ +
+ +static struct file_operations blktap_fops = {
+ +    owner:    THIS_MODULE,
+ +    poll:     blktap_poll,
+ +    ioctl:    blktap_ioctl,
+ +    open:     blktap_open,
+ +    release:  blktap_release,
+ +    mmap:     blktap_mmap,
+ +};
+ +    
+ +/*-----[ Data to/from user space ]----------------------------------------*/
+ +
+ +/* Copy a frontend request onto the user-space FE ring and map each of
+ + * its segments' pages into the application's vma at the slot derived
+ + * from the request id.  The request is only published (req_prod_pvt
+ + * advanced) if every segment mapped successfully.
+ + * Returns 0 in all cases, including when the ring is not ready, is full,
+ + * or a mapping fails, so callers cannot detect a dropped request.
+ + * NOTE(review): pages mapped for earlier segments are not unmapped here
+ + * when a later segment fails.
+ + */
+ +int blktap_write_fe_ring(blkif_request_t *req)
+ +{
+ +    blkif_request_t *target;
+ +    int error, i;
+ +
+ +    /*
+ +     * This is called to pass a request from the real frontend domain's
+ +     * blkif ring to the character device.
+ +     */
+ +
+ +    if ( ! blktap_ring_ok ) {
+ +        DPRINTK("blktap: ufe_ring not ready for a request!\n");
+ +        return 0;
+ +    }
+ +
+ +    if ( RING_FULL(&blktap_ufe_ring) ) {
+ +        DPRINTK("blktap: fe_ring is full, can't add.\n");
+ +        return 0;
+ +    }
+ +
+ +    target = RING_GET_REQUEST(&blktap_ufe_ring,
+ +            blktap_ufe_ring.req_prod_pvt);
+ +    memcpy(target, req, sizeof(*req));
+ +
+ +    /* Attempt to map the foreign pages directly in to the application */
+ +    for (i=0; i<target->nr_segments; i++) {
+ +
+ +        error = direct_remap_area_pages(blktap_vma->vm_mm, 
+ +                                        MMAP_VADDR(ID_TO_IDX(req->id), i), 
+ +                                        target->frame_and_sects[i] & PAGE_MASK,
+ +                                        PAGE_SIZE,
+ +                                        blktap_vma->vm_page_prot,
+ +                                        ID_TO_DOM(req->id));
+ +        if ( error != 0 ) {
+ +            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+ +            /* the request is now dropped on the floor. */
+ +            return 0;
+ +        }
+ +    }
+ +    
+ +    blktap_ufe_ring.req_prod_pvt++;
+ +    
+ +    return 0;
+ +}
+ +/* Copy a backend response onto the user-space BE ring and advance
+ + * rsp_prod_pvt.  Nothing is written if the rings are not mapped yet
+ + * (blktap_ring_ok is clear).  Always returns 0.
+ + */
+ +int blktap_write_be_ring(blkif_response_t *rsp)
+ +{
+ +    blkif_response_t *target;
+ +
+ +    /*
+ +     * This is called to pass a response from the real backend domain's
+ +     * blkif ring to the character device.
+ +     */
+ +
+ +    if ( ! blktap_ring_ok ) {
+ +        DPRINTK("blktap: be_ring not ready for a request!\n");
+ +        return 0;
+ +    }
+ +
+ +    /* No test for fullness in the response direction. */
+ +
+ +    target = RING_GET_RESPONSE(&blktap_ube_ring,
+ +            blktap_ube_ring.rsp_prod_pvt);
+ +    memcpy(target, rsp, sizeof(*rsp));
+ +
+ +    /* no mapping -- pages were mapped in blktap_write_fe_ring() */
+ +
+ +    blktap_ube_ring.rsp_prod_pvt++;
+ +    
+ +    return 0;
+ +}
+ +
-             blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
+ +static int blktap_read_fe_ring(void)
+ +{
+ +    /* This is called to read responses from the UFE ring.
+ +     * Only acts in BLKTAP_MODE_INTERCEPT_FE.  For every response user
+ +     * space has produced, the request's data pages are unmapped from the
+ +     * application vma, the response is written to the owning frontend's
+ +     * ring, and that frontend is notified.  Always returns 0.
+ +     * NOTE(review): resp_s->id comes from user space and its index is
+ +     * used to look up the active request without validation here.
+ +     */
+ +
+ +    RING_IDX i, rp;
+ +    blkif_response_t *resp_s;
+ +    blkif_t *blkif;
+ +    active_req_t *ar;
+ +
+ +    DPRINTK("blktap_read_fe_ring()\n");
+ +
+ +    /* if we are forwarding from UFERing to FERing */
+ +    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+ +
+ +        /* for each outstanding message on the UFEring  */
+ +        rp = blktap_ufe_ring.sring->rsp_prod;
+ +        rmb();
+ +        
+ +        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+ +        {
+ +            resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+ +            
+ +            DPRINTK("resp->fe_ring\n");
+ +            ar = lookup_active_req(ID_TO_IDX(resp_s->id));
+ +            blkif = ar->blkif;
++            zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0), 
++                    ar->nr_pages << PAGE_SHIFT, NULL);
+ +            write_resp_to_fe_ring(blkif, resp_s);
+ +            kick_fe_domain(blkif);
+ +        }
+ +        
+ +        blktap_ufe_ring.rsp_cons = i;
+ +    }
+ +    return 0;
+ +}
+ +/* Forward requests that user space placed on the UBE ring to the real
+ + * backend ring.  Only acts in BLKTAP_MODE_INTERCEPT_BE.  The backend is
+ + * kicked once per forwarded request.  Always returns 0.
+ + */
+ +static int blktap_read_be_ring(void)
+ +{
+ +    /* This is called to read requests from the UBE ring. */
+ +
+ +    RING_IDX i, rp;
+ +    blkif_request_t *req_s;
+ +
+ +    DPRINTK("blktap_read_be_ring()\n");
+ +
+ +    /* if we are forwarding from UBERing to BERing */
+ +    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+ +
+ +        /* for each outstanding message on the UBEring  */
+ +        rp = blktap_ube_ring.sring->req_prod;
+ +        rmb();
+ +        for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
+ +        {
+ +            req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
+ +
+ +            DPRINTK("req->be_ring\n");
+ +            write_req_to_be_ring(req_s);
+ +            kick_be_domain();
+ +        }
+ +        
+ +        blktap_ube_ring.req_cons = i;
+ +    }
+ +
+ +    return 0;
+ +}
+ +
+ +/* Copy a control message onto the user-space control ring and advance
+ + * req_prod_pvt.  Nothing is written if the rings are not mapped yet
+ + * (blktap_ring_ok is clear).  Always returns 0.
+ + *
+ + * The ring is used in one direction only: any responses user space may
+ + * have produced are skipped by moving rsp_cons up to the shared rsp_prod.
+ + */
+ +int blktap_write_ctrl_ring(ctrl_msg_t *msg)
+ +{
+ +    ctrl_msg_t *target;
+ +
+ +    if ( ! blktap_ring_ok ) {
+ +        DPRINTK("blktap: ctrl_ring not ready for a message!\n");
+ +        return 0;
+ +    }
+ +
+ +    /* NOTE(review): no test for fullness is made on the ctrl ring. */
+ +
+ +    target = RING_GET_REQUEST(&blktap_uctrl_ring,
+ +            blktap_uctrl_ring.req_prod_pvt);
+ +    memcpy(target, msg, sizeof(*msg));
+ +
+ +    blktap_uctrl_ring.req_prod_pvt++;
+ +    
+ +    /* currently treat the ring as unidirectional. */
+ +    blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
+ +    
+ +    return 0;
+ +       
+ +}
+ +
+ +/* -------[ blktap module setup ]------------------------------------- */
+ +
+ +static struct miscdevice blktap_miscdev = {
+ +    .minor        = BLKTAP_MINOR,
+ +    .name         = "blktap",
+ +    .fops         = &blktap_fops,
+ +    .devfs_name   = "misc/blktap",
+ +};
+ +/* Register the blktap misc device and initialise the poll wait queue.
+ + * Returns 0 on success or the error from misc_register().
+ + * NOTE(review): the wait queue is initialised after the device has been
+ + * registered.
+ + */
+ +int blktap_init(void)
+ +{
+ +    int err;
+ +
+ +    err = misc_register(&blktap_miscdev);
+ +    if ( err != 0 )
+ +    {
+ +        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+ +        return err;
+ +    }
+ +
+ +    init_waitqueue_head(&blktap_wait);
+ +
+ +
+ +    return 0;
+ +}
author	akw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>
	Mon, 14 Mar 2005 09:04:55 +0000 (09:04 +0000)
committer	akw27@arcadians.cl.cam.ac.uk <akw27@arcadians.cl.cam.ac.uk>
	Mon, 14 Mar 2005 09:04:55 +0000 (09:04 +0000)
		1	2
linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c	patch \|	diff1 \|	\|	blob \| history
linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c	patch \|	diff1 \|	\|	blob \| history