bitkeeper revision 1.872 (4087cf0eay7XY7T1xObNygn1qSwJ0g)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Thu, 22 Apr 2004 13:56:30 +0000 (13:56 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Thu, 22 Apr 2004 13:56:30 +0000 (13:56 +0000)
Partial checkin of new blkdev backend in Xenolinux. Also updates
to the mmu_update interface.

27 files changed:
.rootkeys
tools/xc/lib/xc_linux_build.c
tools/xc/lib/xc_linux_restore.c
tools/xc/lib/xc_linux_save.c
tools/xc/lib/xc_netbsd_build.c
tools/xc/lib/xc_private.c
tools/xc/lib/xc_private.h
tools/xend/lib/domain_controller.h
tools/xend/lib/utils.c
xen/common/memory.c
xen/include/hypervisor-ifs/hypervisor-if.h
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h [new file with mode: 0644]
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c [new file with mode: 0644]
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c [new file with mode: 0644]
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c [new file with mode: 0644]
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c [new file with mode: 0644]
xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c [new file with mode: 0644]
xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c
xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c
xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
xenolinux-2.4.26-sparse/drivers/char/mem.c
xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h
xenolinux-2.4.26-sparse/include/asm-xen/evtchn.h
xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h
xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h

index d27db0000a2189b56973246f529549b5eff0f994..b4940a3b04df3583a8046dea5ae076170ba803df 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
 4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
 4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
+4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h
+4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c
+4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c
+4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c
+4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c
 4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile
 4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c
 4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h
 4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c
 4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
 4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile
+4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c
 405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile
 405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.26-sparse/arch/xen/kernel/Makefile
index 6f10afde1a7b9e2a5c77ef1155d7bf57099cfd17..7f81c924ad3b80de2db73c309f137b28fe293faf 100644 (file)
--- a/tools/xc/lib/xc_linux_build.c
+++ b/tools/xc/lib/xc_linux_build.c
@@ -165,7 +165,7 @@ static int setup_guestos(int xc_handle,
 
     memset(builddomain, 0, sizeof(*builddomain));
 
-    if ( (pm_handle = init_pfn_mapper()) < 0 )
+    if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
         goto error_out;
 
     if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
index d06804ed935a42dd56c1a6298f7fcab86ed04443..3decb285596300591fa09a2e95b0b96be2198b28 100644 (file)
--- a/tools/xc/lib/xc_linux_restore.c
+++ b/tools/xc/lib/xc_linux_restore.c
@@ -186,7 +186,7 @@ int xc_linux_restore(int xc_handle,
     }
     shared_info_frame = op.u.getdomaininfo.shared_info_frame;
 
-    if ( (pm_handle = init_pfn_mapper()) < 0 )
+    if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
         goto out;
 
     /* Copy saved contents of shared-info page. No checking needed. */
index a702a4a2920d1f4cc70b19c8c54250748be1c8b4..dc759f546ca43cd3ccab5a19a40dac745874e649 100644 (file)
--- a/tools/xc/lib/xc_linux_save.c
+++ b/tools/xc/lib/xc_linux_save.c
@@ -178,7 +178,7 @@ int xc_linux_save(int xc_handle,
         goto out;
     }
 
-    if ( (pm_handle = init_pfn_mapper()) < 0 )
+    if ( (pm_handle = init_pfn_mapper((domid_t)domid)) < 0 )
         goto out;
 
     /* Is the suspend-record MFN actually valid for this domain? */
index db5552d26edb2d55ee8a66e058253a4a30275a2f..8793a512f296905584fe342b08e7ac1fc0eb5d8d 100644 (file)
--- a/tools/xc/lib/xc_netbsd_build.c
+++ b/tools/xc/lib/xc_netbsd_build.c
@@ -80,7 +80,7 @@ static int setup_guestos(int xc_handle,
 
     memset(builddomain, 0, sizeof(*builddomain));
 
-    if ( (pm_handle = init_pfn_mapper()) < 0 )
+    if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
         goto error_out;
 
     if ( (page_array = malloc(tot_pages * sizeof(unsigned long))) == NULL )
index 3b634148eb298f0b6a3464aa411da11039f5c827..485aa5875465eb10e728f2d0695b48116e3a5a5c 100644 (file)
--- a/tools/xc/lib/xc_private.c
+++ b/tools/xc/lib/xc_private.c
@@ -6,9 +6,15 @@
 
 #include "xc_private.h"
 
-int init_pfn_mapper(void)
+int init_pfn_mapper(domid_t domid)
 {
-    return open("/dev/mem", O_RDWR);
+    int fd = open("/dev/mem", O_RDWR);
+    if ( fd >= 0 )
+    {
+        (void)ioctl(fd, _IO('M', 1), (unsigned long)(domid>> 0)); /* low  */
+        (void)ioctl(fd, _IO('M', 2), (unsigned long)(domid>>32)); /* high */
+    }
+    return fd;
 }
 
 int close_pfn_mapper(int pm_handle)
@@ -49,7 +55,7 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
     if ( mmu->idx == FIRST_MMU_UPDATE )
         return 0;
 
-    /* The first two requests set the correct subject domain. */
+    /* The first two requests set the correct subject domain (PTS and GPS). */
     mmu->updates[0].val  = (unsigned long)(mmu->subject<<16) & ~0xFFFFUL;
     mmu->updates[0].ptr  = (unsigned long)(mmu->subject<< 0) & ~0xFFFFUL;
     mmu->updates[1].val  = (unsigned long)(mmu->subject>>16) & ~0xFFFFUL;
@@ -57,7 +63,7 @@ static int flush_mmu_updates(int xc_handle, mmu_t *mmu)
     mmu->updates[0].ptr |= MMU_EXTENDED_COMMAND;
     mmu->updates[0].val |= MMUEXT_SET_SUBJECTDOM_L;
     mmu->updates[1].ptr |= MMU_EXTENDED_COMMAND;
-    mmu->updates[1].val |= MMUEXT_SET_SUBJECTDOM_H;
+    mmu->updates[1].val |= MMUEXT_SET_SUBJECTDOM_H | SET_PAGETABLE_SUBJECTDOM;
 
     hypercall.op     = __HYPERVISOR_mmu_update;
     hypercall.arg[0] = (unsigned long)mmu->updates;
index b6c78b74fddc64b8e59c25c75de25a3129a6d057..d4299109e5503b051c8aabce489d8a753846b69b 100644 (file)
--- a/tools/xc/lib/xc_private.h
+++ b/tools/xc/lib/xc_private.h
@@ -154,7 +154,7 @@ static inline int do_block_io_op(int xc_handle, block_io_op_t *op)
 /*
  * PFN mapping.
  */
-int init_pfn_mapper(void);
+int init_pfn_mapper(domid_t domid);
 int close_pfn_mapper(int pm_handle);
 void *map_pfn_writeable(int pm_handle, unsigned long pfn);
 void *map_pfn_readonly(int pm_handle, unsigned long pfn);
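
A minimal usage sketch of the PFN-mapper API as now parameterised by subject
domain ('dom' and 'pfn' are hypothetical values; error handling elided):

    int pm_handle = init_pfn_mapper((domid_t)dom);
    if ( pm_handle >= 0 )
    {
        unsigned long *va = map_pfn_writeable(pm_handle, pfn);
        /* ... read or modify the foreign frame through 'va' ... */
        (void)close_pfn_mapper(pm_handle);
    }
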
index a6ac3b4c925cc6eae5c4a4ebafb5423336af3141..6a49630113e775a78785c66a7d755dff4fcb9434 100644 (file)
--- a/tools/xend/lib/domain_controller.h
+++ b/tools/xend/lib/domain_controller.h
@@ -49,8 +49,116 @@ typedef struct {
     CONTROL_RING_IDX rx_req_prod, rx_resp_prod;
 } control_if_t;
 
-#define CMSG_CONSOLE      0
-#define CMSG_CONSOLE_DATA 0
+/*
+ * Top-level command types.
+ */
+#define CMSG_CONSOLE            0  /* Console               */
+#define CMSG_BLKIF_BE           1  /* Block-device backend  */
+#define CMSG_BLKIF_FE           2  /* Block-device frontend */
+
+/*
+ * Subtypes for console messages.
+ */
+#define CMSG_CONSOLE_DATA       0
+
+/*
+ * Subtypes for block-device messages.
+ */
+#define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
+#define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
+#define CMSG_BLKIF_BE_VBD_CREATE  2  /* Create a new VBD for an interface.   */
+#define CMSG_BLKIF_BE_VBD_DESTROY 3  /* Delete a VBD from an interface.      */
+#define CMSG_BLKIF_BE_VBD_GROW    4  /* Append an extent to a given VBD.     */
+#define CMSG_BLKIF_BE_VBD_SHRINK  5  /* Remove last extent from a given VBD. */
+
+/*
+ * Message request/response definitions for block-device messages.
+ */
+
+typedef u16 blkif_vdev_t;
+typedef u16 blkif_pdev_t;
+typedef u64 blkif_sector_t;
+typedef struct {
+    blkif_pdev_t   device;
+    blkif_sector_t sector_start;
+    blkif_sector_t sector_length;
+} blkif_extent_t;
+
+/* Non-specific 'okay' return. */
+#define BLKIF_STATUS_OKAY                0
+/* Non-specific 'error' return. */
+#define BLKIF_STATUS_ERROR               1
+/* The following are specific error returns. */
+#define BLKIF_STATUS_INTERFACE_EXISTS    2
+#define BLKIF_STATUS_INTERFACE_NOT_FOUND 3
+
+/* This macro can be used to create an array of descriptive error strings. */
+#define BLKIF_STATUS_ERRORS {    \
+    "Okay",                      \
+    "Non-specific error",        \
+    "Interface already exists",  \
+    "Interface not found" }
+
+/* CMSG_BLKIF_BE_CREATE */
+typedef struct { 
+    /* IN */
+    domid_t        domid;             /* Domain attached to new interface.   */
+    unsigned int   blkif_handle;      /* Domain-specific interface handle.   */
+    unsigned int   evtchn_port;       /* Event channel for notifications.    */
+    unsigned long  shmem_frame;       /* Frame containing shared comms window. */
+    /* OUT */
+    unsigned int   status;
+} blkif_create_t; 
+
+/* CMSG_BLKIF_BE_DESTROY */
+typedef struct { 
+    /* IN */
+    domid_t        domid;             /* Identify interface to be destroyed. */
+    unsigned int   blkif_handle;      /* ...ditto...                         */
+    /* OUT */
+    unsigned int   status;
+} blkif_destroy_t; 
+
+/* CMSG_BLKIF_BE_VBD_CREATE */
+typedef struct { 
+    /* IN */
+    domid_t        domid;             /* Identify blkdev interface.          */
+    unsigned int   blkif_handle;      /* ...ditto...                         */
+    blkif_vdev_t   vdevice;           /* Interface-specific id for this VBD. */
+    int            readonly;          /* Non-zero -> VBD isn't writeable.    */
+    /* OUT */
+    unsigned int   status;
+} blkif_vbd_create_t; 
+
+/* CMSG_BLKIF_BE_VBD_DESTROY */
+typedef struct {
+    /* IN */
+    domid_t        domid;             /* Identify blkdev interface.          */
+    unsigned int   blkif_handle;      /* ...ditto...                         */
+    blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
+    /* OUT */
+    unsigned int   status;
+} blkif_vbd_destroy_t; 
+
+/* CMSG_BLKIF_BE_VBD_GROW */
+typedef struct { 
+    /* IN */
+    domid_t        domid;             /* Identify blkdev interface.          */
+    unsigned int   blkif_handle;      /* ...ditto...                         */
+    blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
+    blkif_extent_t extent;            /* Physical extent to append to VBD.   */
+    /* OUT */
+    unsigned int   status;
+} blkif_vbd_grow_t; 
 
+/* CMSG_BLKIF_BE_VBD_SHRINK */
+typedef struct { 
+    /* IN */
+    domid_t        domid;             /* Identify blkdev interface.          */
+    unsigned int   blkif_handle;      /* ...ditto...                         */
+    blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
+    /* OUT */
+    unsigned int   status;
+} blkif_vbd_shrink_t; 
 
 #endif /* __DOMAIN_CONTROLLER_H__ */
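
As an illustration of how the above definitions fit together, here is a hedged
sketch of composing a CMSG_BLKIF_BE_VBD_CREATE request. The exact ctrl_msg_t
layout lives in ctrl_if.h (not shown in this changeset); the
type/subtype/length/msg fields are assumed from their use in the backend's
control.c below, and all values are hypothetical:

    ctrl_msg_t msg;
    blkif_vbd_create_t *req = (blkif_vbd_create_t *)&msg.msg[0];

    msg.type    = CMSG_BLKIF_BE;             /* top-level command type */
    msg.subtype = CMSG_BLKIF_BE_VBD_CREATE;  /* block-backend subtype  */
    msg.length  = sizeof(blkif_vbd_create_t);

    req->domid        = 7;      /* hypothetical target domain        */
    req->blkif_handle = 0;      /* first interface for that domain   */
    req->vdevice      = 0x301;  /* hypothetical virtual device id    */
    req->readonly     = 0;
    /* ... send on the control ring; 'status' is filled in the response. */
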
index ea2cee05d504836ab6778bb8e18e20519a7c29be..4883ec1a462d7685c7cee943c9e2704e4d41fa16 100644 (file)
--- a/tools/xend/lib/utils.c
+++ b/tools/xend/lib/utils.c
@@ -674,6 +674,10 @@ static PyObject *xu_port_new(PyObject *self, PyObject *args)
         goto fail1;
     }
 
+    /* Set the General-Purpose Subject whose page frame will be mapped. */
+    (void)ioctl(xup->mem_fd, _IO('M', 1), (unsigned long)(dom>> 0)); /* low  */
+    (void)ioctl(xup->mem_fd, _IO('M', 2), (unsigned long)(dom>>32)); /* high */
+
     if ( (xup->xc_handle = xc_interface_open()) == -1 )
     {
         PyErr_SetString(port_error, "Could not open Xen control interface");
index 7c94748e073093f78fa3a536d6382550bb9de0af..6d3dc9ead48c1d32771566730ed0768cdaa38de1 100644 (file)
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
 
 static int alloc_l2_table(struct pfn_info *page);
 static int alloc_l1_table(struct pfn_info *page);
-static int get_page_from_pagenr(unsigned long page_nr, int check_level);
+static int get_page_from_pagenr(unsigned long page_nr, struct task_struct *p);
 static int get_page_and_type_from_pagenr(unsigned long page_nr, 
                                          u32 type,
-                                         int check_level);
-#define CHECK_STRICT 0 /* Subject domain must own the page                  */
-#define CHECK_ANYDOM 1 /* Any domain may own the page (if subject is priv.) */
+                                         struct task_struct *p);
 
 static void free_l2_table(struct pfn_info *page);
 static void free_l1_table(struct pfn_info *page);
@@ -180,9 +178,14 @@ static struct {
     unsigned long       deferred_ops;
     unsigned long       cr0;
     domid_t             subject_id;
-    struct task_struct *subject_p;
+    /* General-Purpose Subject, Page-Table Subject */
+    struct task_struct *gps, *pts;
 } percpu_info[NR_CPUS] __cacheline_aligned;
 
+/* Determine the current General-Purpose Subject or Page-Table Subject. */
+#define PTS (percpu_info[smp_processor_id()].pts ? : current)
+#define GPS (percpu_info[smp_processor_id()].gps ? : current)
+
 
 /*
  * init_frametable:
@@ -295,11 +298,9 @@ int map_ldt_shadow_page(unsigned int off)
 }
 
 
-static int get_page_from_pagenr(unsigned long page_nr, int check_level)
+static int get_page_from_pagenr(unsigned long page_nr, struct task_struct *p)
 {
-    struct task_struct *p = current;
     struct pfn_info *page = &frame_table[page_nr];
-    u32 y, x, nx;
 
     if ( unlikely(!pfn_is_ram(page_nr)) )
     {
@@ -307,37 +308,10 @@ static int get_page_from_pagenr(unsigned long page_nr, int check_level)
         return 0;
     }
 
-    /* Find the correct subject domain. */
-    if ( unlikely(percpu_info[p->processor].subject_p != NULL) )
-        p = percpu_info[p->processor].subject_p;
-
-    /* Demote ANYDOM to STRICT if subject domain is not privileged. */
-    if ( check_level == CHECK_ANYDOM && !IS_PRIV(p) )
-        check_level = CHECK_STRICT;
-
-    switch ( check_level )
+    if ( unlikely(!get_page(page, p)) )
     {
-    case CHECK_STRICT:
-        if ( unlikely(!get_page(page, p)) )
-        {
-            MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
-            return 0;
-        }
-        break;
-    case CHECK_ANYDOM:
-        y = page->count_and_flags;
-        do {
-            x  = y;
-            nx = x + 1;
-            if ( unlikely((x & PGC_count_mask) == 0) ||
-                 unlikely((nx & PGC_count_mask) == 0) )
-            {
-                MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
-                return 0;
-            }
-        }
-        while ( unlikely((y = cmpxchg(&page->count_and_flags, x, nx)) != x) );
-        break;
+        MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
+        return 0;
     }
 
     return 1;
@@ -346,11 +320,11 @@ static int get_page_from_pagenr(unsigned long page_nr, int check_level)
 
 static int get_page_and_type_from_pagenr(unsigned long page_nr, 
                                          u32 type,
-                                         int check_level)
+                                         struct task_struct *p)
 {
     struct pfn_info *page = &frame_table[page_nr];
 
-    if ( unlikely(!get_page_from_pagenr(page_nr, check_level)) )
+    if ( unlikely(!get_page_from_pagenr(page_nr, p)) )
         return 0;
 
     if ( unlikely(!get_page_type(page, type)) )
@@ -391,8 +365,7 @@ static int get_linear_pagetable(l2_pgentry_t l2e, unsigned long pfn)
     if ( (l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn )
     {
         /* Make sure the mapped frame belongs to the correct domain. */
-        if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), 
-                                            CHECK_STRICT)) )
+        if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), PTS)) )
             return 0;
 
         /*
@@ -443,14 +416,14 @@ static int get_page_from_l1e(l1_pgentry_t l1e)
     if ( l1v & _PAGE_RW )
     {
         if ( unlikely(!get_page_and_type_from_pagenr(
-            pfn, PGT_writeable_page, CHECK_ANYDOM)) )
+            pfn, PGT_writeable_page, GPS)) )
             return 0;
         set_bit(_PGC_tlb_flush_on_type_change, 
                 &frame_table[pfn].count_and_flags);
         return 1;
     }
 
-    return get_page_from_pagenr(pfn, CHECK_ANYDOM);
+    return get_page_from_pagenr(pfn, GPS);
 }
 
 
@@ -468,7 +441,7 @@ static int get_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
     }
 
     if ( unlikely(!get_page_and_type_from_pagenr(
-        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, CHECK_STRICT)) )
+        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, PTS)) )
         return get_linear_pagetable(l2e, pfn);
 
     return 1;
@@ -771,12 +744,12 @@ void free_page_type(struct pfn_info *page, unsigned int type)
                                 page-frame_table) & PSH_shadowed) )
         {
             /*
-             * Using 'current->mm' is safe and correct because page-table pages 
-             * are not shared across domains. Updates to such pages' types are 
-             * thus only done within the context of the owning domain. The one 
-             * exception is when destroying a domain; however, this is not a 
-             * problem as the currently-executing domain will not have this 
-             * MFN shadowed, and at domain end-of-day we explicitly unshadow 
+             * Using 'current->mm' is safe and correct because page-table pages
+             * are not shared across domains. Updates to such pages' types are
+             * thus only done within the context of the owning domain. The one
+             * exception is when destroying a domain; however, this is not a
+             * problem as the currently-executing domain will not have this MFN
+             * shadowed, and at domain end-of-day we explicitly unshadow
              * everything so that nothing will get left lying around.
              */
             unshadow_table( page-frame_table, type );
@@ -814,9 +787,9 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
     case MMUEXT_PIN_L1_TABLE:
     case MMUEXT_PIN_L2_TABLE:
         okay = get_page_and_type_from_pagenr(
-            pfn, (cmd == MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table : 
-            PGT_l1_page_table,
-            CHECK_STRICT);
+            pfn, 
+            (cmd==MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table : PGT_l1_page_table,
+            PTS);
         if ( unlikely(!okay) )
         {
             MEM_LOG("Error while pinning pfn %08lx", pfn);
@@ -836,7 +809,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
         break;
 
     case MMUEXT_UNPIN_TABLE:
-        if ( unlikely(!(okay = get_page_from_pagenr(pfn, CHECK_STRICT))) )
+        if ( unlikely(!(okay = get_page_from_pagenr(pfn, PTS))) )
         {
             MEM_LOG("Page %08lx bad domain (dom=%p)",
                     ptr, page->u.domain);
@@ -856,8 +829,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
         break;
 
     case MMUEXT_NEW_BASEPTR:
-        okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, 
-                                             CHECK_STRICT);
+        okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, current);
         if ( likely(okay) )
         {
             invalidate_shadow_ldt();
@@ -890,7 +862,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
         break;
     
     case MMUEXT_INVLPG:
-        __flush_tlb_one(val & ~MMUEXT_CMD_MASK);
+        __flush_tlb_one(ptr);
         break;
 
     case MMUEXT_SET_LDT:
@@ -932,11 +904,13 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
         }
         else
         {
-            if ( percpu_info[cpu].subject_p != NULL )
-                put_task_struct(percpu_info[cpu].subject_p);
-            percpu_info[cpu].subject_p = find_domain_by_id(
+            if ( percpu_info[cpu].gps != NULL )
+                put_task_struct(percpu_info[cpu].gps);
+            percpu_info[cpu].gps = find_domain_by_id(
                 percpu_info[cpu].subject_id);
-            if ( percpu_info[cpu].subject_p == NULL )
+            percpu_info[cpu].pts = (val & SET_PAGETABLE_SUBJECTDOM) ? 
+                percpu_info[cpu].gps : NULL;
+            if ( percpu_info[cpu].gps == NULL )
             {
                 MEM_LOG("Unknown domain '%llu'", percpu_info[cpu].subject_id);
                 okay = 0;
@@ -987,7 +961,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
              * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
              */
         case MMU_NORMAL_PT_UPDATE:
-            if ( unlikely(!get_page_from_pagenr(pfn, CHECK_STRICT)) )
+            if ( unlikely(!get_page_from_pagenr(pfn, PTS)) )
             {
                 MEM_LOG("Could not get page for normal update");
                 break;
@@ -1059,7 +1033,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
             break;
 
         case MMU_MACHPHYS_UPDATE:
-            if ( unlikely(!get_page_from_pagenr(pfn, CHECK_STRICT)) )
+            if ( unlikely(!get_page_from_pagenr(pfn, GPS)) )
             {
                 MEM_LOG("Could not get page for mach->phys update");
                 break;
@@ -1108,10 +1082,10 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
     if ( deferred_ops & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);
 
-    if ( unlikely(percpu_info[cpu].subject_p != NULL) )
+    if ( unlikely(percpu_info[cpu].gps != NULL) )
     {
-        put_task_struct(percpu_info[cpu].subject_p);
-        percpu_info[cpu].subject_p = NULL;
+        put_task_struct(percpu_info[cpu].gps);
+        percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
     }
 
     return rc;
index a196832eb9dcbdb99749cc41775666c87050c768..8660d86ed5990cfa2c03d88110ea61156445711b 100644 (file)
--- a/xen/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
 #define NR_VIRQS       12
 
 /*
- * MMU_XXX: specified in least 2 bits of 'ptr' field. These bits are masked
- *  off to get the real 'ptr' value.
- * All requests specify relevent address in 'ptr'. This is either a
- * machine/physical address (MA), or linear/virtual address (VA).
- * Normal requests specify update value in 'value'.
- * Extended requests specify command in least 8 bits of 'value'. These bits
- *  are masked off to get the real 'val' value. Except for MMUEXT_SET_LDT 
- *  which shifts the least bits out.
+ * MMU-UPDATE REQUESTS
+ * 
+ * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
+ * ptr[1:0] specifies the appropriate MMU_* command.
+ * 
+ * GPS (General-Purpose Subject)
+ * -----------------------------
+ *  This is the domain that must own all non-page-table pages involved in
+ *  MMU updates. By default it is the domain that executes mmu_update(). If the
+ *  caller has sufficient privilege then it can be changed by executing
+ *  MMUEXT_SET_SUBJECTDOM_{L,H}.
+ * 
+ * PTS (Page-Table Subject)
+ * ------------------------
+ *  This domain must own all the page-table pages that are subject to MMU
+ *  updates. By default it is the domain that executes mmu_update(). If the
+ *  caller has sufficient privilege then it can be changed by executing
+ *  MMUEXT_SET_SUBJECTDOM_H with val[14] (SET_PAGETABLE_SUBJECTDOM) set.
+ * 
+ * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
+ * Updates an entry in a page table.
+ * ptr[:2]  -- machine address of the page-table entry to modify [1]
+ * val      -- value to write [2]
+ * 
+ * ptr[1:0] == MMU_MACHPHYS_UPDATE:
+ * Updates an entry in the machine->pseudo-physical mapping table.
+ * ptr[:2]  -- machine address within the frame whose mapping to modify [3]
+ * val      -- value to write into the mapping entry
+ *  
+ * ptr[1:0] == MMU_EXTENDED_COMMAND:
+ * val[7:0] -- MMUEXT_* command
+ * 
+ *   val[7:0] == MMUEXT_(UN)PIN_*_TABLE:
+ *   ptr[:2]  -- machine address of frame to be (un)pinned as a p.t. page [1]
+ * 
+ *   val[7:0] == MMUEXT_NEW_BASEPTR:
+ *   ptr[:2]  -- machine address of new page-table base to install in MMU [1]
+ * 
+ *   val[7:0] == MMUEXT_TLB_FLUSH:
+ *   no additional arguments
+ * 
+ *   val[7:0] == MMUEXT_INVLPG:
+ *   ptr[:2]  -- linear address to be flushed from the TLB
+ * 
+ *   val[7:0] == MMUEXT_SET_LDT:
+ *   ptr[:2]  -- linear address of LDT base (NB. must be page-aligned)
+ *   val[:8]  -- number of entries in LDT
+ * 
+ *   val[7:0] == MMUEXT_SET_SUBJECTDOM_L:
+ *   (ptr[31:15],val[31:15]) -- dom[31:0]
+ * 
+ *   val[7:0] == MMUEXT_SET_SUBJECTDOM_H:
+ *   val[14]  -- if TRUE then sets the PTS in addition to the GPS.
+ *   (ptr[31:15],val[31:15]) -- dom[63:32]
+ *   NB. This command must be immediately preceded by SET_SUBJECTDOM_L.
+ * 
+ * Notes on constraints on the above arguments:
+ *  [1] The page frame containing the machine address must belong to the PTS.
+ *  [2] If the PTE is valid (i.e., bit 0 is set) then the specified page frame
+ *      must belong to: 
+ *       (a) the PTS (if the PTE is part of a non-L1 table); or
+ *       (b) the GPS (if the PTE is part of an L1 table).
+ *  [3] The page frame containing the machine address must belong to the GPS.
  */
-/* A normal page-table update request. */
 #define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
-/* Update an entry in the machine->physical mapping table. */
 #define MMU_MACHPHYS_UPDATE      2 /* ptr = MA of frame to modify entry for  */
-/* An extended command. */
 #define MMU_EXTENDED_COMMAND     3 /* least 8 bits of val demux further      */
-/* Extended commands: */
 #define MMUEXT_PIN_L1_TABLE      0 /* ptr = MA of frame to pin               */
 #define MMUEXT_PIN_L2_TABLE      1 /* ptr = MA of frame to pin               */
 #define MMUEXT_PIN_L3_TABLE      2 /* ptr = MA of frame to pin               */
 #define MMUEXT_UNPIN_TABLE       4 /* ptr = MA of frame to unpin             */
 #define MMUEXT_NEW_BASEPTR       5 /* ptr = MA of new pagetable base         */
 #define MMUEXT_TLB_FLUSH         6 /* ptr = NULL                             */
-#define MMUEXT_INVLPG            7 /* ptr = NULL ; val = VA to invalidate    */
+#define MMUEXT_INVLPG            7 /* ptr = VA to invalidate                 */
 #define MMUEXT_SET_LDT           8 /* ptr = VA of table; val = # entries     */
 /* NB. MMUEXT_SET_SUBJECTDOM must consist of *_L followed immediately by *_H */
 #define MMUEXT_SET_SUBJECTDOM_L  9 /* (ptr[31:15],val[31:15]) = dom[31:0]    */
 #define MMUEXT_SET_SUBJECTDOM_H 10 /* (ptr[31:15],val[31:15]) = dom[63:32]   */
+#define SET_PAGETABLE_SUBJECTDOM (1<<14) /* OR into 'val' arg of SUBJECTDOM_H */
 #define MMUEXT_CMD_MASK        255
 #define MMUEXT_CMD_SHIFT         8
 
index 032d02d7ccba8193fcaa088d47ea6a41932db131..4c8c17367c6af01645343b778129ea266c8f9b29 100644 (file)
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
@@ -1,3 +1,3 @@
 O_TARGET := drv.o
-obj-y := main.o
+obj-y := main.o control.o interface.o vbd.o
 include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h
new file mode 100644 (file)
index 0000000..865c241
--- /dev/null
@@ -0,0 +1,94 @@
+/******************************************************************************
+ * arch/xen/drivers/vblkif/backend/common.h
+ */
+
+#ifndef __VBLKIF__BACKEND__COMMON_H__
+#define __VBLKIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <asm/ctrl_if.h>
+#include <asm/io.h>
+
+#ifndef NDEBUG
+#define ASSERT(_p) \
+    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , \
+    __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
+                           __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+typedef struct blkif_st {
+    /* Unique identifier for this interface. */
+    domid_t          domid;
+    unsigned int     handle;
+    /* Physical parameters of the comms window. */
+    unsigned long    shmem_frame;
+    unsigned int     evtchn;
+    int              irq;
+    /* Comms information. */
+    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
+    BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
+    BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
+    /* VBDs attached to this interface. */
+    rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
+    spinlock_t       vbd_lock;      /* Protects VBD mapping. */
+    /* Private fields. */
+    struct blkif_st *hash_next;     /* Next interface in hash chain. */
+    struct list_head blkdev_list;
+    spinlock_t       blk_ring_lock;
+} blkif_t;
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+void blkif_create(blkif_create_t *create);
+void blkif_destroy(blkif_destroy_t *destroy);
+void blkif_get(blkif_t *blkif);
+void blkif_put(blkif_t *blkif);
+
+/* An entry in a list of blkif_extents. */
+typedef struct _blkif_extent_le { 
+    blkif_extent_t extent;               /* an individual extent */
+    struct _blkif_extent_le *next;       /* and a pointer to the next */ 
+} blkif_extent_le_t; 
+
+typedef struct _vbd { 
+    blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
+    unsigned char      mode;      /* VBD_MODE_{R,W} */
+    unsigned char      type;      /* XD_TYPE_xxx */
+    blkif_extent_le_t *extents;   /* list of blkif_extents making up this vbd */
+    rb_node_t          rb;        /* for linking into R-B tree lookup struct */
+} vbd_t; 
+
+long vbd_create(blkif_vbd_create_t *create_params); 
+long vbd_grow(blkif_vbd_grow_t *grow_params); 
+long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
+long vbd_destroy(blkif_vbd_destroy_t *delete_params); 
+
+void destroy_all_vbds(struct task_struct *p);
+
+typedef struct {
+    blkif_t       *blkif;
+    unsigned long  id;
+    atomic_t       pendcnt;
+    unsigned short operation;
+    unsigned short status;
+} pending_req_t;
+
+/* Describes a [partial] disk extent (part of a block io request) */
+typedef struct {
+    unsigned short dev;
+    unsigned short nr_sects;
+    unsigned long  buffer;
+    xen_sector_t   sector_number;
+} phys_seg_t;
+
+int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
+
+int vblkif_be_controller_init(void);
+
+void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+#endif /* __VBLKIF__BACKEND__COMMON_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c
new file mode 100644 (file)
index 0000000..a662d9c
--- /dev/null
@@ -0,0 +1,60 @@
+/******************************************************************************
+ * arch/xen/drivers/vblkif/backend/control.c
+ * 
+ * Routines for interfacing with the control plane.
+ * 
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    switch ( msg->subtype )
+    {
+    case CMSG_BLKIF_BE_CREATE:
+        if ( msg->length != sizeof(blkif_create_t) )
+            goto parse_error;
+        blkif_create((blkif_create_t *)&msg->msg[0]);
+        break;        
+    case CMSG_BLKIF_BE_DESTROY:
+        if ( msg->length != sizeof(blkif_destroy_t) )
+            goto parse_error;
+        blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
+        break;        
+    case CMSG_BLKIF_BE_VBD_CREATE:
+        if ( msg->length != sizeof(blkif_vbd_create_t) )
+            goto parse_error;
+        vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
+        break;
+    case CMSG_BLKIF_BE_VBD_DESTROY:
+        if ( msg->length != sizeof(blkif_vbd_destroy_t) )
+            goto parse_error;
+        vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
+        break;
+    case CMSG_BLKIF_BE_VBD_GROW:
+        if ( msg->length != sizeof(blkif_vbd_grow_t) )
+            goto parse_error;
+        vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
+        break;
+    case CMSG_BLKIF_BE_VBD_SHRINK:
+        if ( msg->length != sizeof(blkif_vbd_shrink_t) )
+            goto parse_error;
+        vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
+        break;
+    default:
+        goto parse_error;
+    }
+
+    ctrl_if_send_response(msg);
+    return;
+
+ parse_error:
+    msg->length = 0;
+    ctrl_if_send_response(msg);
+}
+
+int blkif_ctrlif_init(void)
+{
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
+    return 0;
+}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c
new file mode 100644 (file)
index 0000000..0a42bc5
--- /dev/null
@@ -0,0 +1,96 @@
+/******************************************************************************
+ * arch/xen/drivers/vblkif/backend/interface.c
+ * 
+ * Block-device interface management.
+ * 
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) \
+    (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+/* NB. Partial checkin: creation of this cache is not in this changeset. */
+static kmem_cache_t *blkif_cachep;
+
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+    blkif_t *blkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( (blkif != NULL) && 
+            (blkif->domid != domid) && 
+            (blkif->handle != handle) )
+        blkif = blkif->hash_next;
+    return blkif;
+}
+
+void blkif_create(blkif_create_t *create)
+{
+    domid_t       domid  = create->domid;
+    unsigned int  handle = create->blkif_handle;
+    unsigned int  evtchn = create->evtchn_port;
+    unsigned long shmem_frame = create->shmem_frame;
+    blkif_t     **pblkif, *blkif;
+
+    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( *pblkif != NULL )
+    {
+        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+            goto found_match;
+        pblkif = &(*pblkif)->hash_next;
+    }
+
+    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+    memset(blkif, 0, sizeof(*blkif));
+    blkif->domid       = domid;
+    blkif->handle      = handle;
+    blkif->evtchn      = evtchn;
+    blkif->irq         = bind_evtchn_to_irq(evtchn);
+    blkif->shmem_frame = shmem_frame;
+    blkif->blk_ring_base = (blk_ring_t *)
+        ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
+    spin_lock_init(&blkif->vbd_lock);
+    spin_lock_init(&blkif->blk_ring_lock);
+
+    request_irq(blkif->irq, vblkif_be_int, 0, "vblkif-backend", blkif);
+
+    blkif->hash_next = *pblkif;
+    *pblkif = blkif;
+
+    create->status = BLKIF_STATUS_OKAY;
+    return;
+
+ found_match:
+    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
+    return;
+
+ evtchn_in_use:
+    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
+    create->status = BLKIF_STATUS_ERROR;
+    return;
+}
+
+void blkif_destroy(blkif_destroy_t *destroy)
+{
+    domid_t       domid  = destroy->domid;
+    unsigned int  handle = destroy->blkif_handle;
+    blkif_t     **pblkif, *blkif;
+
+    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( (blkif = *pblkif) != NULL )
+    {
+        if ( (blkif->domid == domid) && (blkif->handle == handle) )
+            goto found_match;
+        pblkif = &blkif->hash_next;
+    }
+
+    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+    return;
+
+ found_match:
+    free_irq(blkif->irq, blkif);
+    unbind_evtchn_from_irq(blkif->evtchn);
+    *pblkif = blkif->hash_next;
+    kmem_cache_free(blkif_cachep, blkif);
+    destroy->status = BLKIF_STATUS_OKAY;
+}
+
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c
new file mode 100644 (file)
index 0000000..cb44ac1
--- /dev/null
@@ -0,0 +1,508 @@
+/******************************************************************************
+ * arch/xen/drivers/vblkif/backend/main.c
+ * 
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A 
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/vblkif/frontend
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ * 
+ * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a 
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements 
+ * the pendcnt towards zero. When it hits zero, the specified domain has a 
+ * response queued for it, with the saved 'id' passed back.
+ * 
+ * We can't allocate pending_req's in order, since they may complete out of 
+ * order. We therefore maintain an allocation ring. This ring also indicates 
+ * when enough work has been passed down -- at that point the allocation ring 
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+/* NB. We use a different index type to differentiate from shared blk rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
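+/*
+ * NB. MAX_PENDING_REQS must be a power of two for MASK_PEND_IDX to work.
+ * An index is taken from pending_ring[] at 'pending_cons' when a request is
+ * dispatched, and returned at 'pending_prod' when its buffer heads complete.
+ */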
+
+static kmem_cache_t *buffer_head_cachep;
+
+static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
+
+static int lock_buffer(blkif_t *blkif,
+                       unsigned long buffer,
+                       unsigned short size,
+                       int writeable_buffer);
+static void unlock_buffer(unsigned long buffer,
+                          unsigned short size,
+                          int writeable_buffer);
+
+static void io_schedule(unsigned long unused);
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                 blk_ring_req_entry_t *req);
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, unsigned long st);
+
+
+/******************************************************************
+ * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head io_schedule_list;
+static spinlock_t io_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+    return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&io_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&io_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) )
+    {
+        list_add_tail(&blkif->blkdev_list, &io_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
+
+static void io_schedule(unsigned long unused)
+{
+    blkif_t          *blkif;
+    struct list_head *ent;
+
+    /* Queue up a batch of requests. */
+    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+            !list_empty(&io_schedule_list) )
+    {
+        ent = io_schedule_list.next;
+        blkif = list_entry(ent, blkif_t, blkdev_list);
+        blkif_get(blkif);
+        remove_from_blkdev_list(blkif);
+        if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+            add_to_blkdev_list_tail(blkif);
+        blkif_put(blkif);
+    }
+
+    /* Push the batch through to disc. */
+    run_task_queue(&tq_disk);
+}
+
+static void maybe_trigger_io_schedule(void)
+{
+    /*
+     * Needed so that two processes, which together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+         !list_empty(&io_schedule_list) )
+        tasklet_schedule(&io_schedule_tasklet);
+}
+
+
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
+{
+    pending_req_t *pending_req = bh->b_private;
+
+    /* An error fails the entire request. */
+    if ( !uptodate )
+    {
+        DPRINTK("Buffer not up-to-date at end of operation\n");
+        pending_req->status = 2;
+    }
+
+    unlock_buffer(virt_to_phys(bh->b_data), 
+                  bh->b_size, 
+                  (pending_req->operation==READ));
+    
+    if ( atomic_dec_and_test(&pending_req->pendcnt) )
+    {
+        make_response(pending_req->blkif, pending_req->id,
+                      pending_req->operation, pending_req->status);
+        blkif_put(pending_req->blkif);
+        spin_lock(&pend_prod_lock);
+        pending_ring[MASK_PEND_IDX(pending_prod)] = 
+            pending_req - pending_reqs;
+        pending_prod++;
+        spin_unlock(&pend_prod_lock);
+        maybe_trigger_io_schedule();
+    }
+}
+
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    blkif_t *blkif = dev_id;
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_io_schedule();
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
+static int lock_buffer(blkif_t *blkif,
+                       unsigned long buffer,
+                       unsigned short size,
+                       int writeable_buffer)
+{
+    unsigned long    pfn;
+
+    for ( pfn = buffer >> PAGE_SHIFT; 
+          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+          pfn++ )
+    {
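+        /* Frame pinning not yet implemented in this partial checkin. */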
+    }
+
+    return 1;
+
+ fail:
+    while ( pfn-- > (buffer >> PAGE_SHIFT) )
+    {        
+    }
+    return 0;
+}
+
+static void unlock_buffer(unsigned long buffer,
+                          unsigned short size,
+                          int writeable_buffer)
+{
+    unsigned long pfn;
+
+    for ( pfn = buffer >> PAGE_SHIFT; 
+          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+          pfn++ )
+    {
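+        /* Frame unpinning not yet implemented in this partial checkin. */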
+    }
+}
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+    blk_ring_t *blk_ring = blkif->blk_ring_base;
+    blk_ring_req_entry_t *req;
+    BLK_RING_IDX i;
+    int more_to_do = 0;
+
+    /* Take items off the comms ring, taking care not to overflow. */
+    for ( i = blkif->blk_req_cons; 
+          (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 
+                                        BLK_RING_SIZE);
+          i++ )
+    {
+        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+        {
+            more_to_do = 1;
+            break;
+        }
+        
+        req = &blk_ring->ring[MASK_BLK_IDX(i)].req;
+        switch ( req->operation )
+        {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+            dispatch_rw_block_io(blkif, req);
+            break;
+
+        default:
+            DPRINTK("error: unknown block io operation [%d]\n",
+                    blk_ring->ring[i].req.operation);
+            make_response(blkif, blk_ring->ring[i].req.id, 
+                          blk_ring->ring[i].req.operation, 1);
+            break;
+        }
+    }
+
+    blkif->blk_req_cons = i;
+    return more_to_do;
+}
+
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                 blk_ring_req_entry_t *req)
+{
+    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+    struct buffer_head *bh;
+    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+    unsigned short nr_sects;
+    unsigned long buffer;
+    int i, tot_sects;
+    pending_req_t *pending_req;
+
+    /* We map virtual scatter/gather segments to physical segments. */
+    int new_segs, nr_psegs = 0;
+    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
+
+    /* Check that number of segments is sane. */
+    if ( unlikely(req->nr_segments == 0) || 
+         unlikely(req->nr_segments > MAX_BLK_SEGS) )
+    {
+        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
+        goto bad_descriptor;
+    }
+
+    /*
+     * Check each address/size pair is sane, and convert into a
+     * physical device and block offset. Note that if the offset and size
+     * cross a virtual extent boundary, we may end up with more
+     * physical scatter/gather segments than virtual segments.
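+     * (Hence phys_seg[] is sized MAX_BLK_SEGS*2: each virtual segment is
+     * assumed to split across at most one extent boundary.)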
+     */
+    for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
+    {
+        buffer   = req->buffer_and_sects[i] & ~0x1FF;
+        nr_sects = req->buffer_and_sects[i] &  0x1FF;
+
+        if ( unlikely(nr_sects == 0) )
+        {
+            DPRINTK("zero-sized data request\n");
+            goto bad_descriptor;
+        }
+
+        phys_seg[nr_psegs].dev           = req->device;
+        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
+        phys_seg[nr_psegs].buffer        = buffer;
+        phys_seg[nr_psegs].nr_sects      = nr_sects;
+
+        /* Translate the request into the relevant 'physical device' */
+        new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
+        if ( new_segs < 0 )
+        { 
+            DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
+                    operation == READ ? "read" : "write", 
+                    req->sector_number + tot_sects, 
+                    req->sector_number + tot_sects + nr_sects, 
+                    req->device); 
+            goto bad_descriptor;
+        }
+  
+        nr_psegs += new_segs;
+        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
+    }
+
+    for ( i = 0; i < nr_psegs; i++ )
+    {
+        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
+                                   phys_seg[i].nr_sects << 9,
+                                   operation==READ)) )
+        {
+            DPRINTK("invalid buffer\n");
+            while ( i-- > 0 )
+                unlock_buffer(phys_seg[i].buffer, 
+                              phys_seg[i].nr_sects << 9,
+                              operation==READ);
+            goto bad_descriptor;
+        }
+    }
+
+    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
+    pending_req->blkif     = blkif;
+    pending_req->id        = req->id;
+    pending_req->operation = operation;
+    pending_req->status    = 0;
+    atomic_set(&pending_req->pendcnt, nr_psegs);
+
+    blkif_get(blkif);
+
+    /* Now we pass each segment down to the real blkdev layer. */
+    for ( i = 0; i < nr_psegs; i++ )
+    {
+        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
+        if ( unlikely(bh == NULL) )
+            panic("bh is null\n");
+        memset(bh, 0, sizeof (struct buffer_head));
+    
+        bh->b_size          = phys_seg[i].nr_sects << 9;
+        bh->b_dev           = phys_seg[i].dev;
+        bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
+
+        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
+           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
+        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
+        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
+        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
+        bh->b_end_io        = end_block_io_op;
+        bh->b_private       = pending_req;
+
+        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
+        if ( operation == WRITE )
+            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
+
+        atomic_set(&bh->b_count, 1);
+
+        /* Dispatch a single request. We'll flush it to disc later. */
+        submit_bh(operation, bh);
+    }
+
+    return;
+
+ bad_descriptor:
+    make_response(blkif, req->id, req->operation, 1);
+} 
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, unsigned long st)
+{
+    blk_ring_resp_entry_t *resp;
+
+    /* Place on the response ring for the relevant domain. */ 
+    spin_lock(&blkif->blk_ring_lock);
+    resp = &blkif->blk_ring_base->
+        ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
+    resp->id        = id;
+    resp->operation = op;
+    resp->status    = st;
+    wmb();
+    blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
+    spin_unlock(&blkif->blk_ring_lock);
+
+    /* Kick the relevant domain. */
+    notify_via_evtchn(blkif->evtchn);
+}
+
+static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
+{
+#if 0
+    unsigned long flags;
+    struct task_struct *p;
+    blk_ring_t *blk_ring;
+    int i;
+
+    printk("Dumping block queue stats: nr_pending = %d"
+           " (prod=0x%08x,cons=0x%08x)\n",
+           NR_PENDING_REQS, pending_prod, pending_cons);
+
+    read_lock_irqsave(&tasklist_lock, flags);
+    for_each_domain ( p )
+    {
+        printk("Domain: %llu\n", blkif->domain);
+        blk_ring = blkif->blk_ring_base;
+        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
+               "0x%08x on_list=%d\n",
+               blk_ring->req_prod, blkif->blk_req_cons,
+               blk_ring->resp_prod, blkif->blk_resp_prod,
+               __on_blkdev_list(p));
+    }
+    read_unlock_irqrestore(&tasklist_lock, flags);
+
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+    {
+        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
+               i, pending_reqs[i].domain, pending_reqs[i].id,
+               atomic_read(&pending_reqs[i].pendcnt), 
+               pending_reqs[i].operation, pending_reqs[i].status);
+    }
+#endif
+}
+
+void unlink_blkdev_info(blkif_t *blkif)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&io_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = (void *)0xdeadbeef;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+static int __init init_module(void)
+{
+    int i;
+
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    memset(pending_reqs, 0, sizeof(pending_reqs));
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+    
+    for ( i = 0; i < NR_CPUS; i++ )
+        completed_bhs[i] = NULL;
+        
+    spin_lock_init(&io_schedule_list_lock);
+    INIT_LIST_HEAD(&io_schedule_list);
+
+    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
+                     SA_SHIRQ, "vblkif-backend-dbg", &blkif_debug_int) != 0 )
+        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
+
+    buffer_head_cachep = kmem_cache_create(
+        "buffer_head_cache", sizeof(struct buffer_head),
+        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+    return 0;
+}
+
+static void cleanup_module(void)
+{
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c
new file mode 100644 (file)
index 0000000..89acb63
--- /dev/null
@@ -0,0 +1,701 @@
+/******************************************************************************
+ * arch/xen/drivers/vblkif/backend/vbd.c
+ * 
+ * Routines for managing virtual block devices (VBDs).
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+long __vbd_create(struct task_struct *p,
+                  unsigned short vdevice,
+                  unsigned char mode,
+                  unsigned char type)
+{
+    vbd_t *vbd; 
+    rb_node_t **rb_p, *rb_parent = NULL;
+    long ret = 0;
+
+    spin_lock(&p->vbd_lock);
+
+    rb_p = &p->vbd_rb.rb_node;
+    while ( *rb_p != NULL )
+    {
+        rb_parent = *rb_p;
+        vbd = rb_entry(rb_parent, vbd_t, rb);
+        if ( vdevice < vbd->vdevice )
+        {
+            rb_p = &rb_parent->rb_left;
+        }
+        else if ( vdevice > vbd->vdevice )
+        {
+            rb_p = &rb_parent->rb_right;
+        }
+        else
+        {
+            DPRINTK("vbd_create attempted for already existing vbd\n");
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+
+    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
+    {
+        DPRINTK("vbd_create: out of memory\n");
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    vbd->vdevice = vdevice; 
+    vbd->mode    = mode; 
+    vbd->type    = type;
+    vbd->extents = NULL; 
+
+    rb_link_node(&vbd->rb, rb_parent, rb_p);
+    rb_insert_color(&vbd->rb, &p->vbd_rb);
+
+ out:
+    spin_unlock(&p->vbd_lock);
+    return ret; 
+}
+
+
+long vbd_create(vbd_create_t *create) 
+{
+    struct task_struct *p;
+    long rc;
+
+    if ( unlikely(!IS_PRIV(current)) )
+        return -EPERM;
+
+    if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) )
+    {
+        DPRINTK("vbd_create attempted for non-existent domain %llu\n", 
+                create->domain); 
+        return -EINVAL; 
+    }
+
+    rc = __vbd_create(p, create->vdevice, create->mode,
+                      XD_TYPE_DISK | XD_FLAG_VIRT);
+
+    put_task_struct(p);
+
+    return rc;
+}
+
+
+long __vbd_grow(struct task_struct *p,
+                unsigned short vdevice,
+                xen_extent_t *extent)
+{
+    xen_extent_le_t **px, *x; 
+    vbd_t *vbd = NULL;
+    rb_node_t *rb;
+    long ret = 0;
+
+    spin_lock(&p->vbd_lock);
+
+    rb = p->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            break;
+    }
+
+    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
+    {
+        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
+        ret = -EINVAL;
+        goto out;
+    } 
+
+    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
+    {
+        DPRINTK("vbd_grow: out of memory\n");
+        ret = -ENOMEM;
+        goto out;
+    }
+    x->extent.device       = extent->device; 
+    x->extent.start_sector = extent->start_sector; 
+    x->extent.nr_sectors   = extent->nr_sectors; 
+    x->next                = (xen_extent_le_t *)NULL; 
+
+    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
+        continue;
+
+    *px = x;
+
+ out:
+    spin_unlock(&p->vbd_lock);
+    return ret;
+}
+
+
+/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
+long vbd_grow(vbd_grow_t *grow) 
+{
+    struct task_struct *p;
+    long rc;
+
+    if ( unlikely(!IS_PRIV(current)) )
+        return -EPERM; 
+
+    if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) )
+    {
+        DPRINTK("vbd_grow: attempted for non-existent domain %llu\n", 
+                grow->domain); 
+        return -EINVAL; 
+    }
+
+    rc = __vbd_grow(p, grow->vdevice, &grow->extent);
+
+    put_task_struct(p);
+
+    return rc;
+}
+
+
+long vbd_shrink(vbd_shrink_t *shrink)
+{
+    struct task_struct *p; 
+    xen_extent_le_t **px, *x; 
+    vbd_t *vbd = NULL;
+    rb_node_t *rb;
+    long ret = 0;
+
+    if ( !IS_PRIV(current) )
+        return -EPERM; 
+
+    if ( (p = find_domain_by_id(shrink->domain)) == NULL )
+    {
+        DPRINTK("vbd_shrink attempted for non-existent domain %llu\n", 
+                shrink->domain); 
+        return -EINVAL; 
+    }
+
+    spin_lock(&p->vbd_lock);
+
+    rb = p->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( shrink->vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( shrink->vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            break;
+    }
+
+    if ( unlikely(vbd == NULL) || 
+         unlikely(vbd->vdevice != shrink->vdevice) ||
+         unlikely(vbd->extents == NULL) )
+    {
+        DPRINTK("vbd_shrink: attempt to remove non-existent extent.\n"); 
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* Find the last extent. We now know that there is at least one. */
+    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
+        continue;
+
+    x   = *px;
+    *px = x->next;
+    kfree(x);
+
+ out:
+    spin_unlock(&p->vbd_lock);
+    put_task_struct(p);
+    return ret; 
+}
+
+
+long vbd_setextents(vbd_setextents_t *setextents)
+{
+    struct task_struct *p; 
+    xen_extent_t e;
+    xen_extent_le_t *new_extents, *x, *t; 
+    vbd_t *vbd = NULL;
+    rb_node_t *rb;
+    int i;
+    long ret = 0;
+
+    if ( !IS_PRIV(current) )
+        return -EPERM; 
+
+    if ( (p = find_domain_by_id(setextents->domain)) == NULL )
+    {
+        DPRINTK("vbd_setextents attempted for non-existent domain %llu\n", 
+                setextents->domain); 
+        return -EINVAL; 
+    }
+
+    spin_lock(&p->vbd_lock);
+
+    rb = p->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( setextents->vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( setextents->vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            break;
+    }
+
+    if ( unlikely(vbd == NULL) || 
+         unlikely(vbd->vdevice != setextents->vdevice) )
+    {
+        DPRINTK("vbd_setextents: attempt to modify non-existent VBD.\n"); 
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* Construct the new extent list. */
+    new_extents = NULL;
+    for ( i = setextents->nr_extents - 1; i >= 0; i-- )
+    {
+        if ( unlikely(copy_from_user(&e, 
+                                     &setextents->extents[i], 
+                                     sizeof(e)) != 0) )
+        {
+            DPRINTK("vbd_setextents: copy_from_user failed\n");
+            ret = -EFAULT;
+            goto free_and_out;
+        }
+        
+        if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL))
+                      == NULL) )
+        {
+            DPRINTK("vbd_setextents: out of memory\n");
+            ret = -ENOMEM;
+            goto free_and_out;
+        }
+        
+        x->extent = e;
+        x->next   = new_extents;
+
+        new_extents = x;
+    }
+
+    /* Delete the old extent list _after_ successfully creating the new. */
+    for ( x = vbd->extents; x != NULL; x = t )
+    {
+        t = x->next;
+        kfree(x);
+    }
+
+    /* Make the new list visible. */
+    vbd->extents = new_extents;
+
+ out:
+    spin_unlock(&p->vbd_lock);
+    put_task_struct(p);
+    return ret;
+
+ free_and_out:
+    /* Failed part-way through the new list. Delete all that we managed. */
+    for ( x = new_extents; x != NULL; x = t )
+    {
+        t = x->next;
+        kfree(x);
+    }
+    goto out;
+}
+
+
+long vbd_delete(vbd_delete_t *delete) 
+{
+    struct task_struct *p; 
+    vbd_t *vbd;
+    rb_node_t *rb;
+    xen_extent_le_t *x, *t;
+
+    if ( !IS_PRIV(current) )
+        return -EPERM; 
+
+    if ( (p = find_domain_by_id(delete->domain)) == NULL )
+    {
+        DPRINTK("vbd_delete attempted for non-existent domain %llu\n", 
+                delete->domain); 
+        return -EINVAL; 
+    }
+
+    spin_lock(&p->vbd_lock);
+
+    rb = p->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( delete->vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( delete->vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            goto found;
+    }
+
+    DPRINTK("vbd_delete attempted for non-existing VBD.\n");
+
+    spin_unlock(&p->vbd_lock);
+    put_task_struct(p);
+    return -EINVAL;
+
+ found:
+    rb_erase(rb, &p->vbd_rb);
+    x = vbd->extents;
+    kfree(vbd);
+
+    while ( x != NULL )
+    {
+        t = x->next;
+        kfree(x);
+        x = t;
+    }
+    
+    spin_unlock(&p->vbd_lock);
+    put_task_struct(p);
+    return 0;
+}
+
+
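+/* Tear down all VBDs, and their extent lists, belonging to domain 'p'. */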
+void destroy_all_vbds(struct task_struct *p)
+{
+    vbd_t *vbd;
+    rb_node_t *rb;
+    xen_extent_le_t *x, *t;
+
+    spin_lock(&p->vbd_lock);
+
+    while ( (rb = p->vbd_rb.rb_node) != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+
+        rb_erase(rb, &p->vbd_rb);
+        x = vbd->extents;
+        kfree(vbd);
+        
+        while ( x != NULL )
+        {
+            t = x->next;
+            kfree(x);
+            x = t;
+        }          
+    }
+
+    spin_unlock(&p->vbd_lock);
+}
+
+
+static int vbd_probe_single(xen_disk_info_t *xdi, 
+                            vbd_t *vbd, 
+                            struct task_struct *p)
+{
+    xen_extent_le_t *x; 
+    xen_disk_t cur_disk; 
+
+    if ( xdi->count == xdi->max )
+    {
+        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
+        return -ENOMEM; 
+    }
+
+    cur_disk.device = vbd->vdevice; 
+    cur_disk.info   = vbd->type;
+    if ( !VBD_CAN_WRITE(vbd) )
+        cur_disk.info |= XD_FLAG_RO; 
+    cur_disk.capacity = 0ULL;
+    for ( x = vbd->extents; x != NULL; x = x->next )
+        cur_disk.capacity += x->extent.nr_sectors; 
+    cur_disk.domain = p->domain; 
+        
+    /* Now copy into the relevant part of the user-space buffer. */
+    if ( copy_to_user(&xdi->disks[xdi->count], 
+                     &cur_disk, 
+                     sizeof(xen_disk_t)) )
+    { 
+        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
+        return -EFAULT;
+    } 
+        
+    xdi->count++; 
+
+    return 0;
+}
+
+
+static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
+{
+    int rc = 0;
+    rb_node_t *rb;
+
+    spin_lock(&p->vbd_lock);
+
+    if ( (rb = p->vbd_rb.rb_node) == NULL )
+        goto out;
+
+ new_subtree:
+    /* STEP 1. Find least node (it'll be left-most). */
+    while ( rb->rb_left != NULL )
+        rb = rb->rb_left;
+
+    for ( ; ; )
+    {
+        /* STEP 2. Dealt with left subtree. Now process current node. */
+        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
+            goto out;
+
+        /* STEP 3. Process right subtree, if any. */
+        if ( rb->rb_right != NULL )
+        {
+            rb = rb->rb_right;
+            goto new_subtree;
+        }
+
+        /* STEP 4. Done both subtrees. Head back through ancestors. */
+        for ( ; ; ) 
+        {
+            /* We're done when we get back to the root node. */
+            if ( rb->rb_parent == NULL )
+                goto out;
+            /* If we are left of parent, then parent is next to process. */
+            if ( rb->rb_parent->rb_left == rb )
+                break;
+            /* If we are right of parent, then we climb to grandparent. */
+            rb = rb->rb_parent;
+        }
+
+        rb = rb->rb_parent;
+    }
+
+ out:
+    spin_unlock(&p->vbd_lock);
+    return rc;  
+}
+
+
+/*
+ * Return information about the VBDs available to a given domain, or to all
+ * domains. In the common case the 'domain' argument is 0, which means
+ * "information about the caller"; otherwise 'domain' specifies either a
+ * particular domain or all domains (VBD_PROBE_ALL) -- both of these cases
+ * require the caller to be privileged.
+ */
+long vbd_probe(vbd_probe_t *probe) 
+{
+    struct task_struct *p = NULL; 
+    unsigned long flags;
+    long ret = 0;  
+
+    if ( probe->domain != 0 )
+    { 
+        /* We can only probe for ourselves (unless we're privileged). */
+        if ( (probe->domain != current->domain) && !IS_PRIV(current) )
+            return -EPERM; 
+
+        if ( (probe->domain != VBD_PROBE_ALL) &&
+             ((p = find_domain_by_id(probe->domain)) == NULL) )
+        {
+            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
+                    probe->domain); 
+            return -EINVAL; 
+        }
+    }
+    else
+    { 
+        /* Default is to probe for ourselves. */
+        p = current; 
+        get_task_struct(p); /* to mirror final put_task_struct */
+    }
+
+    if ( probe->domain == VBD_PROBE_ALL )
+    { 
+        read_lock_irqsave(&tasklist_lock, flags);
+        for_each_domain ( p )
+        {
+            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
+            { 
+                read_unlock_irqrestore(&tasklist_lock, flags);
+                p = NULL; /* no reference is held on iterated domains */
+                goto out; 
+            }
+        }
+        read_unlock_irqrestore(&tasklist_lock, flags);
+    } 
+    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
+        goto out; 
+
+ out: 
+    if ( ret != 0 )
+        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
+    if ( p != NULL )
+        put_task_struct(p); 
+    return ret; 
+}
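+
+/*
+ * Illustrative (hypothetical) caller sketch for vbd_probe(). Field names
+ * follow the uses above; issuing the actual request to Xen is elided:
+ *
+ *     xen_disk_t  disks[32];
+ *     vbd_probe_t probe;
+ *     probe.domain    = VBD_PROBE_ALL;   (or 0 to mean "the caller")
+ *     probe.xdi.max   = 32;              (capacity of the disks[] array)
+ *     probe.xdi.count = 0;               (filled in as disks are found)
+ *     probe.xdi.disks = disks;
+ *
+ * On success, disks[0..probe.xdi.count-1] describe the VBDs found.
+ */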
+
+
+long vbd_info(vbd_info_t *info) 
+{
+    struct task_struct *p; 
+    xen_extent_le_t *x; 
+    xen_extent_t *extents; 
+    vbd_t *vbd = NULL;
+    rb_node_t *rb;
+    long ret = 0;  
+   
+    if ( (info->domain != current->domain) && !IS_PRIV(current) )
+        return -EPERM; 
+
+    if ( (p = find_domain_by_id(info->domain)) == NULL )
+    {
+        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
+                info->domain); 
+        return -EINVAL; 
+    }
+
+    spin_lock(&p->vbd_lock);
+
+    rb = p->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( info->vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( info->vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            break;
+    }
+
+    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
+    {
+        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
+        ret = -EINVAL; 
+        goto out; 
+    }
+
+    info->mode     = vbd->mode;
+    info->nextents = 0; 
+
+    extents = info->extents;
+    for ( x = vbd->extents; x != NULL; x = x->next )
+    {
+        if ( info->nextents == info->maxextents )
+            break;
+        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
+        {
+            DPRINTK("vbd_info: copy_to_user failed\n");
+            ret = -EFAULT;
+            goto out; 
+        } 
+        extents++;
+        info->nextents++;
+    }
+
+ out: 
+    spin_unlock(&p->vbd_lock);
+    put_task_struct(p); 
+    return ret; 
+}
+
+
+int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
+{
+    xen_extent_le_t *x; 
+    vbd_t *vbd;
+    rb_node_t *rb;
+    xen_sector_t sec_off;
+    unsigned long nr_secs;
+
+    spin_lock(&p->vbd_lock);
+
+    rb = p->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( pseg->dev < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( pseg->dev > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            goto found;
+    }
+
+    DPRINTK("vbd_translate; domain %llu attempted to access "
+            "non-existent VBD.\n", p->domain); 
+
+    spin_unlock(&p->vbd_lock);
+    return -ENODEV; 
+
+ found:
+
+    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
+         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
+    {
+        spin_unlock(&p->vbd_lock);
+        return -EACCES; 
+    }
+
+    /*
+     * Now iterate through the list of xen_extents, working out which should 
+     * be used to perform the translation.
+     */
+    sec_off = pseg->sector_number; 
+    nr_secs = pseg->nr_sects;
+    for ( x = vbd->extents; x != NULL; x = x->next )
+    { 
+        if ( sec_off < x->extent.nr_sectors )
+        {
+            pseg->dev = x->extent.device; 
+            pseg->sector_number = x->extent.start_sector + sec_off;
+            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
+                goto overrun;
+            spin_unlock(&p->vbd_lock);
+            return 1;
+        } 
+        sec_off -= x->extent.nr_sectors; 
+    }
+
+    DPRINTK("vbd_translate: end of vbd.\n");
+    spin_unlock(&p->vbd_lock);
+    return -EACCES; 
+
+    /*
+     * Here we deal with overrun onto the following extent. We don't deal with 
+     * overrun of more than one boundary since each request is restricted to 
+     * 2^9 512-byte sectors, so it should be trivial for control software to 
+     * ensure that extents are large enough to prevent excessive overrun.
+     */
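+    /*
+     * Worked example (hypothetical sizes): if the matched extent holds
+     * 1000 sectors and the request asks for 10 sectors starting at
+     * sec_off 995, then pseg[0] gets the extent's final 5 sectors and
+     * pseg[1] gets the first 5 sectors of the following extent.
+     */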
+ overrun:
+
+    /* Adjust length of first chunk to run to end of first extent. */
+    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
+
+    /* Set second chunk buffer and length to start where first chunk ended. */
+    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
+    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
+
+    /* Now move to the next extent. Check it exists and is long enough! */
+    if ( unlikely((x = x->next) == NULL) || 
+         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
+    {
+        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
+        spin_unlock(&p->vbd_lock);
+        return -EACCES;
+    }
+
+    /* Store the real device and start sector for the second chunk. */
+    pseg[1].dev           = x->extent.device;
+    pseg[1].sector_number = x->extent.start_sector;
+    
+    spin_unlock(&p->vbd_lock);
+    return 2;
+}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c
new file mode 100644 (file)
index 0000000..b0e77ab
--- /dev/null
@@ -0,0 +1,26 @@
+/******************************************************************************
+ * arch/xen/drivers/vnetif/backend/main.c
+ * 
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A 
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/vnetif/frontend
+ * 
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+static int __init vnetif_init(void)
+{
+    return 0;
+}
+
+static void __exit vnetif_cleanup(void)
+{
+}
+
+module_init(vnetif_init);
+module_exit(vnetif_cleanup);
index 4002ae4c618ea761032a67539422ab9789090bd4..d21b0f90b57c87040f6ea3dcb9ffe07b5edf40dc 100644 (file)
@@ -15,8 +15,7 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <asm/ctrl_if.h>
-#include <asm/hypervisor.h>
-#include <asm/hypervisor-ifs/event_channel.h>
+#include <asm/evtchn.h>
 
 static int        ctrl_if_evtchn;
 static int        ctrl_if_irq;
@@ -50,10 +49,7 @@ static DECLARE_TASKLET(ctrl_if_rx_tasklet, __ctrl_if_rx_tasklet, 0);
 
 static void ctrl_if_notify_controller(void)
 {
-    evtchn_op_t evtchn_op;
-    evtchn_op.cmd = EVTCHNOP_send;
-    evtchn_op.u.send.local_port = ctrl_if_evtchn;
-    (void)HYPERVISOR_event_channel_op(&evtchn_op);
+    notify_via_evtchn(ctrl_if_evtchn);
 }
 
 static void ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id)
index b98e0cc6bb47e34faa1b1e2f3a640dbb0857e6d6..0337cae1ca34cb9bc41356eab83773a9d1c20c2c 100644 (file)
@@ -321,7 +321,11 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
                        u.ptr  = MMU_EXTENDED_COMMAND;
                        u.ptr |= (unsigned long)&default_ldt[0];
                        u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
-                       HYPERVISOR_mmu_update(&u, 1);
+                       if ( unlikely(HYPERVISOR_mmu_update(&u, 1) < 0) )
+                       {
+                               show_trace(NULL);
+                               panic("Failed to install default LDT");
+                       }
                        return;
                }
        }
index 39f6863d663b026e24ba530eba3d70cb243ed6d8..c6dc7105766c56e5d613b8aaa66ef843b8dbf47a 100644 (file)
@@ -116,7 +116,8 @@ static inline void __flush_page_update_queue(void)
 #endif
     idx = 0;
     wmb(); /* Make sure index is cleared first to avoid double updates. */
-    HYPERVISOR_mmu_update(update_queue, _idx);
+    if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx) < 0) )
+        panic("Failed to execute MMU updates");
 }
 
 void _flush_page_update_queue(void)
@@ -182,8 +183,8 @@ void queue_invlpg(unsigned long ptr)
     unsigned long flags;
     spin_lock_irqsave(&update_lock, flags);
     update_queue[idx].ptr  = MMU_EXTENDED_COMMAND;
-    update_queue[idx].val  = ptr & PAGE_MASK;
-    update_queue[idx].val |= MMUEXT_INVLPG;
+    update_queue[idx].ptr |= ptr & PAGE_MASK;
+    update_queue[idx].val  = MMUEXT_INVLPG;
     increment_index();
     spin_unlock_irqrestore(&update_lock, flags);
 }
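
The queue_invlpg change above follows the revised mmu_update encoding: an
extended command now carries its target address in 'ptr', OR'd with
MMU_EXTENDED_COMMAND in the low bits, while 'val' holds the subcommand code.
A minimal sketch of a directly-issued invlpg under that encoding (assuming a
page-aligned 'addr'):

    mmu_update_t u;
    u.ptr = MMU_EXTENDED_COMMAND | (addr & PAGE_MASK);
    u.val = MMUEXT_INVLPG;
    if ( unlikely(HYPERVISOR_mmu_update(&u, 1) < 0) )
        panic("invlpg update failed");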
index 7b1162de9c7651f265de3135f11e48a3d7b40253..665357d4bcc6dfb4cab757bad06ed48664f4d460 100644 (file)
@@ -31,10 +31,28 @@ static inline void direct_remap_area_pte(pte_t *pte,
                                          unsigned long address, 
                                          unsigned long size,
                                          unsigned long machine_addr, 
-                                         pgprot_t prot)
+                                         pgprot_t prot,
+                                         domid_t  domid)
 {
     unsigned long end;
 
+    mmu_update_t *u, *v;
+    u = v = vmalloc(3*PAGE_SIZE); /* plenty */
+    if ( u == NULL )
+    {
+        printk(KERN_WARNING "direct_remap_area_pte: vmalloc failed\n");
+        return;
+    }
+
+    /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
+    if ( domid != 0 )
+    {
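+        /*
+         * The 64-bit domid is passed as four 16-bit chunks, carried in
+         * the upper halves of the ptr/val fields of two extended
+         * commands; the low 16 bits of each field are left clear for the
+         * MMU_EXTENDED_COMMAND / MMUEXT_SET_SUBJECTDOM_* codes below.
+         */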
+        v[0].val  = (unsigned long)(domid<<16) & ~0xFFFFUL;
+        v[0].ptr  = (unsigned long)(domid<< 0) & ~0xFFFFUL;
+        v[1].val  = (unsigned long)(domid>>16) & ~0xFFFFUL;
+        v[1].ptr  = (unsigned long)(domid>>32) & ~0xFFFFUL;
+        v[0].ptr |= MMU_EXTENDED_COMMAND;
+        v[0].val |= MMUEXT_SET_SUBJECTDOM_L;
+        v[1].ptr |= MMU_EXTENDED_COMMAND;
+        v[1].val |= MMUEXT_SET_SUBJECTDOM_H;
+        v += 2;
+    }
+
     address &= ~PMD_MASK;
     end = address + size;
     if (end > PMD_SIZE)
@@ -46,11 +64,18 @@ static inline void direct_remap_area_pte(pte_t *pte,
             printk("direct_remap_area_pte: page already exists\n");
             BUG();
         }
-        set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot))); 
+        v->ptr = virt_to_machine(pte);
+        v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
+        v++;
         address += PAGE_SIZE;
         machine_addr += PAGE_SIZE;
         pte++;
     } while (address && (address < end));
+
+    if ( ((v-u) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
+        printk(KERN_WARNING "Failed to ioremap %08lx->%08lx (%08lx)\n",
+               end-size, end, machine_addr-size);
+    vfree(u);
 }
 
 static inline int direct_remap_area_pmd(struct mm_struct *mm,
@@ -58,7 +83,8 @@ static inline int direct_remap_area_pmd(struct mm_struct *mm,
                                         unsigned long address, 
                                         unsigned long size,
                                         unsigned long machine_addr,
-                                        pgprot_t prot)
+                                        pgprot_t prot,
+                                        domid_t  domid)
 {
     unsigned long end;
 
@@ -74,7 +100,7 @@ static inline int direct_remap_area_pmd(struct mm_struct *mm,
         if (!pte)
             return -ENOMEM;
         direct_remap_area_pte(pte, address, end - address, 
-                              address + machine_addr, prot);
+                              address + machine_addr, prot, domid);
         address = (address + PMD_SIZE) & PMD_MASK;
         pmd++;
     } while (address && (address < end));
@@ -85,7 +111,8 @@ int direct_remap_area_pages(struct mm_struct *mm,
                             unsigned long address, 
                             unsigned long machine_addr,
                             unsigned long size, 
-                            pgprot_t prot)
+                            pgprot_t prot,
+                            domid_t  domid)
 {
     int error = 0;
     pgd_t * dir;
@@ -103,7 +130,7 @@ int direct_remap_area_pages(struct mm_struct *mm,
         if (!pmd)
             break;
         error = direct_remap_area_pmd(mm, pmd, address, end - address,
-                                      machine_addr + address, prot);
+                                      machine_addr + address, prot, domid);
         if (error)
             break;
         address = (address + PGDIR_SIZE) & PGDIR_MASK;
@@ -158,7 +185,7 @@ void * __ioremap(unsigned long machine_addr,
     prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | 
                     _PAGE_ACCESSED | flags);
     if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr), 
-                                machine_addr, size, prot)) {
+                                machine_addr, size, prot, 0)) {
         vfree(addr);
         return NULL;
     }
index dbc10d638224cbff5375f1a702c8b3a743aacc02..1d3ec0fe0575c56ea3f3277f92b2c3c8899f8daa 100644 (file)
@@ -197,24 +197,11 @@ static inline int noncached_address(unsigned long addr)
 #endif
 }
 
+#if !defined(CONFIG_XEN)
 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 {
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
 
-#if defined(CONFIG_XEN) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
-       if (!(start_info.flags & SIF_PRIVILEGED))
-               return -ENXIO;
-
-       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
-       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-       if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, 
-                            vma->vm_end-vma->vm_start, vma->vm_page_prot))
-               return -EAGAIN;
-       return 0;
-#elif defined(CONFIG_XEN)
-       return -ENXIO;
-#else
        /*
         * Accessing memory above the top the kernel knows about or
         * through a file pointer that was marked O_SYNC will be
@@ -236,8 +223,50 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
                             vma->vm_page_prot))
                return -EAGAIN;
        return 0;
-#endif
 }
+#elif !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+       return -ENXIO;
+}
+#else
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+       domid_t domid;
+
+       if (!(start_info.flags & SIF_PRIVILEGED))
+               return -ENXIO;
+
+       domid = file->private_data ? *(domid_t *)file->private_data : 0;
+
+       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, 
+                               vma->vm_end-vma->vm_start, vma->vm_page_prot,
+                               domid))
+               return -EAGAIN;
+       return 0;
+}
+static int ioctl_mem(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+	if (file->private_data == NULL) {
+		file->private_data = kmalloc(sizeof(domid_t), GFP_KERNEL);
+		if (file->private_data == NULL)
+			return -ENOMEM;
+		*(domid_t *)file->private_data = 0;
+	}
+	switch (cmd) {
+	case _IO('M', 1): ((unsigned long *)file->private_data)[0] = arg; break;
+	case _IO('M', 2): ((unsigned long *)file->private_data)[1] = arg; break;
+	default: return -ENOSYS;
+	}
+	return 0;
+}
+static int release_mem(struct inode * inode, struct file * file)
+{
+       if (file->private_data != NULL)
+               kfree(file->private_data);
+       return 0;
+}
+#endif /* CONFIG_XEN */
 
 /*
  * This function reads the *virtual* memory as seen by the kernel.
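
A hypothetical user-space sketch of the new control path (device path, error
handling, and the 64-bit 'domid' value are assumed): _IO('M', 1) stores the
low word of the target domid and _IO('M', 2) the high word (little-endian x86
word order), after which mmap() of /dev/mem maps the subject domain's memory
at the given machine address:

    int fd = open("/dev/mem", O_RDWR);
    ioctl(fd, _IO('M', 1), (unsigned long)(domid & 0xFFFFFFFFUL));
    ioctl(fd, _IO('M', 2), (unsigned long)(domid >> 32));
    buf = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
               MAP_SHARED, fd, machine_addr);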
@@ -426,10 +455,6 @@ static inline size_t read_zero_pagealigned(char * buf, size_t size)
                        goto out_up;
                if (vma->vm_flags & VM_SHARED)
                        break;
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
-               if (vma->vm_flags & VM_IO)
-                       break;
-#endif
                count = vma->vm_end - addr;
                if (count > size)
                        count = size;
@@ -615,10 +640,6 @@ static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
        unsigned long size = vma->vm_end - vma->vm_start;
 
-#if defined(CONFIG_XEN)
-       return -ENXIO;
-#endif
-
        /*
         * If the user is not attempting to mmap a high memory address then
         * the standard mmap_mem mechanism will work.  High memory addresses
@@ -663,13 +684,19 @@ static struct file_operations mem_fops = {
        write:          write_mem,
        mmap:           mmap_mem,
        open:           open_mem,
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       release:        release_mem,
+       ioctl:          ioctl_mem,
+#endif
 };
 
 static struct file_operations kmem_fops = {
        llseek:         memory_lseek,
        read:           read_kmem,
        write:          write_kmem,
+#if !defined(CONFIG_XEN)
        mmap:           mmap_kmem,
+#endif
        open:           open_kmem,
 };
 
@@ -715,12 +742,6 @@ static int memory_open(struct inode * inode, struct file * filp)
                        break;
 #if defined(CONFIG_ISA) || !defined(__mc68000__)
                case 4:
-#if defined(CONFIG_XEN)
-#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
-                       if (!(start_info.flags & SIF_PRIVILEGED))
-#endif
-                               return -ENXIO;
-#endif
                        filp->f_op = &port_fops;
                        break;
 #endif
index 9d12487144a87132982a2ee702dd3cbf68cc67c1..f1d2b77c2e21773e0328187ebcc8fd8b96f40abf 100644 (file)
@@ -9,6 +9,7 @@
 #ifndef __ASM_XEN__CTRL_IF_H__
 #define __ASM_XEN__CTRL_IF_H__
 
+#include <linux/tqueue.h>
 #include <asm/hypervisor.h>
 
 typedef control_msg_t ctrl_msg_t;
index ececad9447d59586d361e5fced486131e2970ff7..128d766a340c4d72aeff6cd9035e0501260952a9 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/hypervisor.h>
 #include <asm/ptrace.h>
 #include <asm/synch_bitops.h>
+#include <asm/hypervisor-ifs/event_channel.h>
 
 /*
  * LOW-LEVEL DEFINITIONS
@@ -62,6 +63,14 @@ static inline void clear_evtchn_exception(int port)
     synch_clear_bit(port, &s->evtchn_exception[0]);
 }
 
+static inline void notify_via_evtchn(int port)
+{
+    evtchn_op_t op;
+    op.cmd = EVTCHNOP_send;
+    op.u.send.local_port = port;
+    (void)HYPERVISOR_event_channel_op(&op);
+}
+
 /*
  * CHARACTER-DEVICE DEFINITIONS
  */
index e20f67e65188ba3d3503b1b1ed1f2d0e22945d7c..c454728c0e65b240da89d7b01ff93aa46e1eb32c 100644 (file)
@@ -161,13 +161,6 @@ static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count)
         : "=a" (ret) : "0" (__HYPERVISOR_mmu_update), 
         "b" (req), "c" (count) : "memory" );
 
-    if ( unlikely(ret < 0) )
-    {
-        extern void show_trace(unsigned long *);
-        show_trace(NULL);
-        panic("Failed mmu update: %p, %d", req, count);
-    }
-
     return ret;
 }
 
index 308a1b7c402a979e69eed2c596c3697672f0b161..d853a3f2af155b4f69648d7f171342d78df650e0 100644 (file)
@@ -265,10 +265,15 @@ static inline void flush_tlb_pgtables(struct mm_struct *mm,
     XEN_flush_page_update_queue();
 }
 
+/*
+ * NB. The 'domid' field should be zero if mapping I/O space (non-RAM).
+ * Otherwise it identifies the owner of the memory that is being mapped.
+ */
 extern int direct_remap_area_pages(struct mm_struct *mm,
                                    unsigned long address, 
                                    unsigned long machine_addr,
                                    unsigned long size, 
-                                   pgprot_t prot);
+                                   pgprot_t prot,
+                                   domid_t  domid);
 
 #endif /* _I386_PGALLOC_H */
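
As a sketch of the extended interface (following the mem.c caller above;
variable names assumed):

    /* domid == 0 => mapping I/O space; otherwise 'domid' owns the memory. */
    rc = direct_remap_area_pages(vma->vm_mm, vma->vm_start,
                                 machine_addr, vma->vm_end - vma->vm_start,
                                 vma->vm_page_prot, domid);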