bitkeeper revision 1.878 (408cde49uOK-SAzfb5CBmpUoC0yXOg)

author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>

Mon, 26 Apr 2004 10:02:49 +0000 (10:02 +0000)

committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>

Mon, 26 Apr 2004 10:02:49 +0000 (10:02 +0000)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Mon, 26 Apr 2004 10:02:49 +0000 (10:02 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Mon, 26 Apr 2004 10:02:49 +0000 (10:02 +0000)
diff --git a/.rootkeys b/.rootkeys

index f81687053e8caf754851b71d7a319c65159cb8ae..74cc58825a1f30d923c820543e7013e0ac3ec722 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
@@ -656,6 +656,18 @@
  40648526SxcA4lGIHB_k7ID8VlRSzw xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev
  3e6377f5xwPfYZkPHPrDbEq1PRN7uQ xenolinux-2.4.26-sparse/arch/xen/drivers/balloon/Makefile
  3e6377f8Me8IqtvEhb70XFgOvqQH7A xenolinux-2.4.26-sparse/arch/xen/drivers/balloon/balloon.c
+4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile
+4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile
+4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
+4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
+4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
+4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
+4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
+40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
+4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
+4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
+4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
+4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
  3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile
  3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
  3e67f822FOPwqHiaRKbrskgWgoNL5g xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.h
@@ -667,25 +679,13 @@
  3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/vfr.c
  40420a6ebRqDjufoN1WSJvolEW2Wjw xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/Makefile
  40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/evtchn.c
+4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile
+4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile
+4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
+405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile
+405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
  3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.26-sparse/arch/xen/drivers/network/Makefile
  3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
-4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
-4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
-4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h
-4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c
-4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c
-4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c
-4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c
-4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile
-4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c
-4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h
-4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c
-40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h
-4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
-4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile
-4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c
-405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile
-405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
  3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.26-sparse/arch/xen/kernel/Makefile
  4075806dE5mQwlVUf8-t3YXjiMMWDQ xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
  3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.26-sparse/arch/xen/kernel/entry.S
diff --git a/xenolinux-2.4.26-sparse/arch/xen/Makefile b/xenolinux-2.4.26-sparse/arch/xen/Makefile

index 3abb3c7421f6c5d8ab6677c2121d418ae44773a4..d799c003bb7d55759edddfa82c536b3d0e8c3456 100644 (file)
--- a/xenolinux-2.4.26-sparse/arch/xen/Makefile
+++ b/xenolinux-2.4.26-sparse/arch/xen/Makefile
@@ -52,8 +52,8 @@ SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib
  SUBDIRS += arch/xen/drivers/console 
  SUBDIRS += arch/xen/drivers/evtchn
  ifdef CONFIG_XEN_NEWIO
-SUBDIRS += arch/xen/drivers/vblkif
-SUBDIRS += arch/xen/drivers/vnetif
+SUBDIRS += arch/xen/drivers/blkif
+SUBDIRS += arch/xen/drivers/netif
  else
  SUBDIRS += arch/xen/drivers/block
  SUBDIRS += arch/xen/drivers/network
@@ -67,8 +67,8 @@ CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o
  CORE_FILES += arch/xen/drivers/evtchn/drv.o
  CORE_FILES += arch/xen/drivers/console/drv.o
  ifdef CONFIG_XEN_NEWIO
-CORE_FILES += arch/xen/drivers/vblkif/drv.o
-CORE_FILES += arch/xen/drivers/vnetif/drv.o
+CORE_FILES += arch/xen/drivers/blkif/drv.o
+CORE_FILES += arch/xen/drivers/netif/drv.o
  else
  CORE_FILES += arch/xen/drivers/block/drv.o
  CORE_FILES += arch/xen/drivers/network/drv.o
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile

new file mode 100644 (file)

index 0000000..20c8192
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile
@@ -0,0 +1,10 @@
+
+O_TARGET := drv.o
+
+subdir-y += frontend
+obj-y    += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile

new file mode 100644 (file)

index 0000000..4c8c173
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o control.o interface.o vbd.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h

new file mode 100644 (file)

index 0000000..4895172
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
@@ -0,0 +1,98 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/common.h
+ */
+
+#ifndef __BLKIF__BACKEND__COMMON_H__
+#define __BLKIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <asm/ctrl_if.h>
+#include <asm/io.h>
+#include "../blkif.h"
+
+#ifndef NDEBUG
+#define ASSERT(_p) \
+    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+    __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
+                           __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+typedef struct blkif_st {
+    /* Unique identifier for this interface. */
+    domid_t          domid;         /* With 'handle', the hash lookup key. */
+    unsigned int     handle;
+    /* Physical parameters of the comms window. */
+    unsigned long    shmem_frame;   /* Frame no.; <<PAGE_SHIFT for ioremap. */
+    unsigned int     evtchn;        /* Passed to notify_via_evtchn(). */
+    int              irq;           /* From bind_evtchn_to_irq(evtchn). */
+    /* Comms information. */
+    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
+    BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
+    BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
+    /* VBDs attached to this interface. */
+    rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
+    spinlock_t       vbd_lock;      /* Protects VBD mapping. */
+    /* Private fields. */
+    struct blkif_st *hash_next;     /* Next entry in the same hash bucket. */
+    struct list_head blkdev_list;   /* Scheduler-list link; next==NULL: off. */
+    spinlock_t       blk_ring_lock; /* Held while writing a response. */
+} blkif_t;
+
+void blkif_create(blkif_create_t *create);
+void blkif_destroy(blkif_destroy_t *destroy);
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+void blkif_get(blkif_t *blkif);
+void blkif_put(blkif_t *blkif);
+
+/* An entry in a list of xen_extents. */
+typedef struct _blkif_extent_le { 
+    blkif_extent_t extent;               /* an individual extent */
+    struct _blkif_extent_le *next;       /* and a pointer to the next */ 
+} blkif_extent_le_t; 
+
+typedef struct _vbd { 
+    blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
+    unsigned char      mode;      /* VBD_MODE_{R,W} */
+    unsigned char      type;      /* VDISK_TYPE_xxx | VDISK_FLAG_xxx */
+    blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
+    rb_node_t          rb;        /* for linking into R-B tree lookup struct */
+} vbd_t; 
+
+/*
+ * VBD management entry points, invoked from the control-message receiver.
+ * vbd_create() is defined as returning void (it reports its result through
+ * the 'status' field of its argument), so it is declared void here to match.
+ * NOTE(review): the definitions of vbd_grow/vbd_shrink/vbd_destroy should be
+ * checked against these prototypes as well.
+ */
+void vbd_create(blkif_vbd_create_t *create_params);
+long vbd_grow(blkif_vbd_grow_t *grow_params);
+long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
+long vbd_destroy(blkif_vbd_destroy_t *delete_params);
+
+void destroy_all_vbds(struct task_struct *p);
+
+typedef struct { /* One in-flight request passed to the block layer. */
+    blkif_t       *blkif;     /* Interface the request arrived on. */
+    unsigned long  id;        /* Request id, echoed back in the response. */
+    atomic_t       pendcnt;   /* Buffer heads still outstanding. */
+    unsigned short operation; /* READ or WRITE. */
+    unsigned short status;    /* 0 initially; set to 2 on a failed buffer. */
+} pending_req_t;
+
+/* Describes a [partial] disk extent (part of a block io request) */
+typedef struct {
+    unsigned short dev;           /* Copied into bh->b_dev. */
+    unsigned short nr_sects;      /* Length in 512-byte sectors (<<9 = bytes). */
+    unsigned long  buffer;        /* Physical address of the data buffer. */
+    xen_sector_t   sector_number; /* Start sector; copied into bh->b_rsector. */
+} phys_seg_t;
+
+int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
+
+int blkif_be_controller_init(void);
+
+void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+#endif /* __BLKIF__BACKEND__COMMON_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c

new file mode 100644 (file)

index 0000000..c7ef10c
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
@@ -0,0 +1,61 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/control.c
+ * 
+ * Routines for interfacing with the control plane.
+ * 
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+/*
+ * Receiver for CMSG_BLKIF_BE control messages.
+ *
+ * Each known subtype is accepted only if the message length equals the size
+ * of the corresponding request structure; the payload is then handed to the
+ * matching handler, which works on it in place. A response is always sent:
+ * the (possibly updated) message on success, or the message with its length
+ * forced to zero for an unknown subtype or a length mismatch.
+ */
+static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    void *payload = &msg->msg[0];
+    int   valid   = 0;
+
+    switch ( msg->subtype )
+    {
+    case CMSG_BLKIF_BE_CREATE:
+        valid = (msg->length == sizeof(blkif_create_t));
+        if ( valid )
+            blkif_create((blkif_create_t *)payload);
+        break;
+    case CMSG_BLKIF_BE_DESTROY:
+        valid = (msg->length == sizeof(blkif_destroy_t));
+        if ( valid )
+            blkif_destroy((blkif_destroy_t *)payload);
+        break;
+    case CMSG_BLKIF_BE_VBD_CREATE:
+        valid = (msg->length == sizeof(blkif_vbd_create_t));
+        if ( valid )
+            vbd_create((blkif_vbd_create_t *)payload);
+        break;
+    case CMSG_BLKIF_BE_VBD_DESTROY:
+        valid = (msg->length == sizeof(blkif_vbd_destroy_t));
+        if ( valid )
+            vbd_destroy((blkif_vbd_destroy_t *)payload);
+        break;
+    case CMSG_BLKIF_BE_VBD_GROW:
+        valid = (msg->length == sizeof(blkif_vbd_grow_t));
+        if ( valid )
+            vbd_grow((blkif_vbd_grow_t *)payload);
+        break;
+    case CMSG_BLKIF_BE_VBD_SHRINK:
+        valid = (msg->length == sizeof(blkif_vbd_shrink_t));
+        if ( valid )
+            vbd_shrink((blkif_vbd_shrink_t *)payload);
+        break;
+    default:
+        /* Unknown subtype: treated the same as a malformed message. */
+        break;
+    }
+
+    /* A zero-length reply signals a parse error to the sender. */
+    if ( !valid )
+        msg->length = 0;
+
+    ctrl_if_send_response(msg);
+}
+
+int blkif_ctrlif_init(void)
+{   /* Registers blkif_ctrlif_rx as the receiver for CMSG_BLKIF_BE messages.
+     * The registration result is explicitly discarded; always returns 0.
+     * NOTE(review): common.h declares blkif_be_controller_init(), not this
+     * name, and no caller is visible here -- confirm the intended name. */
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
+    return 0;
+}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c

new file mode 100644 (file)

index 0000000..579795d
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
@@ -0,0 +1,96 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/interface.c
+ * 
+ * Block-device interface management.
+ * 
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+#define BLKIF_HASHSZ 1024 /* Power of two: BLKIF_HASH masks with (SZ-1). */
+#define BLKIF_HASH(_d,_h) \
+    (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1)) /* NOTE(review): >>32 assumes _d is wider than 32 bits -- confirm domid_t. */
+
+static blkif_t *blkif_hash[BLKIF_HASHSZ]; /* Bucket heads, chained via hash_next. */
+
+/*
+ * Look up the interface identified by the pair (domid, handle).
+ *
+ * Returns the matching blkif_t, or NULL if the bucket's chain contains no
+ * entry with both keys equal. The chain is walked without taking any lock
+ * and no reference is taken on the returned interface.
+ */
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+    /* Keep walking while EITHER key differs: a hit needs both to match. */
+    while ( (blkif != NULL) && 
+            ((blkif->domid != domid) || (blkif->handle != handle)) )
+        blkif = blkif->hash_next;
+    return blkif;
+}
+
+/*
+ * Create a block-device interface for (create->domid, create->blkif_handle):
+ * bind the event channel to an IRQ, map the shared ring frame, register the
+ * interrupt handler and link the new interface into the hash table.
+ *
+ * The outcome is reported in create->status:
+ *   BLKIF_STATUS_OKAY              - interface created.
+ *   BLKIF_STATUS_INTERFACE_EXISTS  - an interface with the same keys exists.
+ *   BLKIF_STATUS_OUT_OF_MEMORY     - the blkif_t could not be allocated.
+ *   BLKIF_STATUS_ERROR             - ring mapping or IRQ registration failed.
+ */
+void blkif_create(blkif_create_t *create)
+{
+    domid_t       domid  = create->domid;
+    unsigned int  handle = create->blkif_handle;
+    unsigned int  evtchn = create->evtchn;
+    unsigned long shmem_frame = create->shmem_frame;
+    blkif_t     **pblkif, *blkif;
+
+    /* Walk to the end of the bucket's chain, rejecting a duplicate. */
+    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( *pblkif != NULL )
+    {
+        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+            goto found_match;
+        pblkif = &(*pblkif)->hash_next;
+    }
+
+    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+    if ( blkif == NULL )
+    {
+        create->status = BLKIF_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    /* Zeroing also leaves blkdev_list.next NULL, i.e. "not scheduled". */
+    memset(blkif, 0, sizeof(*blkif));
+    blkif->domid       = domid;
+    blkif->handle      = handle;
+    blkif->evtchn      = evtchn;
+    blkif->irq         = bind_evtchn_to_irq(evtchn);
+    blkif->shmem_frame = shmem_frame;
+    spin_lock_init(&blkif->vbd_lock);
+    spin_lock_init(&blkif->blk_ring_lock);
+
+    /* The ring must be mapped before the handler can possibly run. */
+    blkif->blk_ring_base = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
+    if ( blkif->blk_ring_base == NULL )
+        goto ring_unmapped;
+
+    if ( request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif) != 0 )
+        goto evtchn_in_use;
+
+    /* *pblkif is the NULL terminator of the chain: append the new entry. */
+    blkif->hash_next = *pblkif;
+    *pblkif = blkif;
+
+    create->status = BLKIF_STATUS_OKAY;
+    return;
+
+ found_match:
+    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
+    return;
+
+ evtchn_in_use:
+    iounmap(blkif->blk_ring_base);
+ ring_unmapped:
+    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
+    kmem_cache_free(blkif_cachep, blkif);
+    create->status = BLKIF_STATUS_ERROR;
+    return;
+}
+
+/*
+ * Destroy the interface identified by (destroy->domid, destroy->blkif_handle).
+ *
+ * On a match the interrupt handler is released, the event channel unbound,
+ * the entry unlinked from its hash chain, the shared ring unmapped and the
+ * structure freed. The outcome is reported in destroy->status:
+ *   BLKIF_STATUS_OKAY                - interface destroyed.
+ *   BLKIF_STATUS_INTERFACE_NOT_FOUND - no interface has these keys.
+ *
+ * NOTE(review): the interface is freed without checking whether it is still
+ * on the scheduler list or referenced by in-flight requests -- confirm that
+ * callers guarantee it is idle.
+ */
+void blkif_destroy(blkif_destroy_t *destroy)
+{
+    domid_t       domid  = destroy->domid;
+    unsigned int  handle = destroy->blkif_handle;
+    blkif_t     **pblkif, *blkif;
+
+    /* pblkif tracks the link that points at 'blkif' so it can be unlinked. */
+    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( (blkif = *pblkif) != NULL )
+    {
+        if ( (blkif->domid == domid) && (blkif->handle == handle) )
+            goto found_match;
+        pblkif = &blkif->hash_next;
+    }
+
+    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+    return;
+
+ found_match:
+    /* dev_id must be the value given to request_irq(), i.e. the blkif. */
+    free_irq(blkif->irq, blkif);
+    unbind_evtchn_from_irq(blkif->evtchn);
+    *pblkif = blkif->hash_next;
+    iounmap(blkif->blk_ring_base);
+    kmem_cache_free(blkif_cachep, blkif);
+    destroy->status = BLKIF_STATUS_OKAY;
+}
+
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c

new file mode 100644 (file)

index 0000000..1e6190c
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
@@ -0,0 +1,508 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/main.c
+ * 
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A 
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/blkif/frontend
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ * 
+ * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a 
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements 
+ * the pendcnt towards zero. When it hits zero, the specified domain has a 
+ * response queued for it, with the saved 'id' passed back.
+ * 
+ * We can't allocate pending_req's in order, since they may complete out of 
+ * order. We therefore maintain an allocation ring. This ring also indicates 
+ * when enough work has been passed down -- at that point the allocation ring 
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
+/* NB. We use a different index type to differentiate from shared blk rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+static kmem_cache_t *buffer_head_cachep;
+
+static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
+
+static int lock_buffer(blkif_t *blkif,
+                       unsigned long buffer,
+                       unsigned short size,
+                       int writeable_buffer);
+static void unlock_buffer(unsigned long buffer,
+                          unsigned short size,
+                          int writeable_buffer);
+
+static void io_schedule(unsigned long unused);
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                 blk_ring_req_entry_t *req);
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, unsigned long st);
+
+
+/******************************************************************
+ * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head io_schedule_list;
+static spinlock_t io_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{   /* Non-zero when blkdev_list.next is non-NULL. NULL is the "not queued"
+     * marker that remove_from_blkdev_list() stores after list_del(). */
+    return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{   /* Take blkif off io_schedule_list if it is queued, and drop the
+     * reference that add_to_blkdev_list_tail() took when queueing it. */
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return; /* unlocked early-out */
+    spin_lock_irqsave(&io_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) ) /* re-check with the lock held */
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL; /* mark as off-list */
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{   /* Queue blkif at the tail of io_schedule_list unless it is already
+     * queued; a reference is taken for as long as it stays on the list. */
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return; /* unlocked early-out */
+    spin_lock_irqsave(&io_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) ) /* re-check with the lock held */
+    {
+        list_add_tail(&blkif->blkdev_list, &io_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
+
+static void io_schedule(unsigned long unused)
+{   /* Tasklet body: services queued interfaces in list order, up to
+     * BATCH_PER_DOMAIN requests each, until the list is empty or the loop
+     * test on NR_PENDING_REQS fails; then kicks the disk task queue. */
+    blkif_t          *blkif;
+    struct list_head *ent;
+
+    /* Queue up a batch of requests. */
+    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+            !list_empty(&io_schedule_list) )
+    {
+        ent = io_schedule_list.next; /* read without io_schedule_list_lock */
+        blkif = list_entry(ent, blkif_t, blkdev_list);
+        blkif_get(blkif); /* hold a ref across the list's own blkif_put() */
+        remove_from_blkdev_list(blkif);
+        if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+            add_to_blkdev_list_tail(blkif); /* more work left: requeue */
+        blkif_put(blkif);
+    }
+
+    /* Push the batch through to disc. */
+    run_task_queue(&tq_disk);
+}
+
+static void maybe_trigger_io_schedule(void)
+{   /* Schedules io_schedule_tasklet when NR_PENDING_REQS is below half of
+     * MAX_PENDING_REQS and at least one interface is queued. */
+    /*
+     * Needed so that two processes, who together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+         !list_empty(&io_schedule_list) )
+        tasklet_schedule(&io_schedule_tasklet);
+}
+
+
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+/*
+ * Completion callback installed as bh->b_end_io by dispatch_rw_block_io().
+ *
+ * bh       - the completed buffer head; b_private holds its pending_req_t.
+ * uptodate - zero if the I/O on this buffer failed.
+ *
+ * A failed buffer marks the whole request with status 2. When the last
+ * buffer of a request completes, the response is queued, the interface
+ * reference taken at dispatch is dropped, and the pending_req slot is handed
+ * back to the allocation ring. The buffer head itself was allocated from
+ * buffer_head_cachep for this request only, so it is released here.
+ */
+static void end_block_io_op(struct buffer_head *bh, int uptodate)
+{
+    pending_req_t *pending_req = bh->b_private;
+
+    /* An error fails the entire request. */
+    if ( !uptodate )
+    {
+        DPRINTK("Buffer not up-to-date at end of operation\n");
+        pending_req->status = 2;
+    }
+
+    unlock_buffer(virt_to_phys(bh->b_data), 
+                  bh->b_size, 
+                  (pending_req->operation==READ));
+    
+    if ( atomic_dec_and_test(&pending_req->pendcnt) )
+    {
+        make_response(pending_req->blkif, pending_req->id,
+                      pending_req->operation, pending_req->status);
+        blkif_put(pending_req->blkif);
+        /* Return the slot index to the free ring. */
+        spin_lock(&pend_prod_lock);
+        pending_ring[MASK_PEND_IDX(pending_prod)] = 
+            pending_req - pending_reqs;
+        pending_prod++;
+        spin_unlock(&pend_prod_lock);
+        maybe_trigger_io_schedule();
+    }
+
+    /* Nothing else frees this buffer head; release it now it is done. */
+    kmem_cache_free(buffer_head_cachep, bh);
+}
+
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{   /* Interrupt handler registered per interface; dev_id is the blkif_t.
+     * Queues the interface for service and possibly schedules the tasklet. */
+    blkif_t *blkif = dev_id;
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_io_schedule();
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
+static int lock_buffer(blkif_t *blkif,
+                       unsigned long buffer,
+                       unsigned short size,
+                       int writeable_buffer)
+{   /* Placeholder: iterates over the page frames spanned by the range
+     * starting at 'buffer' but the loop body is empty, so nothing is
+     * checked or pinned and the function always returns 1. The 'fail'
+     * path below is currently unreachable. */
+    unsigned long    pfn;
+
+    for ( pfn = buffer >> PAGE_SHIFT; 
+          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+          pfn++ )
+    {
+    }
+
+    return 1;
+
+ fail:
+    while ( pfn-- > (buffer >> PAGE_SHIFT) )
+    {        
+    }
+    return 0;
+}
+
+static void unlock_buffer(unsigned long buffer,
+                          unsigned short size,
+                          int writeable_buffer)
+{   /* Placeholder counterpart of lock_buffer(): walks the same page-frame
+     * range with an empty loop body, so it currently has no effect. */
+    unsigned long pfn;
+
+    for ( pfn = buffer >> PAGE_SHIFT; 
+          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+          pfn++ )
+    {
+    }
+}
+
+/*
+ * Consume up to 'max_to_do' requests from the interface's shared ring.
+ *
+ * Read and write requests are handed to dispatch_rw_block_io(); any other
+ * operation is answered immediately with status 1. The private request
+ * consumer index is advanced past everything that was consumed.
+ *
+ * Returns 1 if the batch limit or the pending-request limit stopped the
+ * loop while requests may remain, 0 otherwise.
+ */
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+    blk_ring_t *blk_ring = blkif->blk_ring_base;
+    blk_ring_req_entry_t *req;
+    BLK_RING_IDX i;
+    int more_to_do = 0;
+
+    /* Take items off the comms ring, taking care not to overflow. */
+    for ( i = blkif->blk_req_cons; 
+          (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 
+                                        BLK_RING_SIZE);
+          i++ )
+    {
+        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+        {
+            more_to_do = 1;
+            break;
+        }
+        
+        /* 'i' is free-running: always mask it before indexing the ring. */
+        req = &blk_ring->ring[MASK_BLK_IDX(i)].req;
+        switch ( req->operation )
+        {
+        case BLKIF_OP_READ:
+        case BLKIF_OP_WRITE:
+            dispatch_rw_block_io(blkif, req);
+            break;
+
+        default:
+            DPRINTK("error: unknown block io operation [%d]\n",
+                    req->operation);
+            make_response(blkif, req->id, req->operation, 1);
+            break;
+        }
+    }
+
+    blkif->blk_req_cons = i;
+    return more_to_do;
+}
+
+/*
+ * Validate one read/write ring request, translate its segments to physical
+ * device extents, and submit one buffer head per physical segment.
+ *
+ * On any validation failure (bad segment count, zero-sized segment, failed
+ * translation, buffer that cannot be locked) a response with status 1 is
+ * queued and nothing is submitted. On success a pending_req slot is taken
+ * from the allocation ring, a reference is taken on the interface, and the
+ * response is produced later by end_block_io_op() once every buffer head
+ * has completed.
+ *
+ * This runs from the io_schedule tasklet, so allocations must not sleep.
+ * NOTE(review): the caller is relied upon to have checked that a free
+ * pending_req slot exists (do_block_io_op tests NR_PENDING_REQS).
+ */
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                 blk_ring_req_entry_t *req)
+{
+    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+    struct buffer_head *bh;
+    /* Same opcode namespace as the switch in do_block_io_op(). */
+    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+    unsigned short nr_sects;
+    unsigned long buffer;
+    int i, tot_sects;
+    pending_req_t *pending_req;
+
+    /* We map virtual scatter/gather segments to physical segments. */
+    int new_segs, nr_psegs = 0;
+    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
+
+    /* Check that number of segments is sane. */
+    if ( unlikely(req->nr_segments == 0) || 
+         unlikely(req->nr_segments > MAX_BLK_SEGS) )
+    {
+        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
+        goto bad_descriptor;
+    }
+
+    /*
+     * Check each address/size pair is sane, and convert into a
+     * physical device and block offset. Note that if the offset and size
+     * crosses a virtual extent boundary, we may end up with more
+     * physical scatter/gather segments than virtual segments.
+     */
+    for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
+    {
+        /* Low 9 bits carry the sector count, the rest the buffer address. */
+        buffer   = req->buffer_and_sects[i] & ~0x1FF;
+        nr_sects = req->buffer_and_sects[i] &  0x1FF;
+
+        if ( unlikely(nr_sects == 0) )
+        {
+            DPRINTK("zero-sized data request\n");
+            goto bad_descriptor;
+        }
+
+        phys_seg[nr_psegs].dev           = req->device;
+        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
+        phys_seg[nr_psegs].buffer        = buffer;
+        phys_seg[nr_psegs].nr_sects      = nr_sects;
+
+        /* Translate the request into the relevant 'physical device' */
+        new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
+        if ( new_segs < 0 )
+        { 
+            DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
+                    operation == READ ? "read" : "write", 
+                    req->sector_number + tot_sects, 
+                    req->sector_number + tot_sects + nr_sects, 
+                    req->device); 
+            goto bad_descriptor;
+        }
+  
+        nr_psegs += new_segs;
+        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
+    }
+
+    for ( i = 0; i < nr_psegs; i++ )
+    {
+        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
+                                   phys_seg[i].nr_sects << 9,
+                                   operation==READ)) )
+        {
+            DPRINTK("invalid buffer\n");
+            /* Undo the locks taken on the segments before this one. */
+            while ( i-- > 0 )
+                unlock_buffer(phys_seg[i].buffer, 
+                              phys_seg[i].nr_sects << 9,
+                              operation==READ);
+            goto bad_descriptor;
+        }
+    }
+
+    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
+    pending_req->blkif     = blkif;
+    pending_req->id        = req->id;
+    pending_req->operation = operation;
+    pending_req->status    = 0;
+    atomic_set(&pending_req->pendcnt, nr_psegs);
+
+    /* Dropped by end_block_io_op() when the last buffer head completes. */
+    blkif_get(blkif);
+
+    /* Now we pass each segment down to the real blkdev layer. */
+    for ( i = 0; i < nr_psegs; i++ )
+    {
+        /* Tasklet context: a sleeping (GFP_KERNEL) allocation is not safe. */
+        bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
+        if ( unlikely(bh == NULL) )
+            panic("bh is null\n");
+        memset(bh, 0, sizeof (struct buffer_head));
+    
+        bh->b_size          = phys_seg[i].nr_sects << 9;
+        bh->b_dev           = phys_seg[i].dev;
+        bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
+
+        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
+           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
+        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
+ 
+        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
+        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
+        bh->b_end_io        = end_block_io_op;
+        bh->b_private       = pending_req;
+
+        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
+        if ( operation == WRITE )
+            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
+
+        atomic_set(&bh->b_count, 1);
+
+        /* Dispatch a single request. We'll flush it to disc later. */
+        submit_bh(operation, bh);
+    }
+
+    return;
+
+ bad_descriptor:
+    make_response(blkif, req->id, req->operation, 1);
+} 
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+/*
+ * Queue a response on the interface's shared ring and notify the other end.
+ *
+ * blkif - interface whose ring receives the response.
+ * id    - request identifier to echo back.
+ * op    - operation code to echo back.
+ * st    - status value to report.
+ *
+ * This is reached both from the request-processing tasklet and from the
+ * buffer-head completion callback. Completion callbacks may run in
+ * interrupt context, so the ring lock is taken with interrupts disabled to
+ * keep an interrupting completion from spinning on a lock its own CPU holds.
+ */
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, unsigned long st)
+{
+    blk_ring_resp_entry_t *resp;
+    unsigned long flags;
+
+    /* Place on the response ring for the relevant domain. */ 
+    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+    resp = &blkif->blk_ring_base->
+        ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
+    resp->id        = id;
+    resp->operation = op;
+    resp->status    = st;
+    /* The entry must be visible before the producer index that exposes it. */
+    wmb();
+    blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
+    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+    /* Kick the relevant domain. */
+    notify_via_evtchn(blkif->evtchn);
+}
+
+static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
+{   /* Handler registered for VIRQ_DEBUG by init_module(). The whole body is
+     * compiled out with #if 0, so the handler currently does nothing.
+     * NOTE(review): the disabled code refers to names ('blkif',
+     * 'for_each_domain', a 'domain' field) that are not declared in this
+     * function or in pending_req_t; it will need rework before enabling. */
+#if 0
+    unsigned long flags;
+    struct task_struct *p;
+    blk_ring_t *blk_ring;
+    int i;
+
+    printk("Dumping block queue stats: nr_pending = %d"
+           " (prod=0x%08x,cons=0x%08x)\n",
+           NR_PENDING_REQS, pending_prod, pending_cons);
+
+    read_lock_irqsave(&tasklist_lock, flags);
+    for_each_domain ( p )
+    {
+        printk("Domain: %llu\n", blkif->domain);
+        blk_ring = blkif->blk_ring_base;
+        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
+               "0x%08x on_list=%d\n",
+               blk_ring->req_prod, blkif->blk_req_cons,
+               blk_ring->resp_prod, blkif->blk_resp_prod,
+               __on_blkdev_list(p));
+    }
+    read_unlock_irqrestore(&tasklist_lock, flags);
+
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+    {
+        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
+               i, pending_reqs[i].domain, pending_reqs[i].id,
+               atomic_read(&pending_reqs[i].pendcnt), 
+               pending_reqs[i].operation, pending_reqs[i].status);
+    }
+#endif
+}
+
+void unlink_blkdev_info(blkif_t *blkif)
+{   /* Removes blkif from io_schedule_list, if queued, and drops the list's
+     * reference. Unlike remove_from_blkdev_list(), the link is left with a
+     * non-NULL poison value, so __on_blkdev_list() keeps returning true and
+     * add_to_blkdev_list_tail() will not queue the interface again --
+     * presumably intentional, to prevent reinsertion; TODO confirm. */
+    unsigned long flags;
+
+    spin_lock_irqsave(&io_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = (void *)0xdeadbeef;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
+}
+
+/*
+ * Module initialisation: create the buffer-head cache, mark every
+ * pending_req slot as free, set up the scheduler list and register the
+ * debug interrupt handler.
+ *
+ * Returns 0 on success, or -ENOMEM if the buffer-head cache cannot be
+ * created (requests could not be dispatched without it). Failure to obtain
+ * the debug interrupt is reported but is not fatal.
+ *
+ * NOTE(review): nothing here creates 'blkif_cachep' (used by the interface
+ * code) or registers the control-message receiver -- confirm where that is
+ * meant to happen.
+ */
+static int __init init_module(void)
+{
+    int i;
+
+    /* Done first so that a failure leaves nothing to undo. */
+    buffer_head_cachep = kmem_cache_create(
+        "buffer_head_cache", sizeof(struct buffer_head),
+        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+    if ( buffer_head_cachep == NULL )
+        return -ENOMEM;
+
+    /* All slots free: the ring holds every index, prod leads cons by MAX. */
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    memset(pending_reqs, 0, sizeof(pending_reqs));
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+    
+    for ( i = 0; i < NR_CPUS; i++ )
+        completed_bhs[i] = NULL;
+        
+    spin_lock_init(&io_schedule_list_lock);
+    INIT_LIST_HEAD(&io_schedule_list);
+
+    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
+                     SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 )
+        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
+
+    return 0;
+}
+
+static void cleanup_module(void)
+{ /* Empty: the IRQ and cache set up by init_module() are not released. */
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c

new file mode 100644 (file)

index 0000000..bd6c401
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
@@ -0,0 +1,578 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/vbd.c
+ * 
+ * Routines for managing virtual block devices (VBDs).
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+/*
+ * Create a new VBD with no extents on the block interface identified by
+ * (create->domid, create->blkif_handle), keyed by create->vdevice.
+ *
+ * The outcome is reported in create->status:
+ *   BLKIF_STATUS_OKAY                - VBD inserted into the interface's tree
+ *   BLKIF_STATUS_INTERFACE_NOT_FOUND - no such block interface
+ *   BLKIF_STATUS_VBD_EXISTS          - a VBD with this vdevice already exists
+ *   BLKIF_STATUS_OUT_OF_MEMORY       - allocation of the VBD failed
+ */
+void vbd_create(blkif_vbd_create_t *create) 
+{
+    vbd_t       *vbd; 
+    rb_node_t  **rb_p, *rb_parent = NULL;
+    blkif_t     *blkif;
+    blkif_vdev_t vdevice = create->vdevice;
+
+    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("vbd_create attempted for non-existent blkif (%llu,%u)\n", 
+                create->domid, create->blkif_handle); 
+        create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    spin_lock(&blkif->vbd_lock);
+
+    /* Walk down to the insertion point, rejecting a duplicate vdevice. */
+    rb_p = &blkif->vbd_rb.rb_node;
+    while ( *rb_p != NULL )
+    {
+        rb_parent = *rb_p;
+        vbd = rb_entry(rb_parent, vbd_t, rb);
+        if ( vdevice < vbd->vdevice )
+        {
+            rb_p = &rb_parent->rb_left;
+        }
+        else if ( vdevice > vbd->vdevice )
+        {
+            rb_p = &rb_parent->rb_right;
+        }
+        else
+        {
+            DPRINTK("vbd_create attempted for already existing vbd\n");
+            create->status = BLKIF_STATUS_VBD_EXISTS;
+            goto out;
+        }
+    }
+
+    /* We hold a spinlock, so the allocation must not sleep. */
+    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) )
+    {
+        DPRINTK("vbd_create: out of memory\n");
+        create->status = BLKIF_STATUS_OUT_OF_MEMORY;
+        goto out;
+    }
+
+    vbd->vdevice = vdevice; 
+    vbd->mode    = create->mode; 
+    vbd->type    = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
+    vbd->extents = NULL; 
+
+    rb_link_node(&vbd->rb, rb_parent, rb_p);
+    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
+
+    create->status = BLKIF_STATUS_OKAY;
+
+ out:
+    spin_unlock(&blkif->vbd_lock);
+    blkif_put(blkif);
+}
+
+
+/*
+ * Grow a VBD by appending a new extent. Fails if the VBD doesn't exist.
+ *
+ * The target is identified by (grow->domid, grow->blkif_handle,
+ * grow->vdevice). The outcome is reported in grow->status:
+ *   BLKIF_STATUS_OKAY                - extent appended to the VBD's list
+ *   BLKIF_STATUS_INTERFACE_NOT_FOUND - no such block interface
+ *   BLKIF_STATUS_VBD_NOT_FOUND       - no VBD with this vdevice
+ *   BLKIF_STATUS_OUT_OF_MEMORY       - allocation of the extent failed
+ *
+ * NOTE(review): this function accesses the extent as 'sector_start' /
+ * 'sector_length', whereas other functions in this file use 'start_sector' /
+ * 'nr_sectors' -- confirm against the extent type definitions.
+ */
+void vbd_grow(blkif_vbd_grow_t *grow) 
+{
+    blkif_t          *blkif;
+    xen_extent_le_t **px, *x; 
+    vbd_t            *vbd = NULL;
+    rb_node_t        *rb;
+    blkif_vdev_t      vdevice = grow->vdevice;
+
+    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,%u)\n", 
+                grow->domid, grow->blkif_handle); 
+        grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    spin_lock(&blkif->vbd_lock);
+
+    rb = blkif->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            break;
+    }
+
+    /*
+     * If the search fell off the tree, 'vbd' is either NULL (empty tree) or
+     * the last node visited, whose vdevice cannot match.
+     */
+    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
+    {
+        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
+        grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
+        goto out;
+    } 
+
+    /* We hold a spinlock, so the allocation must not sleep. */
+    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_ATOMIC)) == NULL) )
+    {
+        DPRINTK("vbd_grow: out of memory\n");
+        grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
+        goto out;
+    }
+ 
+    x->extent.device        = grow->extent.device; 
+    x->extent.sector_start  = grow->extent.sector_start; 
+    x->extent.sector_length = grow->extent.sector_length; 
+    x->next                 = (xen_extent_le_t *)NULL; 
+
+    /* Find the tail link of the extent list and hang the new extent off it. */
+    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
+        continue;
+
+    *px = x;
+
+    grow->status = BLKIF_STATUS_OKAY;
+
+ out:
+    spin_unlock(&blkif->vbd_lock);
+    blkif_put(blkif);
+}
+
+
+/*
+ * Shrink a VBD by removing and freeing its last extent.
+ *
+ * The target is identified by (shrink->domid, shrink->blkif_handle,
+ * shrink->vdevice). The outcome is reported in shrink->status:
+ *   BLKIF_STATUS_OKAY                - last extent removed
+ *   BLKIF_STATUS_INTERFACE_NOT_FOUND - no such block interface
+ *   BLKIF_STATUS_VBD_NOT_FOUND       - no VBD with this vdevice
+ *   BLKIF_STATUS_EXTENT_NOT_FOUND    - the VBD has no extents
+ */
+void vbd_shrink(blkif_vbd_shrink_t *shrink)
+{
+    blkif_t          *blkif;
+    xen_extent_le_t **px, *x; 
+    vbd_t            *vbd = NULL;
+    rb_node_t        *rb;
+    blkif_vdev_t      vdevice = shrink->vdevice;
+
+    blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,%u)\n", 
+                shrink->domid, shrink->blkif_handle); 
+        shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    spin_lock(&blkif->vbd_lock);
+
+    rb = blkif->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            break;
+    }
+
+    /*
+     * If the search fell off the tree, 'vbd' is either NULL (empty tree) or
+     * the last node visited, whose vdevice cannot match.
+     */
+    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
+    {
+        shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
+        goto out;
+    }
+
+    if ( unlikely(vbd->extents == NULL) )
+    {
+        shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
+        goto out;
+    }
+
+    /* Find the last extent. We now know that there is at least one. */
+    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
+        continue;
+
+    /* Unlink it (x->next is NULL here, so this terminates the list). */
+    x   = *px;
+    *px = x->next;
+    kfree(x);
+
+    shrink->status = BLKIF_STATUS_OKAY;
+
+ out:
+    spin_unlock(&blkif->vbd_lock);
+    blkif_put(blkif);
+}
+
+
+/*
+ * Destroy a VBD: remove it from its interface's tree and free it together
+ * with all of its extents.
+ *
+ * The target is identified by (destroy->domid, destroy->blkif_handle,
+ * destroy->vdevice). The outcome is reported in destroy->status:
+ *   BLKIF_STATUS_OKAY                - VBD and its extents freed
+ *   BLKIF_STATUS_INTERFACE_NOT_FOUND - no such block interface
+ *   BLKIF_STATUS_VBD_NOT_FOUND       - no VBD with this vdevice
+ */
+void vbd_destroy(blkif_vbd_destroy_t *destroy) 
+{
+    blkif_t         *blkif;
+    vbd_t           *vbd = NULL;
+    rb_node_t       *rb;
+    xen_extent_le_t *x, *t;
+    blkif_vdev_t     vdevice = destroy->vdevice;
+
+    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,%u)\n", 
+                destroy->domid, destroy->blkif_handle); 
+        destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    spin_lock(&blkif->vbd_lock);
+
+    rb = blkif->vbd_rb.rb_node;
+    while ( rb != NULL )
+    {
+        vbd = rb_entry(rb, vbd_t, rb);
+        if ( vdevice < vbd->vdevice )
+            rb = rb->rb_left;
+        else if ( vdevice > vbd->vdevice )
+            rb = rb->rb_right;
+        else
+            goto found;
+    }
+
+    destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
+    goto out;
+
+ found:
+    rb_erase(rb, &blkif->vbd_rb);
+    /* Grab the extent list head before the VBD itself is freed. */
+    x = vbd->extents;
+    kfree(vbd);
+
+    while ( x != NULL )
+    {
+        t = x->next;
+        kfree(x);
+        x = t;
+    }
+
+    /* Report success; every other exit path sets a status as well. */
+    destroy->status = BLKIF_STATUS_OKAY;
+    
+ out:
+    spin_unlock(&blkif->vbd_lock);
+    blkif_put(blkif);
+}
+
+
+/*
+ * Tear down every VBD attached to @blkif. The tree root is repeatedly erased
+ * and freed, along with its extent list, until the tree is empty. The whole
+ * operation runs under blkif->vbd_lock.
+ */
+void destroy_all_vbds(blkif_t *blkif)
+{
+    rb_node_t       *root;
+    vbd_t           *victim;
+    xen_extent_le_t *ext, *next_ext;
+
+    spin_lock(&blkif->vbd_lock);
+
+    for ( ; ; )
+    {
+        root = blkif->vbd_rb.rb_node;
+        if ( root == NULL )
+            break;
+
+        victim = rb_entry(root, vbd_t, rb);
+        rb_erase(root, &blkif->vbd_rb);
+
+        /* Save the extent list head before the VBD is freed. */
+        ext = victim->extents;
+        kfree(victim);
+
+        for ( ; ext != NULL; ext = next_ext )
+        {
+            next_ext = ext->next;
+            kfree(ext);
+        }
+    }
+
+    spin_unlock(&blkif->vbd_lock);
+}
+
+
+/*
+ * Describe one VBD in the next free slot of the caller's probe buffer.
+ *
+ * @xdi: probe buffer descriptor; 'count' is advanced on success.
+ * @vbd: the VBD to describe.
+ * @p:   owner whose 'domain' value is recorded in the description.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer is already full (count == max),
+ * or -EFAULT if the copy to the user buffer fails.
+ */
+static int vbd_probe_single(xen_disk_info_t *xdi, 
+                            vbd_t *vbd, 
+                            struct task_struct *p)
+{
+    xen_extent_le_t *ext;
+    xen_disk_t       desc;
+
+    if ( xdi->count == xdi->max )
+    {
+        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
+        return -ENOMEM;
+    }
+
+    desc.device = vbd->vdevice;
+    desc.domain = p->domain;
+
+    /* Type and flags: start from the VBD type, mark read-only if needed. */
+    desc.info = vbd->type;
+    if ( !VBD_CAN_WRITE(vbd) )
+        desc.info |= XD_FLAG_RO;
+
+    /* Capacity is the sum of the sizes of all extents. */
+    desc.capacity = 0ULL;
+    ext = vbd->extents;
+    while ( ext != NULL )
+    {
+        desc.capacity += ext->extent.nr_sectors;
+        ext = ext->next;
+    }
+
+    /* Now copy into relevant part of user-space buffer */
+    if ( copy_to_user(&xdi->disks[xdi->count], &desc, sizeof(xen_disk_t)) )
+    {
+        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
+        return -EFAULT;
+    }
+
+    xdi->count++;
+
+    return 0;
+}
+
+
+/*
+ * Report every VBD in p's VBD tree via vbd_probe_single(), visiting the
+ * nodes in ascending key order (in-order traversal without recursion, using
+ * the nodes' parent pointers). The walk is done under p->vbd_lock.
+ *
+ * Returns 0 on success (including an empty tree), or the first non-zero
+ * value returned by vbd_probe_single(), at which point the walk stops.
+ */
+static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
+{
+    int rc = 0;
+    rb_node_t *rb;
+
+    spin_lock(&p->vbd_lock);
+
+    /* An empty tree has nothing to report. */
+    if ( (rb = p->vbd_rb.rb_node) == NULL )
+        goto out;
+
+ new_subtree:
+    /* STEP 1. Find least node (it'll be left-most). */
+    while ( rb->rb_left != NULL )
+        rb = rb->rb_left;
+
+    for ( ; ; )
+    {
+        /* STEP 2. Dealt with left subtree. Now process current node. */
+        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
+            goto out;
+
+        /* STEP 3. Process right subtree, if any. */
+        if ( rb->rb_right != NULL )
+        {
+            rb = rb->rb_right;
+            goto new_subtree;
+        }
+
+        /* STEP 4. Done both subtrees. Head back through ancestors. */
+        for ( ; ; ) 
+        {
+            /* We're done when we get back to the root node. */
+            if ( rb->rb_parent == NULL )
+                goto out;
+            /* If we are left of parent, then parent is next to process. */
+            if ( rb->rb_parent->rb_left == rb )
+                break;
+            /* If we are right of parent, then we climb to grandparent. */
+            rb = rb->rb_parent;
+        }
+
+        /* We came up from a left child: the parent is the next node. */
+        rb = rb->rb_parent;
+    }
+
+ out:
+    spin_unlock(&p->vbd_lock);
+    return rc;  
+}
+
+
+/*
+ * Return information about the VBDs available for a given domain, or for all 
+ * domains; in the general case the 'domain' argument will be 0 which means 
+ * "information about the caller"; otherwise the 'domain' argument will 
+ * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 
+ * these cases require the caller to be privileged.
+ *
+ * Returns 0 on success, -EPERM if an unprivileged caller probes a domain
+ * other than its own, -EINVAL if the named domain does not exist, or the
+ * error returned by vbd_probe_devices().
+ */
+long vbd_probe(vbd_probe_t *probe) 
+{
+    struct task_struct *p = NULL; 
+    unsigned long flags;
+    long ret = 0;  
+
+    if ( probe->domain != 0 )
+    { 
+        /* We can only probe for ourselves (unless we're privileged). */
+        if( (probe->domain != current->domain) && !IS_PRIV(current) )
+            return -EPERM; 
+
+        if ( (probe->domain != VBD_PROBE_ALL) &&
+             ((p = find_domain_by_id(probe->domain)) == NULL) )
+        {
+            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
+                    probe->domain); 
+            return -EINVAL; 
+        }
+    }
+    else
+    { 
+        /* Default is to probe for ourselves. */
+        p = current; 
+        get_task_struct(p); /* to mirror final put_task_struct */
+    }
+
+    if ( probe->domain == VBD_PROBE_ALL )
+    { 
+        read_lock_irqsave(&tasklist_lock, flags);
+        for_each_domain ( p )
+        {
+            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
+            { 
+                read_unlock_irqrestore(&tasklist_lock, flags);
+                /*
+                 * 'p' is only the loop cursor here: no reference was taken
+                 * on it, so it must not reach put_task_struct() below.
+                 */
+                p = NULL;
+                goto out; 
+            }
+        }
+        read_unlock_irqrestore(&tasklist_lock, flags);
+        /* Same reasoning as above: the cursor holds no reference. */
+        p = NULL;
+    } 
+    else
+    {
+        ret = vbd_probe_devices(&probe->xdi, p);
+    }
+
+ out: 
+    if ( ret != 0 )
+        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
+    /* Drop the reference taken above, if any. */
+    if ( p != NULL )
+        put_task_struct(p); 
+    return ret; 
+}
+
+
+/*
+ * Report the mode and extent list of one VBD.
+ *
+ * The VBD is identified by (info->domain, info->vdevice). On success
+ * info->mode is filled in, up to info->maxextents extents are copied to the
+ * user buffer info->extents, and info->nextents holds the number copied.
+ *
+ * Returns 0 on success, -EPERM if an unprivileged caller asks about another
+ * domain, -EINVAL if the domain or the VBD does not exist, or -EFAULT if a
+ * copy to the user buffer fails.
+ */
+long vbd_info(vbd_info_t *info) 
+{
+    struct task_struct *dom;
+    xen_extent_le_t    *ext;
+    xen_extent_t       *dst;
+    vbd_t              *vbd = NULL;
+    rb_node_t          *node;
+    long                rc = 0;
+
+    if ( (info->domain != current->domain) && !IS_PRIV(current) )
+        return -EPERM;
+
+    dom = find_domain_by_id(info->domain);
+    if ( dom == NULL )
+    {
+        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
+                info->domain); 
+        return -EINVAL;
+    }
+
+    spin_lock(&dom->vbd_lock);
+
+    /* Binary search of the VBD tree; 'node' ends up NULL if not found. */
+    for ( node = dom->vbd_rb.rb_node; node != NULL; )
+    {
+        vbd = rb_entry(node, vbd_t, rb);
+        if ( info->vdevice == vbd->vdevice )
+            break;
+        node = (info->vdevice < vbd->vdevice) ? node->rb_left
+                                              : node->rb_right;
+    }
+
+    if ( unlikely(node == NULL) )
+    {
+        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
+        rc = -EINVAL;
+        goto out;
+    }
+
+    info->mode     = vbd->mode;
+    info->nextents = 0;
+
+    /* Copy extents out until the list ends or the caller's buffer is full. */
+    dst = info->extents;
+    for ( ext = vbd->extents;
+          (ext != NULL) && (info->nextents != info->maxextents);
+          ext = ext->next )
+    {
+        if ( copy_to_user(dst, &ext->extent, sizeof(xen_extent_t)) )
+        {
+            DPRINTK("vbd_info: copy_to_user failed\n");
+            rc = -EFAULT;
+            goto out;
+        }
+        dst++;
+        info->nextents++;
+    }
+
+ out:
+    spin_unlock(&dom->vbd_lock);
+    put_task_struct(dom);
+    return rc;
+}
+
+
+/*
+ * Translate a request segment addressed to a VBD into one or two segments
+ * addressed to the devices named by the VBD's extents.
+ *
+ * @pseg:      on entry pseg[0] holds the virtual device, start sector and
+ *             length; on return pseg[0] (and pseg[1], if the request
+ *             straddles two extents) hold the translated values.
+ * @p:         owner of the VBD tree that is searched.
+ * @operation: READ or WRITE; checked against the VBD's access mode.
+ *
+ * Returns the number of resulting segments (1 or 2), -ENODEV if the VBD does
+ * not exist, or -EACCES if access is not permitted, the request starts
+ * beyond the end of the VBD, or it overruns more than one extent boundary.
+ */
+int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
+{
+    xen_extent_le_t *ext;
+    vbd_t           *vbd = NULL;
+    rb_node_t       *node;
+    xen_sector_t     offset;
+    unsigned long    count;
+    int              rc;
+
+    spin_lock(&p->vbd_lock);
+
+    /* Binary search of the VBD tree; 'node' ends up NULL if not found. */
+    for ( node = p->vbd_rb.rb_node; node != NULL; )
+    {
+        vbd = rb_entry(node, vbd_t, rb);
+        if ( pseg->dev == vbd->vdevice )
+            break;
+        node = (pseg->dev < vbd->vdevice) ? node->rb_left : node->rb_right;
+    }
+
+    if ( node == NULL )
+    {
+        DPRINTK("vbd_translate; domain %llu attempted to access "
+                "non-existent VBD.\n", p->domain); 
+        rc = -ENODEV;
+        goto out;
+    }
+
+    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
+         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
+    {
+        rc = -EACCES;
+        goto out;
+    }
+
+    /*
+     * Walk the extent list, consuming each extent's length from the offset,
+     * until we reach the extent that contains the first requested sector.
+     */
+    offset = pseg->sector_number;
+    count  = pseg->nr_sects;
+    for ( ext = vbd->extents; ext != NULL; ext = ext->next )
+    {
+        if ( offset < ext->extent.nr_sectors )
+            break;
+        offset -= ext->extent.nr_sectors;
+    }
+
+    if ( ext == NULL )
+    {
+        DPRINTK("vbd_translate: end of vbd.\n");
+        rc = -EACCES;
+        goto out;
+    }
+
+    pseg->dev           = ext->extent.device;
+    pseg->sector_number = ext->extent.start_sector + offset;
+
+    if ( !unlikely((offset + count) > ext->extent.nr_sectors) )
+    {
+        /* The whole request fits inside this extent. */
+        rc = 1;
+        goto out;
+    }
+
+    /*
+     * Here we deal with overrun onto the following extent. We don't deal with 
+     * overrun of more than one boundary since each request is restricted to 
+     * 2^9 512-byte sectors, so it should be trivial for control software to 
+     * ensure that extents are large enough to prevent excessive overrun.
+     */
+
+    /* First chunk runs to the end of the first extent. */
+    pseg[0].nr_sects = ext->extent.nr_sectors - offset;
+
+    /* Second chunk's buffer and length pick up where the first one ended. */
+    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
+    pseg[1].nr_sects = count - pseg[0].nr_sects;
+
+    /* The next extent must exist and be long enough for the remainder. */
+    ext = ext->next;
+    if ( unlikely(ext == NULL) ||
+         unlikely(ext->extent.nr_sectors < pseg[1].nr_sects) )
+    {
+        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
+        rc = -EACCES;
+        goto out;
+    }
+
+    /* Real device and start sector for the second chunk. */
+    pseg[1].dev           = ext->extent.device;
+    pseg[1].sector_number = ext->extent.start_sector;
+    rc = 2;
+
+ out:
+    spin_unlock(&p->vbd_lock);
+    return rc;
+}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h

new file mode 100644 (file)

index 0000000..f6e8a4d
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
@@ -0,0 +1,109 @@
+/******************************************************************************
+ * blkif.h
+ * 
+ * Unified block-device I/O interface for Xen guest OSes.
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __SHARED_BLKIF_H__
+#define __SHARED_BLKIF_H__
+
+#define blkif_vdev_t   u16
+#define blkif_sector_t u64
+
+#define BLKIF_OP_READ      0
+#define BLKIF_OP_WRITE     1
+#define BLKIF_OP_PROBE     2
+
+/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
+#define BLKIF_RING_SIZE        64
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_REQUEST_MAX_SEGMENTS 11
+
+/*
+ * A single request as placed on the shared ring. The segment array is sized
+ * by BLKIF_REQUEST_MAX_SEGMENTS, the limit this header defines so that the
+ * whole ring fits in one page.
+ */
+typedef struct {
+    unsigned char  operation;        /* BLKIF_OP_???                         */
+    unsigned char  nr_segments;      /* <= BLKIF_REQUEST_MAX_SEGMENTS        */
+    blkif_vdev_t   device;           /* only for read/write requests         */
+    unsigned long  id;               /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;    /* start sector idx on disk (r/w only)  */
+    /* Least 9 bits is 'nr_sects'. High 23 bits is the address.      */
+    unsigned long  buffer_and_sects[BLKIF_REQUEST_MAX_SEGMENTS];
+} blkif_request_t;
+
+/*
+ * A single response as placed on the shared ring. 'id' and 'operation' are
+ * echoed from the request being answered.
+ */
+typedef struct {
+    unsigned long   id;              /* copied from request */
+    unsigned char   operation;       /* copied from request */
+    int             status;          /* BLKIF_RSP_???       */
+} blkif_response_t;
+
+#define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
+#define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
+
+/*
+ * We use a special capitalised type name because it is _essential_ that all 
+ * arithmetic on indexes is done on an integer type of the correct size.
+ */
+typedef unsigned int BLKIF_RING_IDX;
+
+/*
+ * Ring indexes are 'free running'. That is, they are not stored modulo the
+ * size of the ring buffer. The following macro converts a free-running counter
+ * into a value that can directly index a ring-buffer array.
+ */
+#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
+
+/*
+ * The shared ring: two free-running producer indexes followed by
+ * BLKIF_RING_SIZE slots. Each slot is a union, so a slot holds either a
+ * request or a response. Use MASK_BLKIF_IDX() to turn an index into a slot
+ * number.
+ */
+typedef struct {
+    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
+    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen.     */
+    union {
+        blkif_request_t  req;
+        blkif_response_t resp;
+    } ring[BLKIF_RING_SIZE];
+} blkif_ring_t;
+
+
+/*
+ * BLKIF_OP_PROBE:
+ * The request format for a probe request is constrained as follows:
+ *  @operation   == BLKIF_OP_PROBE
+ *  @nr_segments == size of probe buffer in pages
+ *  @device      == unused (zero)
+ *  @id          == any value (echoed in response message)
+ *  @sector_num  == unused (zero)
+ *  @buffer_and_sects == list of page-aligned, page-sized buffers.
+ *                       (i.e., nr_sects == 8).
+ * 
+ * The response is a list of vdisk_t elements copied into the out-of-band
+ * probe buffer. On success the response status field contains the number
+ * of vdisk_t elements.
+ */
+
+/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. */
+#define VDISK_TYPE_FLOPPY  0x00
+#define VDISK_TYPE_TAPE    0x01
+#define VDISK_TYPE_CDROM   0x05
+#define VDISK_TYPE_OPTICAL 0x07
+#define VDISK_TYPE_DISK    0x20 
+
+#define VDISK_TYPE_MASK    0x3F
+#define VDISK_TYPE(_x)     ((_x) & VDISK_TYPE_MASK) 
+
+/* The top two bits of the type field encode various flags. */
+#define VDISK_FLAG_RO      0x40
+#define VDISK_FLAG_VIRT    0x80
+#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO)
+#define VDISK_VIRTUAL(_x)  ((_x) & VDISK_FLAG_VIRT) 
+
+/*
+ * Description of one virtual disk. Per the BLKIF_OP_PROBE notes in this
+ * header, a probe response is a list of these elements.
+ */
+typedef struct {
+    blkif_vdev_t   device;       /* Device number (opaque 16 bit value). */
+    unsigned short info;         /* Device type and flags (VDISK_*).     */
+    blkif_sector_t capacity;     /* Size in terms of 512-byte sectors.   */
+} vdisk_t;
+
+#endif /* __SHARED_BLKIF_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile

new file mode 100644 (file)

index 0000000..35986ca
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := block.o vbd.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c

new file mode 100644 (file)

index 0000000..d00dd98
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
@@ -0,0 +1,625 @@
+/******************************************************************************
+ * block.c
+ * 
+ * Xenolinux virtual block-device driver.
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ */
+
+#include "block.h"
+#include <linux/blk.h>
+#include <linux/cdrom.h>
+#include <linux/tqueue.h>
+#include <linux/sched.h>
+#include <scsi/scsi.h>
+
+#include <linux/interrupt.h>
+
+typedef unsigned char byte; /* from linux/ide.h */
+
+#define STATE_ACTIVE    0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED    2
+static unsigned int state = STATE_SUSPENDED;
+
+/* Dynamically-mapped IRQs. */
+static int xlblk_response_irq, xlblk_update_irq;
+
+static blk_ring_t *blk_ring;
+static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
+static BLK_RING_IDX req_prod;  /* Private request producer.         */
+
+/* We plug the I/O ring if the driver is suspended or if the ring is full. */
+#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
+                      (state != STATE_ACTIVE))
+
+
+/*
+ * Request queues with outstanding work, but ring is currently full.
+ * We need no special lock here, as we always access this with the
+ * io_request_lock held. We only need a small maximum list.
+ */
+#define MAX_PENDING 8
+static request_queue_t *pending_queues[MAX_PENDING];
+static int nr_pending;
+
+static kdev_t        sg_dev;
+static int           sg_operation = -1;
+static unsigned long sg_next_sect;
+#define DISABLE_SCATTERGATHER() (sg_operation = -1)
+
+/*
+ * Publish the private request producer index to the shared ring and issue a
+ * BLOCK_IO_OP_SIGNAL hypercall. Scatter-gather merging is disabled first, so
+ * requests queued afterwards start a fresh ring entry.
+ */
+static inline void signal_requests_to_xen(void)
+{
+    block_io_op_t signal_op;
+
+    signal_op.cmd = BLOCK_IO_OP_SIGNAL;
+
+    DISABLE_SCATTERGATHER();
+    blk_ring->req_prod = req_prod;
+
+    HYPERVISOR_block_io_op(&signal_op);
+}
+
+
+/*
+ * xlblk_update_int/update_vbds_task - handle VBD update events from Xen
+ * 
+ * Schedule a task for keventd to run, which will update the VBDs and perform 
+ * the corresponding updates to our view of VBD state, so the XenoLinux will 
+ * respond to changes / additions / deletions to the set of VBDs automatically.
+ */
+static struct tq_struct update_tq;
+/* Task-queue routine: refreshes our view of the VBDs. Argument is unused. */
+static void update_vbds_task(void *unused)
+{ 
+    xlvbd_update_vbds();
+}
+/*
+ * Interrupt handler for VBD update events. Does no work itself: it points
+ * update_tq at update_vbds_task() and hands it to schedule_task().
+ */
+static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    update_tq.routine = update_vbds_task;
+    schedule_task(&update_tq);
+}
+
+
+/*
+ * Open a Xen virtual block device.
+ *
+ * Returns 0 on success after incrementing the disk's usage count. If the
+ * device or partition has zero capacity, returns -ENXIO, -ENOMEDIUM or
+ * -ENODEV according to the heuristics below. Returns -ENODEV if no gendisk
+ * or no disk structure exists for the device.
+ */
+int xen_block_open(struct inode *inode, struct file *filep)
+{
+    kdev_t xldev = inode->i_rdev;
+    struct gendisk *gd = get_gendisk(xldev);
+    xl_disk_t *disk = xldev_to_xldisk(xldev);
+    int minor = MINOR(xldev);
+
+    /* No gendisk registered for this device: nothing to open. */
+    if ( gd == NULL )
+        return -ENODEV;
+
+    if ( gd->part[minor].nr_sects == 0 )
+    { 
+        /*
+         * Device either doesn't exist, or has zero capacity; we use a few
+         * cheesy heuristics to return the relevant error code
+         */
+        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
+             ((minor & (gd->max_p - 1)) != 0) )
+        { 
+            /*
+             * We have a real device, but no such partition, or we just have a
+             * partition number so guess this is the problem.
+             */
+            return -ENXIO;     /* no such device or address */
+        }
+        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
+        {
+            /* This is a removable device => assume that media is missing. */ 
+            return -ENOMEDIUM; /* media not present (this is a guess) */
+        } 
+        else
+        { 
+            /* Just go for the general 'no such device' error. */
+            return -ENODEV;    /* no such device */
+        }
+    }
+
+    /* Mirror xen_block_revalidate(): a missing disk structure is an error. */
+    if ( disk == NULL )
+        return -ENODEV;
+    
+    /* Update of usage count is protected by per-device semaphore. */
+    disk->usage++;
+
+    return 0;
+}
+
+
+/*
+ * Release (close) a Xen virtual block device. Decrements the disk's usage
+ * count; when it reaches zero a VBD update is scheduled, since an unused
+ * device may allow more VBD updates to occur. Always returns 0.
+ */
+int xen_block_release(struct inode *inode, struct file *filep)
+{
+    xl_disk_t *xldisk = xldev_to_xldisk(inode->i_rdev);
+
+    /* Update of usage count is protected by a per-device semaphore. */
+    xldisk->usage--;
+    if ( xldisk->usage != 0 )
+        return 0;
+
+    /* Last user gone: let keventd re-examine the set of VBDs. */
+    update_tq.routine = update_vbds_task;
+    schedule_task(&update_tq);
+
+    return 0;
+}
+
+
+/*
+ * ioctl handler for Xen virtual block devices.
+ *
+ * Size, sector-size and geometry queries write their result through the
+ * user pointer in @argument and return 0, or -EFAULT if the copy fails.
+ * BLKRRPART returns the result of xen_block_revalidate(). The block-size and
+ * read-ahead commands are accepted but do nothing (return 0). Unsupported
+ * commands return -ENOSYS.
+ */
+int xen_block_ioctl(struct inode *inode, struct file *filep,
+                          unsigned command, unsigned long argument)
+{
+    kdev_t dev = inode->i_rdev;
+    struct hd_geometry *geo = (struct hd_geometry *)argument;
+    struct hd_big_geometry *big_geo = (struct hd_big_geometry *)argument;
+    struct gendisk *gd;     
+    struct hd_struct *part; 
+    int i;
+
+    /* NB. No need to check permissions. That is done for us. */
+    
+    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+                  command, (long) argument, dev); 
+  
+    gd = get_gendisk(dev);
+    part = &gd->part[MINOR(dev)]; 
+
+    switch ( command )
+    {
+    case BLKGETSIZE:
+        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
+        return put_user(part->nr_sects, (unsigned long *) argument);
+
+    case BLKGETSIZE64:
+        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
+                      (u64)part->nr_sects * 512);
+        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
+
+    case BLKRRPART:                               /* re-read partition table */
+        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
+        return xen_block_revalidate(dev);
+
+    case BLKSSZGET:
+        /*
+         * The sector size is delivered through the user pointer, not as the
+         * ioctl return value: callers expect 0 on success.
+         */
+        return put_user(hardsect_size[MAJOR(dev)][MINOR(dev)],
+                        (int *) argument);
+
+    case BLKBSZGET:                                        /* get block size */
+        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
+        break;
+
+    case BLKBSZSET:                                        /* set block size */
+        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
+        break;
+
+    case BLKRASET:                                         /* set read-ahead */
+        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
+        break;
+
+    case BLKRAGET:                                         /* get read-ahead */
+        DPRINTK_IOCTL("   BLKRAGET: %x\n", BLKRAGET);
+        break;
+
+    case HDIO_GETGEO:
+        /* note: these values are complete garbage */
+        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
+        if (!argument) return -EINVAL;
+        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
+        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
+        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
+        return 0;
+
+    case HDIO_GETGEO_BIG: 
+        /*
+         * note: these values are complete garbage. The user buffer is a
+         * struct hd_big_geometry, whose 'cylinders' field is wider than the
+         * one in struct hd_geometry, so its own layout must be used.
+         */
+        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
+        if (!argument) return -EINVAL;
+        if (put_user(0x00,  (unsigned long *) &big_geo->start))  return -EFAULT;
+        if (put_user(0xff,  (byte *)&big_geo->heads))   return -EFAULT;
+        if (put_user(0x3f,  (byte *)&big_geo->sectors)) return -EFAULT;
+        if (put_user(0x106, (unsigned int *) &big_geo->cylinders))
+            return -EFAULT;
+        return 0;
+
+    case CDROMMULTISESSION:
+        DPRINTK("FIXME: support multisession CDs later\n");
+        /* Report an all-zero struct cdrom_multisession, byte by byte. */
+        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
+            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
+        return 0;
+
+    case SCSI_IOCTL_GET_BUS_NUMBER:
+        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev\n");
+        return -ENOSYS;
+
+    default:
+        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
+        return -ENOSYS;
+    }
+    
+    return 0;
+}
+
+/*
+ * Check for media change: should probably do something here in some cases.
+ * Currently only emits a debug message and always returns 0.
+ */
+int xen_block_check(kdev_t dev)
+{
+    DPRINTK("xen_block_check\n");
+    return 0;
+}
+
+/*
+ * Re-read the partition table of @dev.
+ *
+ * Returns 0 on success, -EINVAL if the block device, gendisk or disk
+ * structure cannot be found or the device has zero capacity, and -EBUSY if
+ * the disk's usage count is greater than one. Units whose partitions are
+ * Xen-specified (GENHD_FL_VIRT_PARTNS) are left untouched.
+ */
+int xen_block_revalidate(kdev_t dev)
+{
+    struct block_device *bdev;
+    struct gendisk *gd;
+    xl_disk_t *xldisk;
+    unsigned long nr_sects;
+    int idx, minor, err = 0;
+
+    bdev = bdget(dev);
+    if ( bdev == NULL )
+        return -EINVAL;
+
+    /*
+     * Update of partition info, and check of usage count, is protected
+     * by the per-block-device semaphore.
+     */
+    down(&bdev->bd_sem);
+
+    err = -EINVAL;
+    if ( (gd = get_gendisk(dev)) == NULL )
+        goto out;
+    if ( (xldisk = xldev_to_xldisk(dev)) == NULL )
+        goto out;
+    if ( (nr_sects = gd->part[MINOR(dev)].nr_sects) == 0 )
+        goto out;
+
+    err = -EBUSY;
+    if ( xldisk->usage > 1 )
+        goto out;
+
+    err = 0;
+
+    /* Only reread partition table if VBDs aren't mapped to partitions. */
+    if ( gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+        goto out;
+
+    /* Wipe every minor of this unit, highest first, then re-scan. */
+    idx = gd->max_p;
+    while ( idx-- > 0 )
+    {
+        invalidate_device(dev+idx, 1);
+        minor = MINOR(dev+idx);
+        gd->part[minor].start_sect = 0;
+        gd->part[minor].nr_sects   = 0;
+        gd->sizes[minor]           = 0;
+    }
+
+    grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, nr_sects);
+
+ out:
+    up(&bdev->bd_sem);
+    bdput(bdev);
+    return err;
+}
+
+
+/*
+ * hypervisor_request
+ *
+ * Queue one block I/O request on the shared ring, merging it into the
+ * previous ring entry as an extra segment when it continues that entry.
+ * 
+ * id: for guest use only. In the merge path it is cast to a
+ *   struct buffer_head pointer.
+ * operation: XEN_BLOCK_READ or XEN_BLOCK_WRITE; anything else panics.
+ * buffer: buffer to read/write into. this should be a
+ *   virtual address in the guest os.
+ * sector_number: start sector relative to the partition named by 'device'.
+ * nr_sectors: length in sectors; must be below 2^9 (BUG() otherwise).
+ * device: device the request is addressed to.
+ *
+ * Returns 0 if the request was queued (or merged), 1 if it could not be
+ * queued because the driver is closed or the ring is plugged.
+ */
+static int hypervisor_request(unsigned long   id,
+                              int             operation,
+                              char *          buffer,
+                              unsigned long   sector_number,
+                              unsigned short  nr_sectors,
+                              kdev_t          device)
+{
+    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
+    struct gendisk *gd;
+    blk_ring_req_entry_t *req;
+    struct buffer_head *bh;
+
+    /*
+     * The sector count is OR-ed into the low 9 bits of the buffer address
+     * below, so it must fit in 9 bits and those address bits must be zero.
+     */
+    if ( unlikely(nr_sectors >= (1<<9)) )
+        BUG();
+    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
+        BUG();
+
+    if ( unlikely(state == STATE_CLOSED) )
+        return 1;
+
+    switch ( operation )
+    {
+
+    case XEN_BLOCK_READ:
+    case XEN_BLOCK_WRITE:
+        gd = get_gendisk(device); 
+
+        /*
+         * Update the sector_number we'll pass down as appropriate; note that
+         * we could sanity check that resulting sector will be in this
+         * partition, but this will happen in xen anyhow.
+         */
+        sector_number += gd->part[MINOR(device)].start_sect;
+
+        /*
+         * If this unit doesn't consist of virtual (i.e., Xen-specified)
+         * partitions then we clear the partn bits from the device number.
+         */
+        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
+               GENHD_FL_VIRT_PARTNS) )
+            device &= ~(gd->max_p - 1);
+
+        /*
+         * Same operation, same device, and starting exactly where the
+         * previous request ended: append as a new segment to the most
+         * recently queued ring entry instead of using a new one.
+         */
+        if ( (sg_operation == operation) &&
+             (sg_dev == device) &&
+             (sg_next_sect == sector_number) )
+        {
+            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
+            /* Chain this buffer head in front of the entry's previous id. */
+            bh = (struct buffer_head *)id;
+            bh->b_reqnext = (struct buffer_head *)req->id;
+            req->id = id;
+            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
+            /* Stop merging once the entry has no free segment slots left. */
+            if ( ++req->nr_segments < MAX_BLK_SEGS )
+                sg_next_sect += nr_sectors;
+            else
+                DISABLE_SCATTERGATHER();
+            return 0;
+        }
+        else if ( RING_PLUGGED )
+        {
+            return 1;
+        }
+        else
+        {
+            /* Start a new entry; remember it as the next merge candidate. */
+            sg_operation = operation;
+            sg_dev       = device;
+            sg_next_sect = sector_number + nr_sectors;
+        }
+        break;
+
+    default:
+        panic("unknown op %d\n", operation);
+    }
+
+    /* Fill out a communications ring structure. */
+    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
+    req->id            = id;
+    req->operation     = operation;
+    req->sector_number = (xen_sector_t)sector_number;
+    req->device        = device; 
+    req->nr_segments   = 1;
+    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
+    /* Only the private producer advances here; the shared one is untouched. */
+    req_prod++;
+
+    return 0;
+}
+
+
+/*
+ * do_xlblk_request
+ *  read a block; request is in a request queue
+ */
+void do_xlblk_request(request_queue_t *rq)
+{
+    struct request *req;
+    struct buffer_head *bh, *next_bh;
+    int rw, nsect, full, queued = 0;
+
+    DPRINTK("xlblk.c::do_xlblk_request\n"); 
+
+    /* Drain the queue until it is plugged, empty, or the ring fills up. */
+    while ( !rq->plugged && !list_empty(&rq->queue_head))
+    {
+        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
+            goto out;
+  
+        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
+                req, req->cmd, req->sector,
+                req->current_nr_sectors, req->nr_sectors, req->bh);
+
+        /* Read-ahead is issued as an ordinary read. */
+        rw = req->cmd;
+        if ( rw == READA )
+            rw = READ;
+        if ( unlikely((rw != READ) && (rw != WRITE)) )
+            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
+
+        req->errors = 0;
+
+        /* Each buffer head of the request is handed to Xen individually. */
+        bh = req->bh;
+        while ( bh != NULL )
+        {
+            /*
+             * Detach the buffer head first: hypervisor_request() may reuse
+             * b_reqnext to chain merged buffer heads together.
+             */
+            next_bh = bh->b_reqnext;
+            bh->b_reqnext = NULL;
+
+            full = hypervisor_request(
+                (unsigned long)bh,
+                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
+                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
+
+            if ( full )
+            { 
+                /*
+                 * Not accepted: restore the link and park this queue so
+                 * kick_pending_request_queues() can retry it later.
+                 */
+                bh->b_reqnext = next_bh;
+                pending_queues[nr_pending++] = rq;
+                if ( unlikely(nr_pending >= MAX_PENDING) )
+                    BUG();
+                goto out; 
+            }
+
+            queued++;
+
+            /* Dequeue the buffer head from the request. */
+            nsect = bh->b_size >> 9;
+            bh = req->bh = next_bh;
+            
+            if ( bh != NULL )
+            {
+                /* There's another buffer head to do. Update the request. */
+                req->hard_sector += nsect;
+                req->hard_nr_sectors -= nsect;
+                req->sector = req->hard_sector;
+                req->nr_sectors = req->hard_nr_sectors;
+                req->current_nr_sectors = bh->b_size >> 9;
+                req->buffer = bh->b_data;
+            }
+            else
+            {
+                /* That was the last buffer head. Finalise the request. */
+                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
+                    BUG();
+                blkdev_dequeue_request(req);
+                end_that_request_last(req);
+            }
+        }
+    }
+
+ out:
+    /* Notify Xen only if at least one buffer head was actually queued. */
+    if ( queued != 0 ) signal_requests_to_xen();
+}
+
+
+static void kick_pending_request_queues(void)
+{
+    /* No parked queues: nothing to restart. */
+    if ( nr_pending == 0 )
+        return;
+
+    /* Only restart queues once the ring is less than half occupied. */
+    if ( (req_prod - resp_cons) >= (BLK_RING_SIZE >> 1) )
+        return;
+
+    /* Re-run parked queues, newest first, until the ring plugs again. */
+    while ( nr_pending != 0 )
+    {
+        if ( !RING_PLUGGED )
+            do_xlblk_request(pending_queues[--nr_pending]);
+        else
+            break;
+    }
+}
+
+
+/*
+ * Response interrupt handler: consumes every response Xen has produced since
+ * resp_cons, completes the associated buffer heads, then tries to restart
+ * any request queues that were parked because the ring was busy.
+ */
+static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    BLK_RING_IDX i; 
+    unsigned long flags; 
+    struct buffer_head *bh, *next_bh;
+    
+    /* Ignore interrupts while the interface is closed. */
+    if ( unlikely(state == STATE_CLOSED) )
+        return;
+    
+    spin_lock_irqsave(&io_request_lock, flags);     
+
+    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
+    {
+        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
+        switch ( bret->operation )
+        {
+        case XEN_BLOCK_READ:
+        case XEN_BLOCK_WRITE:
+            if ( unlikely(bret->status != 0) )
+                DPRINTK("Bad return from blkdev data request: %lx\n",
+                        bret->status);
+            /*
+             * The id is the head of a b_reqnext chain of buffer heads (one
+             * per merged segment); complete each with the same status.
+             */
+            for ( bh = (struct buffer_head *)bret->id; 
+                  bh != NULL; 
+                  bh = next_bh )
+            {
+                /* Read the link before b_end_io() gets the buffer head. */
+                next_bh = bh->b_reqnext;
+                bh->b_reqnext = NULL;
+                bh->b_end_io(bh, !bret->status);
+            }
+            break;
+     
+        default:
+            BUG();
+        }
+    }
+    
+    resp_cons = i;
+
+    kick_pending_request_queues();
+
+    spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+/*
+ * Resets the block ring via Xen, maps it at FIX_BLKRING_BASE, zeroes all
+ * producer/consumer indices and finally marks the interface active.
+ */
+static void reset_xlblk_interface(void)
+{
+    block_io_op_t op; 
+
+    /* Forget any parked request queues. */
+    nr_pending = 0;
+
+    op.cmd = BLOCK_IO_OP_RESET;
+    if ( HYPERVISOR_block_io_op(&op) != 0 )
+        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
+
+    /*
+     * NOTE(review): the result of this call is discarded, so op.u.ring_mfn
+     * is used unchecked below -- confirm the operation cannot fail.
+     */
+    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
+    (void)HYPERVISOR_block_io_op(&op);
+
+    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
+    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
+    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+
+    /* Make the ring setup visible before the state change. */
+    wmb();
+    state = STATE_ACTIVE;
+}
+
+
+/*
+ * xlblk_init - bring up the block-device front end
+ *
+ * Resets the ring, binds the two block-device VIRQs to IRQs, installs their
+ * handlers and then initialises the VBD layer.
+ *
+ * Returns 0 on success, or the error from request_irq(). On failure every
+ * IRQ handler and VIRQ binding acquired so far is released again.
+ */
+int __init xlblk_init(void)
+{
+    int error;
+
+    reset_xlblk_interface();
+
+    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
+    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
+
+    error = request_irq(xlblk_response_irq, xlblk_response_int,
+                        SA_SAMPLE_RANDOM, "blkdev", NULL);
+    if ( error )
+    {
+        printk(KERN_ALERT "Could not allocate receive interrupt\n");
+        goto fail_unbind;
+    }
+
+    error = request_irq(xlblk_update_irq, xlblk_update_int,
+                        0, "blkdev", NULL);
+    if ( error )
+    {
+        printk(KERN_ALERT "Could not allocate block update interrupt\n");
+        goto fail_free_response;
+    }
+
+    (void)xlvbd_init();
+
+    return 0;
+
+    /* Unwind in reverse order of acquisition (mirrors xlblk_cleanup()). */
+ fail_free_response:
+    free_irq(xlblk_response_irq, NULL);
+ fail_unbind:
+    unbind_virq_from_irq(VIRQ_VBD_UPD);
+    unbind_virq_from_irq(VIRQ_BLKDEV);
+    return error;
+}
+
+
+/*
+ * Module exit: tears down the VBD layer, then releases both IRQ handlers
+ * and the VIRQ bindings that xlblk_init() set up.
+ */
+static void __exit xlblk_cleanup(void)
+{
+    xlvbd_cleanup();
+    free_irq(xlblk_response_irq, NULL);
+    free_irq(xlblk_update_irq, NULL);
+    unbind_virq_from_irq(VIRQ_BLKDEV);
+    unbind_virq_from_irq(VIRQ_VBD_UPD);
+}
+
+
+#ifdef MODULE
+module_init(xlblk_init);
+module_exit(xlblk_cleanup);
+#endif
+
+
+/*
+ * Quiesces the block interface: marks it suspended, waits until every
+ * request published on the ring has been answered, then marks it closed and
+ * unmaps the ring.
+ */
+void blkdev_suspend(void)
+{
+    state = STATE_SUSPENDED;
+    wmb();
+
+    /*
+     * Poll, sleeping one tick at a time, until the response consumer index
+     * has caught up with the shared request producer index.
+     */
+    while ( resp_cons != blk_ring->req_prod )
+    {
+        barrier();
+        current->state = TASK_INTERRUPTIBLE;
+        schedule_timeout(1);
+    }
+
+    wmb();
+    state = STATE_CLOSED;
+    wmb();
+
+    /* The ring mapping is re-established by reset_xlblk_interface(). */
+    clear_fixmap(FIX_BLKRING_BASE);
+}
+
+
+/*
+ * Re-initialises the ring after a suspend and, under io_request_lock,
+ * restarts any request queues still recorded as pending.
+ */
+void blkdev_resume(void)
+{
+    reset_xlblk_interface();
+    spin_lock_irq(&io_request_lock);
+    kick_pending_request_queues();
+    spin_unlock_irq(&io_request_lock);
+}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h

new file mode 100644 (file)

index 0000000..e41e039
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
@@ -0,0 +1,82 @@
+/******************************************************************************
+ * block.h
+ * 
+ * Shared definitions between all levels of XenoLinux Virtual block devices.
+ */
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+#include <asm/hypervisor-ifs/vbd.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#if 0
+#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+/* Private gendisk->flags[] values. */
+#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
+#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'.
+ * They hang in an array off the gendisk structure. We may end up putting
+ * all kinds of interesting stuff here :-)
+ */
+typedef struct xl_disk {
+    /* Non-zero means "in use": vbd.c refuses to update or remove the VBD. */
+    int usage;
+} xl_disk_t;
+
+extern int xen_control_msg(int operration, char *buffer, int size);
+extern int xen_block_open(struct inode *inode, struct file *filep);
+extern int xen_block_release(struct inode *inode, struct file *filep);
+extern int xen_block_ioctl(struct inode *inode, struct file *filep,
+                                 unsigned command, unsigned long argument);
+extern int xen_block_check(kdev_t dev);
+extern int xen_block_revalidate(kdev_t dev);
+extern void do_xlblk_request (request_queue_t *rq); 
+
+extern void xlvbd_update_vbds(void);
+
+/*
+ * Maps a device number to its xl_disk_t entry in the owning gendisk's
+ * real_devices array. Returns NULL if no gendisk is registered for it.
+ */
+static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
+{
+    struct gendisk *owner = get_gendisk(xldev);
+    xl_disk_t *units;
+
+    if ( owner != NULL )
+    {
+        /* One xl_disk_t per unit; the unit is the minor with partn bits off. */
+        units = (xl_disk_t *)owner->real_devices;
+        return &units[MINOR(xldev) >> owner->minor_shift];
+    }
+
+    return NULL;
+}
+
+
+/* Virtual block-device subsystem. */
+extern int  xlvbd_init(void);
+extern void xlvbd_cleanup(void); 
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c

new file mode 100644 (file)

index 0000000..e08b976
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
@@ -0,0 +1,561 @@
+/******************************************************************************
+ * vbd.c
+ * 
+ * Xenolinux virtual block-device driver (xvd).
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ */
+
+#include "block.h"
+#include <linux/blk.h>
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.
+ * potentially combinations of the two) in the naming scheme and in a few 
+ * other places (like default readahead, etc).
+ */
+#define XLIDE_MAJOR_NAME  "hd"
+#define XLSCSI_MAJOR_NAME "sd"
+#define XLVBD_MAJOR_NAME "xvd"
+
+#define XLIDE_DEVS_PER_MAJOR   2
+#define XLSCSI_DEVS_PER_MAJOR 16
+#define XLVBD_DEVS_PER_MAJOR  16
+
+#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
+#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
+
+#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
+#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
+
+#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
+#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
+
+/* The below are for the generic drivers/block/ll_rw_block.c code. */
+static int xlide_blksize_size[256];
+static int xlide_hardsect_size[256];
+static int xlide_max_sectors[256];
+static int xlscsi_blksize_size[256];
+static int xlscsi_hardsect_size[256];
+static int xlscsi_max_sectors[256];
+static int xlvbd_blksize_size[256];
+static int xlvbd_hardsect_size[256];
+static int xlvbd_max_sectors[256];
+
+/* Information from Xen about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static xen_disk_t *vbd_info;
+
+/*
+ * Block-device operations shared by every major this driver registers
+ * (passed to register_blkdev() and stored in each gendisk).
+ */
+static struct block_device_operations xlvbd_block_fops = 
+{
+    open:               xen_block_open,
+    release:            xen_block_release,
+    ioctl:              xen_block_ioctl,
+    check_media_change: xen_block_check,
+    revalidate:         xen_block_revalidate,
+};
+
+/*
+ * Asks Xen for the list of VBDs, filling @disk_info (room for MAX_VBDS
+ * entries). Returns the number of entries reported, or -1 if the probe
+ * hypercall fails.
+ */
+static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
+{
+    block_io_op_t probe;
+    int rc;
+
+    /* Build the probe request from a fully zeroed operation block. */
+    memset(&probe, 0, sizeof(probe));
+    probe.cmd                      = BLOCK_IO_OP_VBD_PROBE;
+    probe.u.probe_params.domain    = 0;
+    probe.u.probe_params.xdi.max   = MAX_VBDS;
+    probe.u.probe_params.xdi.disks = disk_info;
+    probe.u.probe_params.xdi.count = 0;
+
+    rc = HYPERVISOR_block_io_op(&probe);
+    if ( rc == 0 )
+        return probe.u.probe_params.xdi.count;
+
+    printk(KERN_ALERT "Could not probe disks (%d)\n", rc);
+    return -1;
+}
+
+/*
+ * xlvbd_alloc_gendisk - allocate and fill a gendisk for a new major
+ *
+ * Allocates the gendisk and its per-minor (sizes, part) and per-unit
+ * (real_devices, de_arr, flags) arrays, all zeroed. Returns NULL, with
+ * nothing leaked, if any allocation fails.
+ */
+static struct gendisk *xlvbd_alloc_gendisk(int major, char *major_name,
+                                           int max_part, int is_ide,
+                                           int is_scsi)
+{
+    struct gendisk *gd;
+    int nr_minors;
+
+    if ( (gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL)) == NULL )
+        return NULL;
+
+    gd->major      = major;
+    gd->major_name = major_name;
+    gd->max_p      = max_part;
+
+    if ( is_ide )
+    {
+        gd->minor_shift = XLIDE_PARTN_SHIFT;
+        gd->nr_real     = XLIDE_DEVS_PER_MAJOR;
+    }
+    else if ( is_scsi )
+    {
+        gd->minor_shift = XLSCSI_PARTN_SHIFT;
+        gd->nr_real     = XLSCSI_DEVS_PER_MAJOR;
+    }
+    else
+    {
+        gd->minor_shift = XLVBD_PARTN_SHIFT;
+        gd->nr_real     = XLVBD_DEVS_PER_MAJOR;
+    }
+
+    /*
+     * The sizes[] and part[] arrays hold the sizes and other information
+     * about every partition with this 'major' (i.e. every disk sharing the
+     * 8 bit prefix * max partns per disk).
+     */
+    nr_minors = max_part * gd->nr_real;
+
+    gd->sizes        = kmalloc(nr_minors * sizeof(int), GFP_KERNEL);
+    gd->part         = kmalloc(nr_minors * sizeof(struct hd_struct),
+                               GFP_KERNEL);
+    gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), GFP_KERNEL);
+    gd->de_arr       = kmalloc(gd->nr_real * sizeof(*gd->de_arr), GFP_KERNEL);
+    gd->flags        = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
+
+    if ( (gd->sizes == NULL) || (gd->part == NULL) ||
+         (gd->real_devices == NULL) || (gd->de_arr == NULL) ||
+         (gd->flags == NULL) )
+    {
+        /* kfree(NULL) is a no-op, so free unconditionally. */
+        kfree(gd->flags);
+        kfree(gd->de_arr);
+        kfree(gd->real_devices);
+        kfree(gd->part);
+        kfree(gd->sizes);
+        kfree(gd);
+        return NULL;
+    }
+
+    memset(gd->sizes, 0, nr_minors * sizeof(int));
+    memset(gd->part,  0, nr_minors * sizeof(struct hd_struct));
+    memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
+    memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
+    memset(gd->flags,  0, gd->nr_real * sizeof(*gd->flags));
+
+    gd->next = NULL;
+    gd->fops = &xlvbd_block_fops;
+
+    return gd;
+}
+
+/*
+ * xlvbd_init_device - initialise a VBD device
+ * @disk:              a xen_disk_t describing the VBD
+ *
+ * Takes a xen_disk_t * that describes a VBD the domain has access to.
+ * Performs appropriate initialisation and registration of the device.
+ *
+ * Care needs to be taken when making re-entrant calls to ensure that
+ * corruption does not occur.  Also, devices that are in use should not have
+ * their details updated.  This is the caller's responsibility.
+ *
+ * Returns 0 on success and a negative value on failure (bdget() failure,
+ * device in use, major could not be registered, or out of memory).
+ */
+static int xlvbd_init_device(xen_disk_t *xd)
+{
+    int device = xd->device;
+    int major  = MAJOR(device);
+    int minor  = MINOR(device);
+    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
+    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
+    char *major_name;
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk;
+    int i, rc = 0, max_part, partno;
+    unsigned long capacity;
+
+    unsigned char buf[64];
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    /*
+     * Update of partition info, and check of usage count, is protected
+     * by the per-block-device semaphore.
+     */
+    down(&bd->bd_sem);
+
+    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
+    {
+        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+
+    /* Pick the naming scheme and partition fan-out for this major. */
+    if ( is_ide ) {
+
+       major_name = XLIDE_MAJOR_NAME;
+       max_part   = XLIDE_MAX_PART;
+
+    } else if ( is_scsi ) {
+
+       major_name = XLSCSI_MAJOR_NAME;
+       max_part   = XLSCSI_MAX_PART;
+
+    } else if (XD_VIRTUAL(xd->info)) {
+
+       major_name = XLVBD_MAJOR_NAME;
+       max_part   = XLVBD_MAX_PART;
+
+    } else {
+
+        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
+       printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n",
+              major, minor);
+       is_scsi    = 1;
+       major_name = "cciss";
+       max_part   = XLSCSI_MAX_PART;
+
+    }
+
+    partno = minor & (max_part - 1);
+
+    /* First VBD seen on this major: register it and build its gendisk. */
+    if ( (gd = get_gendisk(device)) == NULL )
+    {
+        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
+        if ( rc < 0 )
+        {
+            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
+            goto out;
+        }
+
+        /* Construct an appropriate gendisk structure. */
+        gd = xlvbd_alloc_gendisk(major, major_name, max_part,
+                                 is_ide, is_scsi);
+        if ( gd == NULL )
+        {
+            printk(KERN_ALERT "XL VBD: out of memory for major %d\n", major);
+            unregister_blkdev(major, major_name);
+            rc = -ENOMEM;
+            goto out;
+        }
+
+        if ( is_ide )
+        {
+            blksize_size[major]  = xlide_blksize_size;
+            hardsect_size[major] = xlide_hardsect_size;
+            max_sectors[major]   = xlide_max_sectors;
+            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
+        }
+        else if ( is_scsi )
+        {
+            blksize_size[major]  = xlscsi_blksize_size;
+            hardsect_size[major] = xlscsi_hardsect_size;
+            max_sectors[major]   = xlscsi_max_sectors;
+            read_ahead[major]    = 0; /* XXX 8; -- guessing */
+        }
+        else
+        {
+            blksize_size[major]  = xlvbd_blksize_size;
+            hardsect_size[major] = xlvbd_hardsect_size;
+            max_sectors[major]   = xlvbd_max_sectors;
+            read_ahead[major]    = 8;
+        }
+
+        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
+
+        /*
+         * Turn off barking 'headactive' mode. We dequeue buffer heads as
+         * soon as we pass them down to Xen.
+         */
+        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
+
+        add_gendisk(gd);
+
+        blk_size[major] = gd->sizes;
+    }
+
+    if ( XD_READONLY(xd->info) )
+        set_device_ro(device, 1);
+
+    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
+
+    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
+    capacity = (unsigned long)xd->capacity;
+
+    if ( partno != 0 )
+    {
+        /*
+         * If this was previously set up as a real disc we will have set
+         * up partition-table information. Virtual partitions override
+         * 'real' partitions, and the two cannot coexist on a device.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(max_part-1)] != 0) )
+        {
+            /*
+             * Any non-zero sub-partition entries must be cleaned out before
+             * installing 'virtual' partition entries. The two types cannot
+             * coexist, and virtual partitions are favoured.
+             */
+            kdev_t dev = device & ~(max_part-1);
+            for ( i = max_part - 1; i > 0; i-- )
+            {
+                invalidate_device(dev+i, 1);
+                gd->part[MINOR(dev+i)].start_sect = 0;
+                gd->part[MINOR(dev+i)].nr_sects   = 0;
+                gd->sizes[MINOR(dev+i)]           = 0;
+            }
+            printk(KERN_ALERT
+                   "Virtual partitions found for /dev/%s - ignoring any "
+                   "real partition information we may have found.\n",
+                   disk_name(gd, MINOR(device), buf));
+        }
+
+        /* Need to skankily setup 'partition' information */
+        gd->part[minor].start_sect = 0;
+        gd->part[minor].nr_sects   = capacity;
+        gd->sizes[minor]           = capacity;
+
+        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+    }
+    else
+    {
+        /* Whole-unit VBD: sizes[] is kept in BLOCK_SIZE units here. */
+        gd->part[minor].nr_sects = capacity;
+        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
+
+        /* Some final fix-ups depending on the device type */
+        switch ( XD_TYPE(xd->info) )
+        {
+        case XD_TYPE_CDROM:
+        case XD_TYPE_FLOPPY:
+        case XD_TYPE_TAPE:
+            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE;
+            printk(KERN_ALERT
+                   "Skipping partition check on %s /dev/%s\n",
+                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" :
+                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" :
+                    "floppy"), disk_name(gd, MINOR(device), buf));
+            break;
+
+        case XD_TYPE_DISK:
+            /* Only check partitions on real discs (not virtual!). */
+            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            {
+                printk(KERN_ALERT
+                       "Skipping partition check on virtual /dev/%s\n",
+                       disk_name(gd, MINOR(device), buf));
+                break;
+            }
+            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
+            break;
+
+        default:
+            printk(KERN_ALERT "XenoLinux: unknown device type %d\n",
+                   XD_TYPE(xd->info));
+            break;
+        }
+    }
+
+ out:
+    up(&bd->bd_sem);
+    bdput(bd);
+    return rc;
+}
+
+
+/*
+ * xlvbd_remove_device - remove a device node if possible
+ * @device:       numeric device ID
+ *
+ * Updates the gendisk structure and invalidates devices.
+ *
+ * This is OK for now but in future, should perhaps consider where this should
+ * deallocate gendisks / unregister devices.
+ */
+static int xlvbd_remove_device(int device)
+{
+    int i, rc = 0, minor = MINOR(device);
+    struct gendisk *gd;
+    struct block_device *bd;
+    xl_disk_t *disk = NULL;
+
+    if ( (bd = bdget(device)) == NULL )
+        return -1;
+
+    /*
+     * Update of partition info, and check of usage count, is protected
+     * by the per-block-device semaphore.
+     */
+    down(&bd->bd_sem);
+
+    /* A device being removed must already have a gendisk and unit entry. */
+    if ( ((gd = get_gendisk(device)) == NULL) ||
+         ((disk = xldev_to_xldisk(device)) == NULL) )
+        BUG();
+
+    /* Busy devices are left alone; the caller sees a non-zero result. */
+    if ( disk->usage != 0 )
+    {
+        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+        rc = -1;
+        goto out;
+    }
+ 
+    if ( (minor & (gd->max_p-1)) != 0 )
+    {
+        /* 1: The VBD is mapped to a partition rather than a whole unit. */
+        invalidate_device(device, 1);
+       gd->part[minor].start_sect = 0;
+        gd->part[minor].nr_sects   = 0;
+        gd->sizes[minor]           = 0;
+
+        /* Clear the consists-of-virtual-partitions flag if possible. */
+        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+        /* Re-set the flag if any other partition entry is still sized. */
+        for ( i = 1; i < gd->max_p; i++ )
+            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
+                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+
+        /*
+         * If all virtual partitions are now gone, and a 'whole unit' VBD is
+         * present, then we can try to grok the unit's real partition table.
+         */
+        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
+             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
+        {
+            register_disk(gd,
+                          device&~(gd->max_p-1), 
+                          gd->max_p, 
+                          &xlvbd_block_fops,
+                          gd->part[minor&~(gd->max_p-1)].nr_sects);
+        }
+    }
+    else
+    {
+        /*
+         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
+         * NB. The partition entries are only cleared if there are no VBDs
+         * mapped to individual partitions on this unit.
+         */
+        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
+        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
+        /* Walk down from the highest entry to the whole-unit entry (0). */
+        while ( i >= 0 )
+        {
+            invalidate_device(device+i, 1);
+            gd->part[minor+i].start_sect = 0;
+            gd->part[minor+i].nr_sects   = 0;
+            gd->sizes[minor+i]           = 0;
+            i--;
+        }
+    }
+
+ out:
+    up(&bd->bd_sem);
+    bdput(bd);
+    return rc;
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and updates the driver
+ * state to match. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ */
+/*
+ * Re-probes Xen for the current VBD list and merges it with the list the
+ * driver already knows, removing vanished devices and initialising new or
+ * changed ones. Both lists are walked in ascending device-number order.
+ *
+ * If memory cannot be allocated, or the probe fails, the existing list is
+ * left untouched.
+ */
+void xlvbd_update_vbds(void)
+{
+    int i, j, k, old_nr, new_nr;
+    xen_disk_t *old_info, *new_info, *merged_info;
+
+    old_info = vbd_info;
+    old_nr   = nr_vbds;
+
+    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+    if ( unlikely(new_info == NULL) )
+    {
+        printk(KERN_ALERT "XL VBD: out of memory - VBD update skipped\n");
+        return;
+    }
+
+    /* The branch hint wraps the whole comparison, not just the assignment. */
+    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
+    {
+        kfree(new_info);
+        return;
+    }
+
+    /*
+     * Final list maximum size is old list + new list. This occurs only when
+     * old list and new list do not overlap at all, and we cannot yet destroy
+     * VBDs in the old list because the usage counts are busy.
+     */
+    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
+    if ( unlikely(merged_info == NULL) )
+    {
+        printk(KERN_ALERT "XL VBD: out of memory - VBD update skipped\n");
+        kfree(new_info);
+        return;
+    }
+
+    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
+    i = j = k = 0;
+
+    while ( (i < old_nr) && (j < new_nr) )
+    {
+        if ( old_info[i].device < new_info[j].device )
+        {
+            /* Only in the old list: keep it only if removal failed. */
+            if ( xlvbd_remove_device(old_info[i].device) != 0 )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+            i++;
+        }
+        else if ( old_info[i].device > new_info[j].device )
+        {
+            /* Only in the new list: keep it if initialisation succeeded. */
+            if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+            j++;
+        }
+        else
+        {
+            /*
+             * In both lists. Keep the old entry if it is unchanged or cannot
+             * be removed; otherwise replace it with the re-initialised one.
+             */
+            if ( ((old_info[i].capacity == new_info[j].capacity) &&
+                  (old_info[i].info == new_info[j].info)) ||
+                 (xlvbd_remove_device(old_info[i].device) != 0) )
+                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+            else if ( xlvbd_init_device(&new_info[j]) == 0 )
+                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+            i++; j++;
+        }
+    }
+
+    /* Leftover old entries: all have disappeared from Xen's view. */
+    for ( ; i < old_nr; i++ )
+    {
+        if ( xlvbd_remove_device(old_info[i].device) != 0 )
+            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+    }
+
+    /* Leftover new entries: all are newly visible devices. */
+    for ( ; j < new_nr; j++ )
+    {
+        if ( xlvbd_init_device(&new_info[j]) == 0 )
+            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+    }
+
+    vbd_info = merged_info;
+    nr_vbds  = k;
+
+    kfree(old_info);
+    kfree(new_info);
+}
+
+
+/*
+ * Set up all the linux device goop for the virtual block devices (vbd's) that 
+ * xen tells us about. Note that although from xen's pov VBDs are addressed 
+ * simply by an opaque 16-bit device number, the domain creation tools 
+ * conventionally allocate these numbers to correspond to those used by 'real' 
+ * linux -- this is just for convenience as it means e.g. that the same 
+ * /etc/fstab can be used when booting with or without xen.
+ */
+int __init xlvbd_init(void)
+{
+    int i;
+
+    /*
+     * If compiled as a module, we don't support unloading yet. We therefore
+     * permanently increment the reference count to disallow it.
+     */
+    SET_MODULE_OWNER(&xlvbd_block_fops);
+    MOD_INC_USE_COUNT;
+
+    /* Initialize the global arrays. */
+    for ( i = 0; i < 256; i++ )
+    {
+        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
+        xlide_blksize_size[i]  = 1024;
+        xlide_hardsect_size[i] = 512;
+        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
+
+        /* from the generic scsi disk code (drivers/scsi/sd.c) */
+        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
+        xlscsi_hardsect_size[i] = 512;
+        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
+
+        /* we don't really know what to set these to since it depends */
+        xlvbd_blksize_size[i]  = 512;
+        xlvbd_hardsect_size[i] = 512;
+        xlvbd_max_sectors[i]   = 128;
+    }
+
+    /*
+     * Without a buffer there is nothing to probe into. Start with an empty
+     * VBD list instead of handing a NULL pointer to the probe hypercall.
+     */
+    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+    if ( vbd_info == NULL )
+    {
+        printk(KERN_ALERT "XL VBD: out of memory - no VBDs probed at init\n");
+        nr_vbds = 0;
+        return 0;
+    }
+
+    nr_vbds = xlvbd_get_vbd_info(vbd_info);
+
+    if ( nr_vbds < 0 )
+    {
+        /* Probe failed: fall back to an empty list. */
+        kfree(vbd_info);
+        vbd_info = NULL;
+        nr_vbds  = 0;
+    }
+    else
+    {
+        for ( i = 0; i < nr_vbds; i++ )
+            xlvbd_init_device(&vbd_info[i]);
+    }
+
+    return 0;
+}
+
+
+#ifdef MODULE
+module_init(xlvbd_init);
+#endif
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile

new file mode 100644 (file)

index 0000000..20c8192
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile
@@ -0,0 +1,10 @@
+
+O_TARGET := drv.o
+
+subdir-y += frontend
+obj-y    += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile

new file mode 100644 (file)

index 0000000..032d02d
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c

new file mode 100644 (file)

index 0000000..dac8624
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
@@ -0,0 +1,26 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/main.c
+ * 
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A 
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/netif/frontend
+ * 
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+/* Placeholder initialiser: does nothing and always reports success. */
+static int __init init_module(void)
+{
+    return 0;
+}
+
+/* Placeholder exit hook: nothing was set up, so nothing to tear down. */
+static void cleanup_module(void)
+{
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile

new file mode 100644 (file)

index 0000000..032d02d
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c

new file mode 100644 (file)

index 0000000..f2c36f1
--- /dev/null
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
@@ -0,0 +1,565 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/frontend/main.c
+ * 
+ * Virtual network driver for XenoLinux.
+ * 
+ * Copyright (c) 2002-2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
+
+static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void cleanup_module(void);
+
+/* Dynamically-mapped IRQs. */
+static int network_irq, debug_irq;
+
+static struct list_head dev_list;
+
+struct net_private
+{
+    struct list_head list;
+    struct net_device *dev;
+
+    struct net_device_stats stats;
+    NET_RING_IDX rx_resp_cons, tx_resp_cons;
+    unsigned int net_ring_fixmap_idx, tx_full;
+    net_ring_t  *net_ring;
+    net_idx_t   *net_idx;
+    spinlock_t   tx_lock;
+    unsigned int idx; /* Domain-specific index of this VIF. */
+
+    unsigned int rx_bufs_to_notify;
+
+#define STATE_ACTIVE    0
+#define STATE_SUSPENDED 1
+#define STATE_CLOSED    2
+    unsigned int state;
+
+    /*
+     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+     * array is an index into a chain of free entries.
+     */
+    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
+    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
+};
+
+/* Access macros for acquiring and freeing slots in {tx,rx}_skbs[]. */
+#define ADD_ID_TO_FREELIST(_list, _id)             \
+    (_list)[(_id)] = (_list)[0];                   \
+    (_list)[0]     = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list)                \
+ ({ unsigned long _id = (unsigned long)(_list)[0]; \
+    (_list)[0]  = (_list)[_id];                    \
+    (unsigned short)_id; })
+
+
+static void _dbg_network_int(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+
+    if ( np->state == STATE_CLOSED )
+        return;
+    
+    printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
+           " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
+           " tx_event=0x%08x, state=%d\n",
+           np->tx_full, np->tx_resp_cons, 
+           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 
+           np->net_idx->tx_event,
+           test_bit(__LINK_STATE_XOFF, &dev->state));
+    printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
+           " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
+           np->rx_resp_cons, np->net_idx->rx_req_prod,
+           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
+}
+
+
+static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
+{
+    struct list_head *ent;
+    struct net_private *np;
+    list_for_each ( ent, &dev_list )
+    {
+        np = list_entry(ent, struct net_private, list);
+        _dbg_network_int(np->dev);
+    }
+}
+
+
+static int network_open(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    netop_t netop;
+    int i, ret;
+
+    netop.cmd = NETOP_RESET_RINGS;
+    netop.vif = np->idx;
+    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
+    {
+        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
+        return ret;
+    }
+
+    netop.cmd = NETOP_GET_VIF_INFO;
+    netop.vif = np->idx;
+    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
+    {
+        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
+        return ret;
+    }
+
+    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
+
+    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, 
+               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
+    np->net_ring = (net_ring_t *)fix_to_virt(
+        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
+
+    np->rx_bufs_to_notify = 0;
+    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+    memset(&np->stats, 0, sizeof(np->stats));
+    spin_lock_init(&np->tx_lock);
+    memset(np->net_ring, 0, sizeof(*np->net_ring));
+    memset(np->net_idx, 0, sizeof(*np->net_idx));
+
+    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
+        np->tx_skbs[i] = (void *)(i+1);
+    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
+        np->rx_skbs[i] = (void *)(i+1);
+
+    wmb();
+    np->state = STATE_ACTIVE;
+
+    network_alloc_rx_buffers(dev);
+
+    netif_start_queue(dev);
+
+    MOD_INC_USE_COUNT;
+
+    return 0;
+}
+
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+    NET_RING_IDX i, prod;
+    unsigned short id;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    tx_entry_t *tx_ring = np->net_ring->tx_ring;
+
+    do {
+        prod = np->net_idx->tx_resp_prod;
+
+        for ( i = np->tx_resp_cons; i != prod; i++ )
+        {
+            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
+            skb = np->tx_skbs[id];
+            ADD_ID_TO_FREELIST(np->tx_skbs, id);
+            dev_kfree_skb_any(skb);
+        }
+        
+        np->tx_resp_cons = prod;
+        
+        /*
+         * Set a new event, then check for race with update of tx_cons. Note
+         * that it is essential to schedule a callback, no matter how few
+         * buffers are pending. Even if there is space in the transmit ring,
+         * higher layers may be blocked because too much data is outstanding:
+         * in such cases notification from Xen is likely to be the only kick
+         * that we'll get.
+         */
+        np->net_idx->tx_event = 
+            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
+        mb();
+    }
+    while ( prod != np->net_idx->tx_resp_prod );
+
+    if ( np->tx_full && 
+         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
+    {
+        np->tx_full = 0;
+        if ( np->state == STATE_ACTIVE )
+            netif_wake_queue(dev);
+    }
+}
+
+
+static inline pte_t *get_ppte(void *addr)
+{
+    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+    pgd = pgd_offset_k(   (unsigned long)addr);
+    pmd = pmd_offset(pgd, (unsigned long)addr);
+    pte = pte_offset(pmd, (unsigned long)addr);
+    return pte;
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+    unsigned short id;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    netop_t netop;
+    NET_RING_IDX i = np->net_idx->rx_req_prod;
+
+    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || 
+         unlikely(np->state != STATE_ACTIVE) )
+        return;
+
+    do {
+        skb = dev_alloc_skb(RX_BUF_SIZE);
+        if ( unlikely(skb == NULL) )
+            break;
+
+        skb->dev = dev;
+
+        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
+            panic("alloc_skb needs to provide us page-aligned buffers.");
+
+        id = GET_ID_FROM_FREELIST(np->rx_skbs);
+        np->rx_skbs[id] = skb;
+
+        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
+        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
+            virt_to_machine(get_ppte(skb->head));
+
+        np->rx_bufs_to_notify++;
+    }
+    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
+
+    /*
+     * We may have allocated buffers which have entries outstanding in the page
+     * update queue -- make sure we flush those first!
+     */
+    flush_page_update_queue();
+
+    np->net_idx->rx_req_prod = i;
+    np->net_idx->rx_event    = np->rx_resp_cons + 1;
+        
+    /* Batch Xen notifications. */
+    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
+    {
+        netop.cmd = NETOP_PUSH_BUFFERS;
+        netop.vif = np->idx;
+        (void)HYPERVISOR_net_io_op(&netop);
+        np->rx_bufs_to_notify = 0;
+    }
+}
+
+
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+    unsigned short id;
+    struct net_private *np = (struct net_private *)dev->priv;
+    tx_req_entry_t *tx;
+    netop_t netop;
+    NET_RING_IDX i;
+
+    if ( unlikely(np->tx_full) )
+    {
+        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
+        netif_stop_queue(dev);
+        return -ENOBUFS;
+    }
+
+    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
+                  PAGE_SIZE) )
+    {
+        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
+        if ( unlikely(new_skb == NULL) )
+            return 1;
+        skb_put(new_skb, skb->len);
+        memcpy(new_skb->data, skb->data, skb->len);
+        dev_kfree_skb(skb);
+        skb = new_skb;
+    }   
+    
+    spin_lock_irq(&np->tx_lock);
+
+    i = np->net_idx->tx_req_prod;
+
+    id = GET_ID_FROM_FREELIST(np->tx_skbs);
+    np->tx_skbs[id] = skb;
+
+    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
+
+    tx->id   = id;
+    tx->addr = phys_to_machine(virt_to_phys(skb->data));
+    tx->size = skb->len;
+
+    wmb();
+    np->net_idx->tx_req_prod = i + 1;
+
+    network_tx_buf_gc(dev);
+
+    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
+    {
+        np->tx_full = 1;
+        netif_stop_queue(dev);
+    }
+
+    spin_unlock_irq(&np->tx_lock);
+
+    np->stats.tx_bytes += skb->len;
+    np->stats.tx_packets++;
+
+    /* Only notify Xen if there are no outstanding responses. */
+    mb();
+    if ( np->net_idx->tx_resp_prod == i )
+    {
+        netop.cmd = NETOP_PUSH_BUFFERS;
+        netop.vif = np->idx;
+        (void)HYPERVISOR_net_io_op(&netop);
+    }
+
+    return 0;
+}
+
+
+static inline void _network_interrupt(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    unsigned long flags;
+    struct sk_buff *skb;
+    rx_resp_entry_t *rx;
+    NET_RING_IDX i;
+
+    if ( unlikely(np->state == STATE_CLOSED) )
+        return;
+    
+    spin_lock_irqsave(&np->tx_lock, flags);
+    network_tx_buf_gc(dev);
+    spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ again:
+    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
+    {
+        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
+
+        skb = np->rx_skbs[rx->id];
+        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+
+        if ( unlikely(rx->status != RING_STATUS_OK) )
+        {
+            /* Gate this error. We get a (valid) slew of them on suspend. */
+            if ( np->state == STATE_ACTIVE )
+                printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
+            dev_kfree_skb_any(skb);
+            continue;
+        }
+
+        /*
+         * Set up shinfo -- from alloc_skb. This was particularly nasty: the
+         * shared info is hidden at the back of the data area (presumably so it
+         * can be shared), but on page flip it gets clobbered.
+         */
+        atomic_set(&(skb_shinfo(skb)->dataref), 1);
+        skb_shinfo(skb)->nr_frags = 0;
+        skb_shinfo(skb)->frag_list = NULL;
+                                
+        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
+            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
+
+        skb->data = skb->tail = skb->head + rx->offset;
+        skb_put(skb, rx->size);
+        skb->protocol = eth_type_trans(skb, dev);
+
+        np->stats.rx_packets++;
+
+        np->stats.rx_bytes += rx->size;
+        netif_rx(skb);
+        dev->last_rx = jiffies;
+    }
+
+    np->rx_resp_cons = i;
+
+    network_alloc_rx_buffers(dev);
+    
+    /* Deal with hypervisor racing our resetting of rx_event. */
+    mb();
+    if ( np->net_idx->rx_resp_prod != i )
+        goto again;
+}
+
+
+static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
+{
+    struct list_head *ent;
+    struct net_private *np;
+    list_for_each ( ent, &dev_list )
+    {
+        np = list_entry(ent, struct net_private, list);
+        _network_interrupt(np->dev);
+    }
+}
+
+
+static int network_close(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    netop_t netop;
+
+    np->state = STATE_SUSPENDED;
+    wmb();
+
+    netif_stop_queue(np->dev);
+
+    netop.cmd = NETOP_FLUSH_BUFFERS;
+    netop.vif = np->idx;
+    (void)HYPERVISOR_net_io_op(&netop);
+
+    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
+            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
+    {
+        barrier();
+        current->state = TASK_INTERRUPTIBLE;
+        schedule_timeout(1);
+    }
+
+    wmb();
+    np->state = STATE_CLOSED;
+    wmb();
+
+    /* Now no longer safe to take interrupts for this device. */
+    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
+
+    MOD_DEC_USE_COUNT;
+
+    return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+    struct net_private *np = (struct net_private *)dev->priv;
+    return &np->stats;
+}
+
+
+static int __init init_module(void)
+{
+#if 0
+    int i, fixmap_idx=-1, err;
+    struct net_device *dev;
+    struct net_private *np;
+    netop_t netop;
+
+    INIT_LIST_HEAD(&dev_list);
+
+    network_irq = bind_virq_to_irq(VIRQ_NET);
+    debug_irq   = bind_virq_to_irq(VIRQ_DEBUG);
+
+    err = request_irq(network_irq, network_interrupt, 
+                      SA_SAMPLE_RANDOM, "network", NULL);
+    if ( err )
+    {
+        printk(KERN_WARNING "Could not allocate network interrupt\n");
+        goto fail;
+    }
+    
+    err = request_irq(debug_irq, dbg_network_int, 
+                      SA_SHIRQ, "net_dbg", &dbg_network_int);
+    if ( err )
+        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
+
+    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+    {
+        /* If the VIF is invalid then the query hypercall will fail. */
+        netop.cmd = NETOP_GET_VIF_INFO;
+        netop.vif = i;
+        if ( HYPERVISOR_net_io_op(&netop) != 0 )
+            continue;
+
+        /* We actually only support up to 4 vifs right now. */
+        if ( ++fixmap_idx == 4 )
+            break;
+
+        dev = alloc_etherdev(sizeof(struct net_private));
+        if ( dev == NULL )
+        {
+            err = -ENOMEM;
+            goto fail;
+        }
+
+        np = dev->priv;
+        np->state               = STATE_CLOSED;
+        np->net_ring_fixmap_idx = fixmap_idx;
+        np->idx                 = i;
+
+        SET_MODULE_OWNER(dev);
+        dev->open            = network_open;
+        dev->hard_start_xmit = network_start_xmit;
+        dev->stop            = network_close;
+        dev->get_stats       = network_get_stats;
+
+        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
+
+        if ( (err = register_netdev(dev)) != 0 )
+        {
+            kfree(dev);
+            goto fail;
+        }
+
+        np->dev = dev;
+        list_add(&np->list, &dev_list);
+    }
+
+    return 0;
+
+ fail:
+    cleanup_module();
+    return err;
+#endif
+    return 0;
+}
+
+
+static void cleanup_module(void)
+{
+    struct net_private *np;
+    struct net_device *dev;
+
+    while ( !list_empty(&dev_list) )
+    {
+        np = list_entry(dev_list.next, struct net_private, list);
+        list_del(&np->list);
+        dev = np->dev;
+        unregister_netdev(dev);
+        kfree(dev);
+    }
+
+    free_irq(network_irq, NULL);
+    free_irq(debug_irq, NULL);
+
+    unbind_virq_from_irq(VIRQ_NET);
+    unbind_virq_from_irq(VIRQ_DEBUG);
+}
+
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile

deleted file mode 100644 (file)

index 20c8192..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-
-O_TARGET := drv.o
-
-subdir-y += frontend
-obj-y    += frontend/drv.o
-
-subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
-obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
-
-include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile

deleted file mode 100644 (file)

index 4c8c173..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-O_TARGET := drv.o
-obj-y := main.o control.o interface.o vbd.o
-include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h

deleted file mode 100644 (file)

index 849ad1a..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/vblkif/backend/common.h
- */
-
-#ifndef __VBLKIF__BACKEND__COMMON_H__
-#define __VBLKIF__BACKEND__COMMON_H__
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/rbtree.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <asm/ctrl_if.h>
-#include <asm/io.h>
-#include "../vblkif.h"
-
-#ifndef NDEBUG
-#define ASSERT(_p) \
-    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#else
-#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-typedef struct blkif_st {
-    /* Unique identifier for this interface. */
-    domid_t          domid;
-    unsigned int     handle;
-    /* Physical parameters of the comms window. */
-    unsigned long    shmem_frame;
-    unsigned int     evtchn;
-    int              irq;
-    /* Comms information. */
-    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
-    BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
-    BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
-    /* VBDs attached to this interface. */
-    rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
-    spinlock_t       vbd_lock;      /* Protects VBD mapping. */
-    /* Private fields. */
-    struct blkif_st *hash_next;
-    struct list_head blkdev_list;
-    spinlock_t       blk_ring_lock;
-} blkif_t;
-
-void blkif_create(blkif_create_t *create);
-void blkif_destroy(blkif_destroy_t *destroy);
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-void blkif_get(blkif_t *blkif);
-void blkif_put(blkif_t *blkif);
-
-/* An entry in a list of xen_extents. */
-typedef struct _blkif_extent_le { 
-    blkif_extent_t extent;               /* an individual extent */
-    struct _blkif_extent_le *next;       /* and a pointer to the next */ 
-} blkif_extent_le_t; 
-
-typedef struct _vbd { 
-    blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
-    unsigned char      mode;      /* VBD_MODE_{R,W} */
-    unsigned char      type;      /* XD_TYPE_xxx */
-    blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
-    rb_node_t          rb;        /* for linking into R-B tree lookup struct */
-} vbd_t; 
-
-long vbd_create(blkif_vbd_create_t *create_params); 
-long vbd_grow(blkif_vbd_grow_t *grow_params); 
-long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
-long vbd_destroy(blkif_vbd_destroy_t *delete_params); 
-
-void destroy_all_vbds(struct task_struct *p);
-
-typedef struct {
-    blkif_t       *blkif;
-    unsigned long  id;
-    atomic_t       pendcnt;
-    unsigned short operation;
-    unsigned short status;
-} pending_req_t;
-
-/* Describes a [partial] disk extent (part of a block io request) */
-typedef struct {
-    unsigned short dev;
-    unsigned short nr_sects;
-    unsigned long  buffer;
-    xen_sector_t   sector_number;
-} phys_seg_t;
-
-int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
-
-int vblkif_be_controller_init(void);
-
-void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-
-#endif /* __VBLKIF__BACKEND__COMMON_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c

deleted file mode 100644 (file)

index 19c4d5b..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/vblkif/backend/control.c
- * 
- * Routines for interfacing with the control plane.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    switch ( msg->subtype )
-    {
-    case CMSG_BLKIF_BE_CREATE:
-        if ( msg->length != sizeof(blkif_create_t) )
-            goto parse_error;
-        blkif_create((blkif_create_t *)&msg->msg[0]);
-        break;        
-    case CMSG_BLKIF_BE_DESTROY:
-        if ( msg->length != sizeof(blkif_destroy_t) )
-            goto parse_error;
-        blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
-        break;        
-    case CMSG_BLKIF_BE_VBD_CREATE:
-        if ( msg->length != sizeof(blkif_vbd_create_t) )
-            goto parse_error;
-        vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
-        break;
-    case CMSG_BLKIF_BE_VBD_DESTROY:
-        if ( msg->length != sizeof(blkif_vbd_destroy_t) )
-            goto parse_error;
-        vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
-        break;
-    case CMSG_BLKIF_BE_VBD_GROW:
-        if ( msg->length != sizeof(blkif_vbd_grow_t) )
-            goto parse_error;
-        vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
-        break;
-    case CMSG_BLKIF_BE_VBD_SHRINK:
-        if ( msg->length != sizeof(blkif_vbd_shrink_t) )
-            goto parse_error;
-        vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
-        break;
-    default:
-        goto parse_error;
-    }
-
-    ctrl_if_send_response(msg);
-    return;
-
- parse_error:
-    msg->length = 0;
-    ctrl_if_send_response(msg);
-}
-
-int blkif_ctrlif_init(void)
-{
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
-    return 0;
-}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c

deleted file mode 100644 (file)

index 30e5c16..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/vblkif/backend/interface.c
- * 
- * Block-device interface management.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) \
-    (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            (blkif->domid != domid) && 
-            (blkif->handle != handle) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-void blkif_create(blkif_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    unsigned int  evtchn = create->evtchn;
-    unsigned long shmem_frame = create->shmem_frame;
-    blkif_t     **pblkif, *blkif;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif == NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-            goto found_match;
-        pblkif = &(*pblkif)->hash_next;
-    }
-
-    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid       = domid;
-    blkif->handle      = handle;
-    blkif->evtchn      = evtchn;
-    blkif->irq         = bind_evtchn_to_irq(evtchn);
-    blkif->shmem_frame = shmem_frame;
-    blkif->shmem_vbase = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
-    spin_lock_init(&blkif->vbd_lock);
-    spin_lock_init(&blkif->blk_ring_lock);
-
-    request_irq(irq, vblkif_be_int, 0, "vblkif-backend", blkif);
-
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
-
-    create->status = BLKIF_STATUS_OKAY;
-    return;
-
- found_match:
-    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
-    return;
-
- evtchn_in_use:
-    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
-    create->status = BLKIF_STATUS_ERROR;
-    return;
-}
-
-void blkif_destroy(blkif_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) == NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-            goto found_match;
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- found_match:
-    free_irq(blkif->irq, NULL);
-    unbind_evtchn_from_irq(blkif->evtchn);
-    *pblkif = blkif->hash_next;
-    kmem_cache_free(blkif_cachep, blkif);
-    destroy->status = BLKIF_STATUS_OKAY;
-}
-
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c

deleted file mode 100644 (file)

index cb44ac1..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c
+++ /dev/null
@@ -1,508 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/vblkif/backend/main.c
- * 
- * Back-end of the driver for virtual block devices. This portion of the
- * driver exports a 'unified' block-device interface that can be accessed
- * by any operating system that implements a compatible front end. A 
- * reference front-end implementation can be found in:
- *  arch/xen/drivers/vblkif/frontend
- * 
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-/*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
- * 
- * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
- */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-
-/*
- * Each outstanding request that we've passed to the lower device layers has a 
- * 'pending_req' allocated to it. Each buffer_head that completes decrements 
- * the pendcnt towards zero. When it hits zero, the specified domain has a 
- * response queued for it, with the saved 'id' passed back.
- * 
- * We can't allocate pending_req's in order, since they may complete out of 
- * order. We therefore maintain an allocation ring. This ring also indicates 
- * when enough work has been passed down -- at that point the allocation ring 
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
-/* NB. We use a different index type to differentiate from shared blk rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static kmem_cache_t *buffer_head_cachep;
-
-static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
-
-static int lock_buffer(blkif_t *blkif,
-                       unsigned long buffer,
-                       unsigned short size,
-                       int writeable_buffer);
-static void unlock_buffer(unsigned long buffer,
-                          unsigned short size,
-                          int writeable_buffer);
-
-static void io_schedule(unsigned long unused);
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_rw_block_io(blkif_t *blkif,
-                                 blk_ring_req_entry_t *req);
-static void make_response(blkif_t *blkif, unsigned long id, 
-                          unsigned short op, unsigned long st);
-
-
-/******************************************************************
- * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head io_schedule_list;
-static spinlock_t io_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
-    return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
-    unsigned long flags;
-    if ( !__on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&io_schedule_list_lock, flags);
-    if ( __on_blkdev_list(blkif) )
-    {
-        list_del(&blkif->blkdev_list);
-        blkif->blkdev_list.next = NULL;
-        blkif_put(blkif);
-    }
-    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
-    unsigned long flags;
-    if ( __on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&io_schedule_list_lock, flags);
-    if ( !__on_blkdev_list(blkif) )
-    {
-        list_add_tail(&blkif->blkdev_list, &io_schedule_list);
-        blkif_get(blkif);
-    }
-    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
-}
-
-
-/******************************************************************
- * SCHEDULER FUNCTIONS
- */
-
-static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
-
-static void io_schedule(unsigned long unused)
-{
-    blkif_t          *blkif;
-    struct list_head *ent;
-
-    /* Queue up a batch of requests. */
-    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
-            !list_empty(&io_schedule_list) )
-    {
-        ent = io_schedule_list.next;
-        blkif = list_entry(ent, blkif_t, blkdev_list);
-        blkif_get(blkif);
-        remove_from_blkdev_list(blkif);
-        if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
-            add_to_blkdev_list_tail(blkif);
-        blkif_put(blkif);
-    }
-
-    /* Push the batch through to disc. */
-    run_task_queue(&tq_disk);
-}
-
-static void maybe_trigger_io_schedule(void)
-{
-    /*
-     * Needed so that two processes, who together make the following predicate
-     * true, don't both read stale values and evaluate the predicate
-     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
-     */
-    smp_mb();
-
-    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-         !list_empty(&io_schedule_list) )
-        tasklet_schedule(&io_schedule_tasklet);
-}
-
-
-
-/******************************************************************
- * COMPLETION CALLBACK -- Called as bh->b_end_io()
- */
-
-static void end_block_io_op(struct buffer_head *bh, int uptodate)
-{
-    pending_req_t *pending_req = bh->b_private;
-
-    /* An error fails the entire request. */
-    if ( !uptodate )
-    {
-        DPRINTK("Buffer not up-to-date at end of operation\n");
-        pending_req->status = 2;
-    }
-
-    unlock_buffer(virt_to_phys(bh->b_data), 
-                  bh->b_size, 
-                  (pending_req->operation==READ));
-    
-    if ( atomic_dec_and_test(&pending_req->pendcnt) )
-    {
-        make_response(pending_req->blkif, pending_req->id,
-                      pending_req->operation, pending_req->status);
-        blkif_put(pending_req->blkif);
-        spin_lock(&pend_prod_lock);
-        pending_ring[MASK_PEND_IDX(pending_prod)] = 
-            pending_req - pending_reqs;
-        pending_prod++;
-        spin_unlock(&pend_prod_lock);
-        maybe_trigger_io_schedule();
-    }
-}
-
-
-
-/******************************************************************************
- * NOTIFICATION FROM GUEST OS.
- */
-
-void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-{
-    blkif_t *blkif = dev_id;
-    add_to_blkdev_list_tail(blkif);
-    maybe_trigger_io_schedule();
-}
-
-
-
-/******************************************************************
- * DOWNWARD CALLS -- These interface with the block-device layer proper.
- */
-
-static int lock_buffer(blkif_t *blkif,
-                       unsigned long buffer,
-                       unsigned short size,
-                       int writeable_buffer)
-{
-    unsigned long    pfn;
-
-    for ( pfn = buffer >> PAGE_SHIFT; 
-          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
-          pfn++ )
-    {
-    }
-
-    return 1;
-
- fail:
-    while ( pfn-- > (buffer >> PAGE_SHIFT) )
-    {        
-    }
-    return 0;
-}
-
-static void unlock_buffer(unsigned long buffer,
-                          unsigned short size,
-                          int writeable_buffer)
-{
-    unsigned long pfn;
-
-    for ( pfn = buffer >> PAGE_SHIFT; 
-          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
-          pfn++ )
-    {
-    }
-}
-
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
-{
-    blk_ring_t *blk_ring = blkif->blk_ring_base;
-    blk_ring_req_entry_t *req;
-    BLK_RING_IDX i;
-    int more_to_do = 0;
-
-    /* Take items off the comms ring, taking care not to overflow. */
-    for ( i = blkif->blk_req_cons; 
-          (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 
-                                        BLK_RING_SIZE);
-          i++ )
-    {
-        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
-        {
-            more_to_do = 1;
-            break;
-        }
-        
-        req = &blk_ring->ring[MASK_BLK_IDX(i)].req;
-        switch ( req->operation )
-        {
-        case BLKIF_OP_READ:
-        case BLKIF_OP_WRITE:
-            dispatch_rw_block_io(blkif, req);
-            break;
-
-        default:
-            DPRINTK("error: unknown block io operation [%d]\n",
-                    blk_ring->ring[i].req.operation);
-            make_response(blkif, blk_ring->ring[i].req.id, 
-                          blk_ring->ring[i].req.operation, 1);
-            break;
-        }
-    }
-
-    blkif->blk_req_cons = i;
-    return more_to_do;
-}
-
-static void dispatch_rw_block_io(blkif_t *blkif,
-                                 blk_ring_req_entry_t *req)
-{
-    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
-    struct buffer_head *bh;
-    int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
-    unsigned short nr_sects;
-    unsigned long buffer;
-    int i, tot_sects;
-    pending_req_t *pending_req;
-
-    /* We map virtual scatter/gather segments to physical segments. */
-    int new_segs, nr_psegs = 0;
-    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
-
-    /* Check that number of segments is sane. */
-    if ( unlikely(req->nr_segments == 0) || 
-         unlikely(req->nr_segments > MAX_BLK_SEGS) )
-    {
-        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
-        goto bad_descriptor;
-    }
-
-    /*
-     * Check each address/size pair is sane, and convert into a
-     * physical device and block offset. Note that if the offset and size
-     * crosses a virtual extent boundary, we may end up with more
-     * physical scatter/gather segments than virtual segments.
-     */
-    for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
-    {
-        buffer   = req->buffer_and_sects[i] & ~0x1FF;
-        nr_sects = req->buffer_and_sects[i] &  0x1FF;
-
-        if ( unlikely(nr_sects == 0) )
-        {
-            DPRINTK("zero-sized data request\n");
-            goto bad_descriptor;
-        }
-
-        phys_seg[nr_psegs].dev           = req->device;
-        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
-        phys_seg[nr_psegs].buffer        = buffer;
-        phys_seg[nr_psegs].nr_sects      = nr_sects;
-
-        /* Translate the request into the relevant 'physical device' */
-        new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
-        if ( new_segs < 0 )
-        { 
-            DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
-                    operation == READ ? "read" : "write", 
-                    req->sector_number + tot_sects, 
-                    req->sector_number + tot_sects + nr_sects, 
-                    req->device); 
-            goto bad_descriptor;
-        }
-  
-        nr_psegs += new_segs;
-        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
-    }
-
-    for ( i = 0; i < nr_psegs; i++ )
-    {
-        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
-                                   phys_seg[i].nr_sects << 9,
-                                   operation==READ)) )
-        {
-            DPRINTK("invalid buffer\n");
-            while ( i-- > 0 )
-                unlock_buffer(phys_seg[i].buffer, 
-                              phys_seg[i].nr_sects << 9,
-                              operation==READ);
-            goto bad_descriptor;
-        }
-    }
-
-    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
-    pending_req->blkif     = blkif;
-    pending_req->id        = req->id;
-    pending_req->operation = operation;
-    pending_req->status    = 0;
-    atomic_set(&pending_req->pendcnt, nr_psegs);
-
-    blkif_get(blkif);
-
-    /* Now we pass each segment down to the real blkdev layer. */
-    for ( i = 0; i < nr_psegs; i++ )
-    {
-        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
-        if ( unlikely(bh == NULL) )
-            panic("bh is null\n");
-        memset(bh, 0, sizeof (struct buffer_head));
-    
-        bh->b_size          = phys_seg[i].nr_sects << 9;
-        bh->b_dev           = phys_seg[i].dev;
-        bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
-
-        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
-           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
-        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
- 
-        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
-        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
-        bh->b_end_io        = end_block_io_op;
-        bh->b_private       = pending_req;
-
-        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
-        if ( operation == WRITE )
-            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
-
-        atomic_set(&bh->b_count, 1);
-
-        /* Dispatch a single request. We'll flush it to disc later. */
-        submit_bh(operation, bh);
-    }
-
-    return;
-
- bad_descriptor:
-    make_response(blkif, req->id, req->operation, 1);
-} 
-
-
-
-/******************************************************************
- * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
- */
-
-
-static void make_response(blkif_t *blkif, unsigned long id, 
-                          unsigned short op, unsigned long st)
-{
-    blk_ring_resp_entry_t *resp;
-
-    /* Place on the response ring for the relevant domain. */ 
-    spin_lock(&blkif->blk_ring_lock);
-    resp = &blkif->blk_ring_base->
-        ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
-    resp->id        = id;
-    resp->operation = op;
-    resp->status    = st;
-    wmb();
-    blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
-    spin_unlock(&blkif->blk_ring_lock);
-
-    /* Kick the relevant domain. */
-    notify_via_evtchn(blkif->evtchn);
-}
-
-static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
-{
-#if 0
-    unsigned long flags;
-    struct task_struct *p;
-    blk_ring_t *blk_ring;
-    int i;
-
-    printk("Dumping block queue stats: nr_pending = %d"
-           " (prod=0x%08x,cons=0x%08x)\n",
-           NR_PENDING_REQS, pending_prod, pending_cons);
-
-    read_lock_irqsave(&tasklist_lock, flags);
-    for_each_domain ( p )
-    {
-        printk("Domain: %llu\n", blkif->domain);
-        blk_ring = blkif->blk_ring_base;
-        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
-               "0x%08x on_list=%d\n",
-               blk_ring->req_prod, blkif->blk_req_cons,
-               blk_ring->resp_prod, blkif->blk_resp_prod,
-               __on_blkdev_list(p));
-    }
-    read_unlock_irqrestore(&tasklist_lock, flags);
-
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-    {
-        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
-               i, pending_reqs[i].domain, pending_reqs[i].id,
-               atomic_read(&pending_reqs[i].pendcnt), 
-               pending_reqs[i].operation, pending_reqs[i].status);
-    }
-#endif
-}
-
-void unlink_blkdev_info(blkif_t *blkif)
-{
-    unsigned long flags;
-
-    spin_lock_irqsave(&io_schedule_list_lock, flags);
-    if ( __on_blkdev_list(blkif) )
-    {
-        list_del(&blkif->blkdev_list);
-        blkif->blkdev_list.next = (void *)0xdeadbeef;
-        blkif_put(blkif);
-    }
-    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
-}
-
-static int __init init_module(void)
-{
-    int i;
-
-    pending_cons = 0;
-    pending_prod = MAX_PENDING_REQS;
-    memset(pending_reqs, 0, sizeof(pending_reqs));
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-        pending_ring[i] = i;
-    
-    for ( i = 0; i < NR_CPUS; i++ )
-        completed_bhs[i] = NULL;
-        
-    spin_lock_init(&io_schedule_list_lock);
-    INIT_LIST_HEAD(&io_schedule_list);
-
-    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
-                     SA_SHIRQ, "vblkif-backend-dbg", &blkif_debug_int) != 0 )
-        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
-
-    buffer_head_cachep = kmem_cache_create(
-        "buffer_head_cache", sizeof(struct buffer_head),
-        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-
-    return 0;
-}
-
-static void cleanup_module(void)
-{
-}
-
-module_init(init_module);
-module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c

deleted file mode 100644 (file)

index 2545c00..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c
+++ /dev/null
@@ -1,578 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/vblkif/backend/vbd.c
- * 
- * Routines for managing virtual block devices (VBDs).
- * 
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-void vbd_create(blkif_vbd_create_t *create) 
-{
-    vbd_t       *vbd; 
-    rb_node_t  **rb_p, *rb_parent = NULL;
-    blkif_t     *blkif;
-    blkif_vdev_t vdevice = create->vdevice;
-
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", 
-                create->domid, create->blkif_handle); 
-        create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    spin_lock(&blkif->vbd_lock);
-
-    rb_p = &blkif->vbd_rb.rb_node;
-    while ( *rb_p != NULL )
-    {
-        rb_parent = *rb_p;
-        vbd = rb_entry(rb_parent, vbd_t, rb);
-        if ( vdevice < vbd->vdevice )
-        {
-            rb_p = &rb_parent->rb_left;
-        }
-        else if ( vdevice > vbd->vdevice )
-        {
-            rb_p = &rb_parent->rb_right;
-        }
-        else
-        {
-            DPRINTK("vbd_create attempted for already existing vbd\n");
-            create->status = BLKIF_STATUS_VBD_EXISTS;
-            goto out;
-        }
-    }
-
-    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
-    {
-        DPRINTK("vbd_create: out of memory\n");
-        create->status = BLKIF_STATUS_OUT_OF_MEMORY;
-        goto out;
-    }
-
-    vbd->vdevice = vdevice; 
-    vbd->mode    = create->mode; 
-    vbd->type    = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
-    vbd->extents = NULL; 
-
-    rb_link_node(&vbd->rb, rb_parent, rb_p);
-    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
-
-    create->status = BLKIF_STATUS_OKAY;
-
- out:
-    spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
-}
-
-
-/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
-void vbd_grow(blkif_vbd_grow_t *grow) 
-{
-    blkif_t          *blkif;
-    xen_extent_le_t **px, *x; 
-    vbd_t            *vbd = NULL;
-    rb_node_t        *rb;
-    blkif_vdev_t      vdevice = grow->vdevice;
-
-    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", 
-                grow->domid, grow->blkif_handle); 
-        grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    spin_lock(&blkif->vbd_lock);
-
-    rb = blkif->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, vbd_t, rb);
-        if ( vdevice < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( vdevice > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            break;
-    }
-
-    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
-    {
-        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
-        grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
-        goto out;
-    } 
-
-    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
-    {
-        DPRINTK("vbd_grow: out of memory\n");
-        grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
-        goto out;
-    }
- 
-    x->extent.device        = grow->extent.device; 
-    x->extent.sector_start  = grow->extent.sector_start; 
-    x->extent.sector_length = grow->extent.sector_length; 
-    x->next                 = (xen_extent_le_t *)NULL; 
-
-    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
-        continue;
-
-    *px = x;
-
-    grow->status = BLKIF_STATUS_OKAY;
-
- out:
-    spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
-}
-
-
-void vbd_shrink(blkif_vbd_shrink_t *shrink)
-{
-    blkif_t          *blkif;
-    xen_extent_le_t **px, *x; 
-    vbd_t            *vbd = NULL;
-    rb_node_t        *rb;
-    blkif_vdev_t      vdevice = shrink->vdevice;
-
-    blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", 
-                shrink->domid, shrink->blkif_handle); 
-        shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    spin_lock(&blkif->vbd_lock);
-
-    rb = blkif->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, vbd_t, rb);
-        if ( vdevice < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( vdevice > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            break;
-    }
-
-    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
-    {
-        shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
-        goto out;
-    }
-
-    if ( unlikely(vbd->extents == NULL) )
-    {
-        shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
-        goto out;
-    }
-
-    /* Find the last extent. We now know that there is at least one. */
-    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
-        continue;
-
-    x   = *px;
-    *px = x->next;
-    kfree(x);
-
-    shrink->status = BLKIF_STATUS_OKAY;
-
- out:
-    spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
-}
-
-
-void vbd_destroy(blkif_vbd_destroy_t *destroy) 
-{
-    blkif_t         *blkif;
-    vbd_t           *vbd;
-    rb_node_t       *rb;
-    xen_extent_le_t *x, *t;
-    blkif_vdev_t     vdevice = destroy->vdevice;
-
-    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", 
-                destroy->domid, destroy->blkif_handle); 
-        destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    spin_lock(&blkif->vbd_lock);
-
-    rb = blkif->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, vbd_t, rb);
-        if ( vdevice < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( vdevice > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            goto found;
-    }
-
-    destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
-    goto out;
-
- found:
-    rb_erase(rb, &blkif->vbd_rb);
-    x = vbd->extents;
-    kfree(vbd);
-
-    while ( x != NULL )
-    {
-        t = x->next;
-        kfree(x);
-        x = t;
-    }
-    
- out:
-    spin_unlock(&blkif->vbd_lock);
-    blkif_put(blkif);
-}
-
-
-void destroy_all_vbds(blkif_t *blkif)
-{
-    vbd_t *vbd;
-    rb_node_t *rb;
-    xen_extent_le_t *x, *t;
-
-    spin_lock(&blkif->vbd_lock);
-
-    while ( (rb = blkif->vbd_rb.rb_node) != NULL )
-    {
-        vbd = rb_entry(rb, vbd_t, rb);
-
-        rb_erase(rb, &blkif->vbd_rb);
-        x = vbd->extents;
-        kfree(vbd);
-        
-        while ( x != NULL )
-        {
-            t = x->next;
-            kfree(x);
-            x = t;
-        }          
-    }
-
-    spin_unlock(&blkif->vbd_lock);
-}
-
-
-static int vbd_probe_single(xen_disk_info_t *xdi, 
-                            vbd_t *vbd, 
-                            struct task_struct *p)
-{
-    xen_extent_le_t *x; 
-    xen_disk_t cur_disk; 
-
-    if ( xdi->count == xdi->max )
-    {
-        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
-        return -ENOMEM; 
-    }
-
-    cur_disk.device = vbd->vdevice; 
-    cur_disk.info   = vbd->type;
-    if ( !VBD_CAN_WRITE(vbd) )
-        cur_disk.info |= XD_FLAG_RO; 
-    cur_disk.capacity = 0ULL;
-    for ( x = vbd->extents; x != NULL; x = x->next )
-        cur_disk.capacity += x->extent.nr_sectors; 
-    cur_disk.domain = p->domain; 
-        
-    /* Now copy into relevant part of user-space buffer */
-    if( copy_to_user(&xdi->disks[xdi->count], 
-                     &cur_disk, 
-                     sizeof(xen_disk_t)) )
-    { 
-        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
-        return -EFAULT;
-    } 
-        
-    xdi->count++; 
-
-    return 0;
-}
-
-
-static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
-{
-    int rc = 0;
-    rb_node_t *rb;
-
-    spin_lock(&p->vbd_lock);
-
-    if ( (rb = p->vbd_rb.rb_node) == NULL )
-        goto out;
-
- new_subtree:
-    /* STEP 1. Find least node (it'll be left-most). */
-    while ( rb->rb_left != NULL )
-        rb = rb->rb_left;
-
-    for ( ; ; )
-    {
-        /* STEP 2. Dealt with left subtree. Now process current node. */
-        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
-            goto out;
-
-        /* STEP 3. Process right subtree, if any. */
-        if ( rb->rb_right != NULL )
-        {
-            rb = rb->rb_right;
-            goto new_subtree;
-        }
-
-        /* STEP 4. Done both subtrees. Head back through ancesstors. */
-        for ( ; ; ) 
-        {
-            /* We're done when we get back to the root node. */
-            if ( rb->rb_parent == NULL )
-                goto out;
-            /* If we are left of parent, then parent is next to process. */
-            if ( rb->rb_parent->rb_left == rb )
-                break;
-            /* If we are right of parent, then we climb to grandparent. */
-            rb = rb->rb_parent;
-        }
-
-        rb = rb->rb_parent;
-    }
-
- out:
-    spin_unlock(&p->vbd_lock);
-    return rc;  
-}
-
-
-/*
- * Return information about the VBDs available for a given domain, or for all 
- * domains; in the general case the 'domain' argument will be 0 which means 
- * "information about the caller"; otherwise the 'domain' argument will 
- * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 
- * these cases require the caller to be privileged.
- */
-long vbd_probe(vbd_probe_t *probe) 
-{
-    struct task_struct *p = NULL; 
-    unsigned long flags;
-    long ret = 0;  
-
-    if ( probe->domain != 0 )
-    { 
-        /* We can only probe for ourselves (unless we're privileged). */
-        if( (probe->domain != current->domain) && !IS_PRIV(current) )
-            return -EPERM; 
-
-        if ( (probe->domain != VBD_PROBE_ALL) &&
-             ((p = find_domain_by_id(probe->domain)) == NULL) )
-        {
-            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
-                    probe->domain); 
-            return -EINVAL; 
-        }
-    }
-    else
-    { 
-        /* Default is to probe for ourselves. */
-        p = current; 
-        get_task_struct(p); /* to mirror final put_task_struct */
-    }
-
-    if ( probe->domain == VBD_PROBE_ALL )
-    { 
-        read_lock_irqsave(&tasklist_lock, flags);
-        for_each_domain ( p )
-        {
-            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
-            { 
-                read_unlock_irqrestore(&tasklist_lock, flags);
-                goto out; 
-            }
-        }
-        read_unlock_irqrestore(&tasklist_lock, flags);
-    } 
-    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
-        goto out; 
-
- out: 
-    if ( ret != 0 )
-        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
-    if ( p != NULL )
-        put_task_struct(p); 
-    return ret; 
-}
-
-
-long vbd_info(vbd_info_t *info) 
-{
-    struct task_struct *p; 
-    xen_extent_le_t *x; 
-    xen_extent_t *extents; 
-    vbd_t *vbd = NULL;
-    rb_node_t *rb;
-    long ret = 0;  
-   
-    if ( (info->domain != current->domain) && !IS_PRIV(current) )
-        return -EPERM; 
-
-    if ( (p = find_domain_by_id(info->domain)) == NULL )
-    {
-        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
-                info->domain); 
-        return -EINVAL; 
-    }
-
-    spin_lock(&p->vbd_lock);
-
-    rb = p->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, vbd_t, rb);
-        if ( info->vdevice < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( info->vdevice > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            break;
-    }
-
-    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
-    {
-        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
-        ret = -EINVAL; 
-        goto out; 
-    }
-
-    info->mode     = vbd->mode;
-    info->nextents = 0; 
-
-    extents = info->extents;
-    for ( x = vbd->extents; x != NULL; x = x->next )
-    {
-        if ( info->nextents == info->maxextents )
-            break;
-        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
-        {
-            DPRINTK("vbd_info: copy_to_user failed\n");
-            ret = -EFAULT;
-            goto out; 
-        } 
-        extents++;
-        info->nextents++;
-    }
-
- out: 
-    spin_unlock(&p->vbd_lock);
-    put_task_struct(p); 
-    return ret; 
-}
-
-
-int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
-{
-    xen_extent_le_t *x; 
-    vbd_t *vbd;
-    rb_node_t *rb;
-    xen_sector_t sec_off;
-    unsigned long nr_secs;
-
-    spin_lock(&p->vbd_lock);
-
-    rb = p->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, vbd_t, rb);
-        if ( pseg->dev < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( pseg->dev > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            goto found;
-    }
-
-    DPRINTK("vbd_translate; domain %llu attempted to access "
-            "non-existent VBD.\n", p->domain); 
-
-    spin_unlock(&p->vbd_lock);
-    return -ENODEV; 
-
- found:
-
-    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
-         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
-    {
-        spin_unlock(&p->vbd_lock);
-        return -EACCES; 
-    }
-
-    /*
-     * Now iterate through the list of xen_extents, working out which should 
-     * be used to perform the translation.
-     */
-    sec_off = pseg->sector_number; 
-    nr_secs = pseg->nr_sects;
-    for ( x = vbd->extents; x != NULL; x = x->next )
-    { 
-        if ( sec_off < x->extent.nr_sectors )
-        {
-            pseg->dev = x->extent.device; 
-            pseg->sector_number = x->extent.start_sector + sec_off;
-            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
-                goto overrun;
-            spin_unlock(&p->vbd_lock);
-            return 1;
-        } 
-        sec_off -= x->extent.nr_sectors; 
-    }
-
-    DPRINTK("vbd_translate: end of vbd.\n");
-    spin_unlock(&p->vbd_lock);
-    return -EACCES; 
-
-    /*
-     * Here we deal with overrun onto the following extent. We don't deal with 
-     * overrun of more than one boundary since each request is restricted to 
-     * 2^9 512-byte sectors, so it should be trivial for control software to 
-     * ensure that extents are large enough to prevent excessive overrun.
-     */
- overrun:
-
-    /* Adjust length of first chunk to run to end of first extent. */
-    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
-
-    /* Set second chunk buffer and length to start where first chunk ended. */
-    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
-    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
-
-    /* Now move to the next extent. Check it exists and is long enough! */
-    if ( unlikely((x = x->next) == NULL) || 
-         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
-    {
-        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
-        spin_unlock(&p->vbd_lock);
-        return -EACCES;
-    }
-
-    /* Store the real device and start sector for the second chunk. */
-    pseg[1].dev           = x->extent.device;
-    pseg[1].sector_number = x->extent.start_sector;
-    
-    spin_unlock(&p->vbd_lock);
-    return 2;
-}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile

deleted file mode 100644 (file)

index 35986ca..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-O_TARGET := drv.o
-obj-y := block.o vbd.o
-include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c

deleted file mode 100644 (file)

index d00dd98..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c
+++ /dev/null
@@ -1,625 +0,0 @@
-/******************************************************************************
- * block.c
- * 
- * Xenolinux virtual block-device driver.
- * 
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- */
-
-#include "block.h"
-#include <linux/blk.h>
-#include <linux/cdrom.h>
-#include <linux/tqueue.h>
-#include <linux/sched.h>
-#include <scsi/scsi.h>
-
-#include <linux/interrupt.h>
-
-typedef unsigned char byte; /* from linux/ide.h */
-
-#define STATE_ACTIVE    0
-#define STATE_SUSPENDED 1
-#define STATE_CLOSED    2
-static unsigned int state = STATE_SUSPENDED;
-
-/* Dynamically-mapped IRQs. */
-static int xlblk_response_irq, xlblk_update_irq;
-
-static blk_ring_t *blk_ring;
-static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
-static BLK_RING_IDX req_prod;  /* Private request producer.         */
-
-/* We plug the I/O ring if the driver is suspended or if the ring is full. */
-#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
-                      (state != STATE_ACTIVE))
-
-
-/*
- * Request queues with outstanding work, but ring is currently full.
- * We need no special lock here, as we always access this with the
- * io_request_lock held. We only need a small maximum list.
- */
-#define MAX_PENDING 8
-static request_queue_t *pending_queues[MAX_PENDING];
-static int nr_pending;
-
-static kdev_t        sg_dev;
-static int           sg_operation = -1;
-static unsigned long sg_next_sect;
-#define DISABLE_SCATTERGATHER() (sg_operation = -1)
-
-static inline void signal_requests_to_xen(void)
-{
-    block_io_op_t op; 
-
-    DISABLE_SCATTERGATHER();
-    blk_ring->req_prod = req_prod;
-
-    op.cmd = BLOCK_IO_OP_SIGNAL; 
-    HYPERVISOR_block_io_op(&op);
-    return;
-}
-
-
-/*
- * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
- * 
- * Schedule a task for keventd to run, which will update the VBDs and perform 
- * the corresponding updates to our view of VBD state, so the XenoLinux will 
- * respond to changes / additions / deletions to the set of VBDs automatically.
- */
-static struct tq_struct update_tq;
-static void update_vbds_task(void *unused)
-{ 
-    xlvbd_update_vbds();
-}
-static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
-    update_tq.routine = update_vbds_task;
-    schedule_task(&update_tq);
-}
-
-
-int xen_block_open(struct inode *inode, struct file *filep)
-{
-    short xldev = inode->i_rdev; 
-    struct gendisk *gd = get_gendisk(xldev);
-    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
-    short minor = MINOR(xldev); 
-
-    if ( gd->part[minor].nr_sects == 0 )
-    { 
-        /*
-         * Device either doesn't exist, or has zero capacity; we use a few
-         * cheesy heuristics to return the relevant error code
-         */
-        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
-             ((minor & (gd->max_p - 1)) != 0) )
-        { 
-            /*
-             * We have a real device, but no such partition, or we just have a
-             * partition number so guess this is the problem.
-             */
-            return -ENXIO;     /* no such device or address */
-        }
-        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
-        {
-            /* This is a removable device => assume that media is missing. */ 
-            return -ENOMEDIUM; /* media not present (this is a guess) */
-        } 
-        else
-        { 
-            /* Just go for the general 'no such device' error. */
-            return -ENODEV;    /* no such device */
-        }
-    }
-    
-    /* Update of usage count is protected by per-device semaphore. */
-    disk->usage++;
-
-    return 0;
-}
-
-
-int xen_block_release(struct inode *inode, struct file *filep)
-{
-    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
-
-    /*
-     * When usage drops to zero it may allow more VBD updates to occur.
-     * Update of usage count is protected by a per-device semaphore.
-     */
-    if ( --disk->usage == 0 )
-    {
-        update_tq.routine = update_vbds_task;
-        schedule_task(&update_tq);
-    }
-
-    return 0;
-}
-
-
-int xen_block_ioctl(struct inode *inode, struct file *filep,
-                          unsigned command, unsigned long argument)
-{
-    kdev_t dev = inode->i_rdev;
-    struct hd_geometry *geo = (struct hd_geometry *)argument;
-    struct gendisk *gd;     
-    struct hd_struct *part; 
-    int i;
-
-    /* NB. No need to check permissions. That is done for us. */
-    
-    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
-                  command, (long) argument, dev); 
-  
-    gd = get_gendisk(dev);
-    part = &gd->part[MINOR(dev)]; 
-
-    switch ( command )
-    {
-    case BLKGETSIZE:
-        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
-        return put_user(part->nr_sects, (unsigned long *) argument);
-
-    case BLKGETSIZE64:
-        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
-                      (u64)part->nr_sects * 512);
-        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
-
-    case BLKRRPART:                               /* re-read partition table */
-        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
-        return xen_block_revalidate(dev);
-
-    case BLKSSZGET:
-        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
-
-    case BLKBSZGET:                                        /* get block size */
-        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
-        break;
-
-    case BLKBSZSET:                                        /* set block size */
-        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
-        break;
-
-    case BLKRASET:                                         /* set read-ahead */
-        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
-        break;
-
-    case BLKRAGET:                                         /* get read-ahead */
-        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
-        break;
-
-    case HDIO_GETGEO:
-        /* note: these values are complete garbage */
-        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
-        if (!argument) return -EINVAL;
-        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
-        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
-        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
-        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
-        return 0;
-
-    case HDIO_GETGEO_BIG: 
-        /* note: these values are complete garbage */
-        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
-        if (!argument) return -EINVAL;
-        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
-        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
-        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
-        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
-        return 0;
-
-    case CDROMMULTISESSION:
-        DPRINTK("FIXME: support multisession CDs later\n");
-        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
-            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
-        return 0;
-
-    case SCSI_IOCTL_GET_BUS_NUMBER:
-        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
-        return -ENOSYS;
-
-    default:
-        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
-        return -ENOSYS;
-    }
-    
-    return 0;
-}
-
-/* check media change: should probably do something here in some cases :-) */
-int xen_block_check(kdev_t dev)
-{
-    DPRINTK("xen_block_check\n");
-    return 0;
-}
-
-int xen_block_revalidate(kdev_t dev)
-{
-    struct block_device *bd;
-    struct gendisk *gd;
-    xl_disk_t *disk;
-    unsigned long capacity;
-    int i, rc = 0;
-    
-    if ( (bd = bdget(dev)) == NULL )
-        return -EINVAL;
-
-    /*
-     * Update of partition info, and check of usage count, is protected
-     * by the per-block-device semaphore.
-     */
-    down(&bd->bd_sem);
-
-    if ( ((gd = get_gendisk(dev)) == NULL) ||
-         ((disk = xldev_to_xldisk(dev)) == NULL) ||
-         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
-    {
-        rc = -EINVAL;
-        goto out;
-    }
-
-    if ( disk->usage > 1 )
-    {
-        rc = -EBUSY;
-        goto out;
-    }
-
-    /* Only reread partition table if VBDs aren't mapped to partitions. */
-    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
-    {
-        for ( i = gd->max_p - 1; i >= 0; i-- )
-        {
-            invalidate_device(dev+i, 1);
-            gd->part[MINOR(dev+i)].start_sect = 0;
-            gd->part[MINOR(dev+i)].nr_sects   = 0;
-            gd->sizes[MINOR(dev+i)]           = 0;
-        }
-
-        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
-    }
-
- out:
-    up(&bd->bd_sem);
-    bdput(bd);
-    return rc;
-}
-
-
-/*
- * hypervisor_request
- *
- * request block io 
- * 
- * id: for guest use only.
- * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
- * buffer: buffer to read/write into. this should be a
- *   virtual address in the guest os.
- */
-static int hypervisor_request(unsigned long   id,
-                              int             operation,
-                              char *          buffer,
-                              unsigned long   sector_number,
-                              unsigned short  nr_sectors,
-                              kdev_t          device)
-{
-    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
-    struct gendisk *gd;
-    blk_ring_req_entry_t *req;
-    struct buffer_head *bh;
-
-    if ( unlikely(nr_sectors >= (1<<9)) )
-        BUG();
-    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
-        BUG();
-
-    if ( unlikely(state == STATE_CLOSED) )
-        return 1;
-
-    switch ( operation )
-    {
-
-    case XEN_BLOCK_READ:
-    case XEN_BLOCK_WRITE:
-        gd = get_gendisk(device); 
-
-        /*
-         * Update the sector_number we'll pass down as appropriate; note that
-         * we could sanity check that resulting sector will be in this
-         * partition, but this will happen in xen anyhow.
-         */
-        sector_number += gd->part[MINOR(device)].start_sect;
-
-        /*
-         * If this unit doesn't consist of virtual (i.e., Xen-specified)
-         * partitions then we clear the partn bits from the device number.
-         */
-        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
-               GENHD_FL_VIRT_PARTNS) )
-            device &= ~(gd->max_p - 1);
-
-        if ( (sg_operation == operation) &&
-             (sg_dev == device) &&
-             (sg_next_sect == sector_number) )
-        {
-            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
-            bh = (struct buffer_head *)id;
-            bh->b_reqnext = (struct buffer_head *)req->id;
-            req->id = id;
-            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
-            if ( ++req->nr_segments < MAX_BLK_SEGS )
-                sg_next_sect += nr_sectors;
-            else
-                DISABLE_SCATTERGATHER();
-            return 0;
-        }
-        else if ( RING_PLUGGED )
-        {
-            return 1;
-        }
-        else
-        {
-            sg_operation = operation;
-            sg_dev       = device;
-            sg_next_sect = sector_number + nr_sectors;
-        }
-        break;
-
-    default:
-        panic("unknown op %d\n", operation);
-    }
-
-    /* Fill out a communications ring structure. */
-    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
-    req->id            = id;
-    req->operation     = operation;
-    req->sector_number = (xen_sector_t)sector_number;
-    req->device        = device; 
-    req->nr_segments   = 1;
-    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
-    req_prod++;
-
-    return 0;
-}
-
-
-/*
- * do_xlblk_request
- *  read a block; request is in a request queue
- */
-void do_xlblk_request(request_queue_t *rq)
-{
-    struct request *req;
-    struct buffer_head *bh, *next_bh;
-    int rw, nsect, full, queued = 0;
-
-    DPRINTK("xlblk.c::do_xlblk_request\n"); 
-
-    while ( !rq->plugged && !list_empty(&rq->queue_head))
-    {
-        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
-            goto out;
-  
-        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
-                req, req->cmd, req->sector,
-                req->current_nr_sectors, req->nr_sectors, req->bh);
-
-        rw = req->cmd;
-        if ( rw == READA )
-            rw = READ;
-        if ( unlikely((rw != READ) && (rw != WRITE)) )
-            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
-
-        req->errors = 0;
-
-        bh = req->bh;
-        while ( bh != NULL )
-        {
-            next_bh = bh->b_reqnext;
-            bh->b_reqnext = NULL;
-
-            full = hypervisor_request(
-                (unsigned long)bh,
-                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
-                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
-
-            if ( full )
-            { 
-                bh->b_reqnext = next_bh;
-                pending_queues[nr_pending++] = rq;
-                if ( unlikely(nr_pending >= MAX_PENDING) )
-                    BUG();
-                goto out; 
-            }
-
-            queued++;
-
-            /* Dequeue the buffer head from the request. */
-            nsect = bh->b_size >> 9;
-            bh = req->bh = next_bh;
-            
-            if ( bh != NULL )
-            {
-                /* There's another buffer head to do. Update the request. */
-                req->hard_sector += nsect;
-                req->hard_nr_sectors -= nsect;
-                req->sector = req->hard_sector;
-                req->nr_sectors = req->hard_nr_sectors;
-                req->current_nr_sectors = bh->b_size >> 9;
-                req->buffer = bh->b_data;
-            }
-            else
-            {
-                /* That was the last buffer head. Finalise the request. */
-                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
-                    BUG();
-                blkdev_dequeue_request(req);
-                end_that_request_last(req);
-            }
-        }
-    }
-
- out:
-    if ( queued != 0 ) signal_requests_to_xen();
-}
-
-
-static void kick_pending_request_queues(void)
-{
-    /* We kick pending request queues if the ring is reasonably empty. */
-    if ( (nr_pending != 0) && 
-         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
-    {
-        /* Attempt to drain the queue, but bail if the ring becomes full. */
-        while ( (nr_pending != 0) && !RING_PLUGGED )
-            do_xlblk_request(pending_queues[--nr_pending]);
-    }
-}
-
-
-static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
-    BLK_RING_IDX i; 
-    unsigned long flags; 
-    struct buffer_head *bh, *next_bh;
-    
-    if ( unlikely(state == STATE_CLOSED) )
-        return;
-    
-    spin_lock_irqsave(&io_request_lock, flags);     
-
-    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
-    {
-        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
-        switch ( bret->operation )
-        {
-        case XEN_BLOCK_READ:
-        case XEN_BLOCK_WRITE:
-            if ( unlikely(bret->status != 0) )
-                DPRINTK("Bad return from blkdev data request: %lx\n",
-                        bret->status);
-            for ( bh = (struct buffer_head *)bret->id; 
-                  bh != NULL; 
-                  bh = next_bh )
-            {
-                next_bh = bh->b_reqnext;
-                bh->b_reqnext = NULL;
-                bh->b_end_io(bh, !bret->status);
-            }
-            break;
-     
-        default:
-            BUG();
-        }
-    }
-    
-    resp_cons = i;
-
-    kick_pending_request_queues();
-
-    spin_unlock_irqrestore(&io_request_lock, flags);
-}
-
-
-static void reset_xlblk_interface(void)
-{
-    block_io_op_t op; 
-
-    nr_pending = 0;
-
-    op.cmd = BLOCK_IO_OP_RESET;
-    if ( HYPERVISOR_block_io_op(&op) != 0 )
-        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
-
-    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
-    (void)HYPERVISOR_block_io_op(&op);
-
-    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
-    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
-    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
-
-    wmb();
-    state = STATE_ACTIVE;
-}
-
-
-int __init xlblk_init(void)
-{
-    int error; 
-
-    reset_xlblk_interface();
-
-    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
-    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
-
-    error = request_irq(xlblk_response_irq, xlblk_response_int, 
-                        SA_SAMPLE_RANDOM, "blkdev", NULL);
-    if ( error )
-    {
-        printk(KERN_ALERT "Could not allocate receive interrupt\n");
-        goto fail;
-    }
-
-    error = request_irq(xlblk_update_irq, xlblk_update_int,
-                        0, "blkdev", NULL);
-
-    if ( error )
-    {
-        printk(KERN_ALERT "Could not allocate block update interrupt\n");
-        goto fail;
-    }
-
-    (void)xlvbd_init();
-
-    return 0;
-
- fail:
-    return error;
-}
-
-
-static void __exit xlblk_cleanup(void)
-{
-    xlvbd_cleanup();
-    free_irq(xlblk_response_irq, NULL);
-    free_irq(xlblk_update_irq, NULL);
-    unbind_virq_from_irq(VIRQ_BLKDEV);
-    unbind_virq_from_irq(VIRQ_VBD_UPD);
-}
-
-
-#ifdef MODULE
-module_init(xlblk_init);
-module_exit(xlblk_cleanup);
-#endif
-
-
-void blkdev_suspend(void)
-{
-    state = STATE_SUSPENDED;
-    wmb();
-
-    while ( resp_cons != blk_ring->req_prod )
-    {
-        barrier();
-        current->state = TASK_INTERRUPTIBLE;
-        schedule_timeout(1);
-    }
-
-    wmb();
-    state = STATE_CLOSED;
-    wmb();
-
-    clear_fixmap(FIX_BLKRING_BASE);
-}
-
-
-void blkdev_resume(void)
-{
-    reset_xlblk_interface();
-    spin_lock_irq(&io_request_lock);
-    kick_pending_request_queues();
-    spin_unlock_irq(&io_request_lock);
-}
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h

deleted file mode 100644 (file)

index e41e039..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/******************************************************************************
- * block.h
- * 
- * Shared definitions between all levels of XenoLinux Virtual block devices.
- */
-
-#ifndef __XEN_DRIVERS_BLOCK_H__
-#define __XEN_DRIVERS_BLOCK_H__
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-
-#include <linux/fs.h>
-#include <linux/hdreg.h>
-#include <linux/blkdev.h>
-#include <linux/major.h>
-
-#include <asm/hypervisor-ifs/hypervisor-if.h>
-#include <asm/hypervisor-ifs/vbd.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
-#include <asm/uaccess.h>
-
-#if 0
-#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#if 0
-#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
-#else
-#define DPRINTK_IOCTL(_f, _a...) ((void)0)
-#endif
-
-/* Private gendisk->flags[] values. */
-#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
-#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
-
-/*
- * We have one of these per vbd, whether ide, scsi or 'other'.
- * They hang in an array off the gendisk structure. We may end up putting
- * all kinds of interesting stuff here :-)
- */
-typedef struct xl_disk {
-    int usage;
-} xl_disk_t;
-
-extern int xen_control_msg(int operration, char *buffer, int size);
-extern int xen_block_open(struct inode *inode, struct file *filep);
-extern int xen_block_release(struct inode *inode, struct file *filep);
-extern int xen_block_ioctl(struct inode *inode, struct file *filep,
-                                 unsigned command, unsigned long argument);
-extern int xen_block_check(kdev_t dev);
-extern int xen_block_revalidate(kdev_t dev);
-extern void do_xlblk_request (request_queue_t *rq); 
-
-extern void xlvbd_update_vbds(void);
-
-static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
-{
-    struct gendisk *gd = get_gendisk(xldev);
-    
-    if ( gd == NULL ) 
-        return NULL;
-    
-    return (xl_disk_t *)gd->real_devices + 
-        (MINOR(xldev) >> gd->minor_shift);
-}
-
-
-/* Virtual block-device subsystem. */
-extern int  xlvbd_init(void);
-extern void xlvbd_cleanup(void); 
-
-#endif /* __XEN_DRIVERS_BLOCK_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c

deleted file mode 100644 (file)

index e08b976..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c
+++ /dev/null
@@ -1,561 +0,0 @@
-/******************************************************************************
- * vbd.c
- * 
- * Xenolinux virtual block-device driver (xvd).
- * 
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- */
-
-#include "block.h"
-#include <linux/blk.h>
-
-/*
- * For convenience we distinguish between ide, scsi and 'other' (i.e.
- * potentially combinations of the two) in the naming scheme and in a few 
- * other places (like default readahead, etc).
- */
-#define XLIDE_MAJOR_NAME  "hd"
-#define XLSCSI_MAJOR_NAME "sd"
-#define XLVBD_MAJOR_NAME "xvd"
-
-#define XLIDE_DEVS_PER_MAJOR   2
-#define XLSCSI_DEVS_PER_MAJOR 16
-#define XLVBD_DEVS_PER_MAJOR  16
-
-#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
-#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
-
-#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
-#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
-
-#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
-#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
-
-/* The below are for the generic drivers/block/ll_rw_block.c code. */
-static int xlide_blksize_size[256];
-static int xlide_hardsect_size[256];
-static int xlide_max_sectors[256];
-static int xlscsi_blksize_size[256];
-static int xlscsi_hardsect_size[256];
-static int xlscsi_max_sectors[256];
-static int xlvbd_blksize_size[256];
-static int xlvbd_hardsect_size[256];
-static int xlvbd_max_sectors[256];
-
-/* Information from Xen about our VBDs. */
-#define MAX_VBDS 64
-static int nr_vbds;
-static xen_disk_t *vbd_info;
-
-static struct block_device_operations xlvbd_block_fops = 
-{
-    open:               xen_block_open,
-    release:            xen_block_release,
-    ioctl:              xen_block_ioctl,
-    check_media_change: xen_block_check,
-    revalidate:         xen_block_revalidate,
-};
-
-static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
-{
-    int error;
-    block_io_op_t op; 
-
-    /* Probe for disk information. */
-    memset(&op, 0, sizeof(op)); 
-    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
-    op.u.probe_params.domain    = 0; 
-    op.u.probe_params.xdi.max   = MAX_VBDS;
-    op.u.probe_params.xdi.disks = disk_info;
-    op.u.probe_params.xdi.count = 0;
-
-    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
-    {
-        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
-        return -1;
-    }
-
-    return op.u.probe_params.xdi.count;
-}
-
-/*
- * xlvbd_init_device - initialise a VBD device
- * @disk:              a xen_disk_t describing the VBD
- *
- * Takes a xen_disk_t * that describes a VBD the domain has access to.
- * Performs appropriate initialisation and registration of the device.
- *
- * Care needs to be taken when making re-entrant calls to ensure that
- * corruption does not occur.  Also, devices that are in use should not have
- * their details updated.  This is the caller's responsibility.
- */
-static int xlvbd_init_device(xen_disk_t *xd)
-{
-    int device = xd->device;
-    int major  = MAJOR(device); 
-    int minor  = MINOR(device);
-    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
-    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
-    char *major_name;
-    struct gendisk *gd;
-    struct block_device *bd;
-    xl_disk_t *disk;
-    int i, rc = 0, max_part, partno;
-    unsigned long capacity;
-
-    unsigned char buf[64];
-
-    if ( (bd = bdget(device)) == NULL )
-        return -1;
-
-    /*
-     * Update of partition info, and check of usage count, is protected
-     * by the per-block-device semaphore.
-     */
-    down(&bd->bd_sem);
-
-    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
-    {
-        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
-        rc = -1;
-        goto out;
-    }
-
-    if ( is_ide ) {
-
-       major_name = XLIDE_MAJOR_NAME; 
-       max_part   = XLIDE_MAX_PART;
-
-    } else if ( is_scsi ) {
-
-       major_name = XLSCSI_MAJOR_NAME;
-       max_part   = XLSCSI_MAX_PART;
-
-    } else if (XD_VIRTUAL(xd->info)) {
-
-       major_name = XLVBD_MAJOR_NAME;
-       max_part   = XLVBD_MAX_PART;
-
-    } else { 
-
-        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
-       printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
-              major, minor);
-       is_scsi    = 1; 
-       major_name = "cciss"; 
-       max_part   = XLSCSI_MAX_PART;
-
-    }
-    
-    partno = minor & (max_part - 1); 
-    
-    if ( (gd = get_gendisk(device)) == NULL )
-    {
-        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
-        if ( rc < 0 )
-        {
-            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
-            goto out;
-        }
-
-        if ( is_ide )
-        { 
-            blksize_size[major]  = xlide_blksize_size;
-            hardsect_size[major] = xlide_hardsect_size;
-            max_sectors[major]   = xlide_max_sectors;
-            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
-        } 
-        else if ( is_scsi )
-        { 
-            blksize_size[major]  = xlscsi_blksize_size;
-            hardsect_size[major] = xlscsi_hardsect_size;
-            max_sectors[major]   = xlscsi_max_sectors;
-            read_ahead[major]    = 0; /* XXX 8; -- guessing */
-        }
-        else
-        { 
-            blksize_size[major]  = xlvbd_blksize_size;
-            hardsect_size[major] = xlvbd_hardsect_size;
-            max_sectors[major]   = xlvbd_max_sectors;
-            read_ahead[major]    = 8;
-        }
-
-        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
-
-        /*
-         * Turn off barking 'headactive' mode. We dequeue buffer heads as
-         * soon as we pass them down to Xen.
-         */
-        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
-
-        /* Construct an appropriate gendisk structure. */
-        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
-        gd->major      = major;
-        gd->major_name = major_name; 
-    
-        gd->max_p      = max_part; 
-        if ( is_ide )
-        { 
-            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
-            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
-        } 
-        else if ( is_scsi )
-        { 
-            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
-            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
-        }
-        else
-        { 
-            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
-            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
-        }
-
-        /* 
-        ** The sizes[] and part[] arrays hold the sizes and other 
-        ** information about every partition with this 'major' (i.e. 
-        ** every disk sharing the 8 bit prefix * max partns per disk) 
-        */
-        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
-        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
-                            GFP_KERNEL);
-        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
-        memset(gd->part,  0, max_part * gd->nr_real 
-               * sizeof(struct hd_struct));
-
-
-        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
-                                   GFP_KERNEL);
-        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
-
-        gd->next   = NULL;            
-        gd->fops   = &xlvbd_block_fops;
-
-        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
-                             GFP_KERNEL);
-        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
-    
-        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
-        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
-
-        add_gendisk(gd);
-
-        blk_size[major] = gd->sizes;
-    }
-
-    if ( XD_READONLY(xd->info) )
-        set_device_ro(device, 1); 
-
-    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
-
-    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
-    capacity = (unsigned long)xd->capacity;
-
-    if ( partno != 0 )
-    {
-        /*
-         * If this was previously set up as a real disc we will have set 
-         * up partition-table information. Virtual partitions override 
-         * 'real' partitions, and the two cannot coexist on a device.
-         */
-        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
-             (gd->sizes[minor & ~(max_part-1)] != 0) )
-        {
-            /*
-             * Any non-zero sub-partition entries must be cleaned out before
-             * installing 'virtual' partition entries. The two types cannot
-             * coexist, and virtual partitions are favoured.
-             */
-            kdev_t dev = device & ~(max_part-1);
-            for ( i = max_part - 1; i > 0; i-- )
-            {
-                invalidate_device(dev+i, 1);
-                gd->part[MINOR(dev+i)].start_sect = 0;
-                gd->part[MINOR(dev+i)].nr_sects   = 0;
-                gd->sizes[MINOR(dev+i)]           = 0;
-            }
-            printk(KERN_ALERT
-                   "Virtual partitions found for /dev/%s - ignoring any "
-                   "real partition information we may have found.\n",
-                   disk_name(gd, MINOR(device), buf));
-        }
-
-        /* Need to skankily setup 'partition' information */
-        gd->part[minor].start_sect = 0; 
-        gd->part[minor].nr_sects   = capacity; 
-        gd->sizes[minor]           = capacity; 
-
-        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
-    }
-    else
-    {
-        gd->part[minor].nr_sects = capacity;
-        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
-        
-        /* Some final fix-ups depending on the device type */
-        switch ( XD_TYPE(xd->info) )
-        { 
-        case XD_TYPE_CDROM:
-        case XD_TYPE_FLOPPY: 
-        case XD_TYPE_TAPE:
-            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
-            printk(KERN_ALERT 
-                   "Skipping partition check on %s /dev/%s\n", 
-                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : 
-                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : 
-                    "floppy"), disk_name(gd, MINOR(device), buf)); 
-            break; 
-
-        case XD_TYPE_DISK:
-            /* Only check partitions on real discs (not virtual!). */
-            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
-            {
-                printk(KERN_ALERT
-                       "Skipping partition check on virtual /dev/%s\n",
-                       disk_name(gd, MINOR(device), buf));
-                break;
-            }
-            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
-            break; 
-
-        default:
-            printk(KERN_ALERT "XenoLinux: unknown device type %d\n", 
-                   XD_TYPE(xd->info)); 
-            break; 
-        }
-    }
-
- out:
-    up(&bd->bd_sem);
-    bdput(bd);    
-    return rc;
-}
-
-
-/*
- * xlvbd_remove_device - remove a device node if possible
- * @device:       numeric device ID
- *
- * Updates the gendisk structure and invalidates devices.
- *
- * Returns 0 on success. Returns -1 if bdget() yields no block_device for
- * @device, or if the matching xl_disk_t still has a non-zero usage count
- * (in which case nothing is torn down).
- *
- * This is OK for now but in future, should perhaps consider where this should
- * deallocate gendisks / unregister devices.
- */
-static int xlvbd_remove_device(int device)
-{
-    int i, rc = 0, minor = MINOR(device);
-    struct gendisk *gd;
-    struct block_device *bd;
-    xl_disk_t *disk = NULL;
-
-    if ( (bd = bdget(device)) == NULL )
-        return -1;
-
-    /*
-     * Update of partition info, and check of usage count, is protected
-     * by the per-block-device semaphore.
-     */
-    down(&bd->bd_sem);
-
-    /* A missing gendisk or xl_disk_t for this device is treated as a bug. */
-    if ( ((gd = get_gendisk(device)) == NULL) ||
-         ((disk = xldev_to_xldisk(device)) == NULL) )
-        BUG();
-
-    if ( disk->usage != 0 )
-    {
-        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
-        rc = -1;
-        goto out;
-    }
- 
-    /* Non-zero low bits of the minor select a partition within the unit. */
-    if ( (minor & (gd->max_p-1)) != 0 )
-    {
-        /* 1: The VBD is mapped to a partition rather than a whole unit. */
-        invalidate_device(device, 1);
-       gd->part[minor].start_sect = 0;
-        gd->part[minor].nr_sects   = 0;
-        gd->sizes[minor]           = 0;
-
-        /*
-         * Clear the consists-of-virtual-partitions flag if possible: drop
-         * it, then set it again if any partition entry of this unit still
-         * has a non-zero size.
-         */
-        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
-        for ( i = 1; i < gd->max_p; i++ )
-            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
-                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
-
-        /*
-         * If all virtual partitions are now gone, and a 'whole unit' VBD is
-         * present, then we can try to grok the unit's real partition table.
-         */
-        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
-             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
-             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
-        {
-            register_disk(gd,
-                          device&~(gd->max_p-1), 
-                          gd->max_p, 
-                          &xlvbd_block_fops,
-                          gd->part[minor&~(gd->max_p-1)].nr_sects);
-        }
-    }
-    else
-    {
-        /*
-         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
-         * NB. The partition entries are only cleared if there are no VBDs
-         * mapped to individual partitions on this unit.
-         */
-        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
-        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
-            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
-        /* Walk downwards so the whole-unit entry (i == 0) is cleared last. */
-        while ( i >= 0 )
-        {
-            invalidate_device(device+i, 1);
-            gd->part[minor+i].start_sect = 0;
-            gd->part[minor+i].nr_sects   = 0;
-            gd->sizes[minor+i]           = 0;
-            i--;
-        }
-    }
-
- out:
-    up(&bd->bd_sem);
-    bdput(bd);
-    return rc;
-}
-
-/*
- * xlvbd_update_vbds - reprobes the VBD status and updates driver state.
- * The VBDs need to be updated in this way when the domain is initialised
- * and also each time we receive an XLBLK_UPDATE event.
- *
- * The freshly probed list is merged with the current one: devices that
- * disappeared are removed (or kept if removal fails), new devices are
- * initialised, and devices whose capacity or info changed are re-created.
- * If memory cannot be allocated or the probe fails, the current state
- * (vbd_info / nr_vbds) is left untouched.
- *
- * NOTE(review): the merge walk assumes both lists are sorted by ascending
- * device number -- confirm against xlvbd_get_vbd_info().
- */
-void xlvbd_update_vbds(void)
-{
-    int i, j, k, old_nr, new_nr;
-    xen_disk_t *old_info, *new_info, *merged_info;
-
-    old_info = vbd_info;
-    old_nr   = nr_vbds;
-
-    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
-    if ( unlikely(new_info == NULL) )
-        return;
-
-    /* The '< 0' test must apply to the probe result, not to unlikely(). */
-    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
-    {
-        kfree(new_info);
-        return;
-    }
-
-    /*
-     * Final list maximum size is old list + new list. This occurs only when
-     * old list and new list do not overlap at all, and we cannot yet destroy
-     * VBDs in the old list because the usage counts are busy.
-     */
-    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
-    if ( unlikely(merged_info == NULL) )
-    {
-        /* Nothing has been modified yet: keep the old list as it is. */
-        kfree(new_info);
-        return;
-    }
-
-    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
-    i = j = k = 0;
-
-    while ( (i < old_nr) && (j < new_nr) )
-    {
-        if ( old_info[i].device < new_info[j].device )
-        {
-            /* Only in the old list: drop it, or keep it if still busy. */
-            if ( xlvbd_remove_device(old_info[i].device) != 0 )
-                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
-            i++;
-        }
-        else if ( old_info[i].device > new_info[j].device )
-        {
-            /* Only in the new list: keep it if it initialises cleanly. */
-            if ( xlvbd_init_device(&new_info[j]) == 0 )
-                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
-            j++;
-        }
-        else
-        {
-            /*
-             * In both lists: keep the old entry if nothing changed or if it
-             * cannot be removed; otherwise replace it with the new one.
-             */
-            if ( ((old_info[i].capacity == new_info[j].capacity) &&
-                  (old_info[i].info == new_info[j].info)) ||
-                 (xlvbd_remove_device(old_info[i].device) != 0) )
-                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
-            else if ( xlvbd_init_device(&new_info[j]) == 0 )
-                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
-            i++; j++;
-        }
-    }
-
-    /* Remaining old-only entries. */
-    for ( ; i < old_nr; i++ )
-    {
-        if ( xlvbd_remove_device(old_info[i].device) != 0 )
-            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
-    }
-
-    /* Remaining new-only entries. */
-    for ( ; j < new_nr; j++ )
-    {
-        if ( xlvbd_init_device(&new_info[j]) == 0 )
-            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
-    }
-
-    vbd_info = merged_info;
-    nr_vbds  = k;
-
-    kfree(old_info);
-    kfree(new_info);
-}
-
-
-/*
- * Set up all the linux device goop for the virtual block devices (vbd's) that 
- * xen tells us about. Note that although from xen's pov VBDs are addressed 
- * simply an opaque 16-bit device number, the domain creation tools 
- * conventionally allocate these numbers to correspond to those used by 'real' 
- * linux -- this is just for convenience as it means e.g. that the same 
- * /etc/fstab can be used when booting with or without xen.
- */
-int __init xlvbd_init(void)
-{
-    int i;
-    
-    /*
-     * If compiled as a module, we don't support unloading yet. We therefore 
-     * permanently increment the reference count to disallow it.
-     */
-    SET_MODULE_OWNER(&xlvbd_block_fops);
-    MOD_INC_USE_COUNT;
-
-    /* Initialize the global arrays. */
-    for ( i = 0; i < 256; i++ ) 
-    {
-        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
-        xlide_blksize_size[i]  = 1024;
-        xlide_hardsect_size[i] = 512;
-        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
-
-        /* from the generic scsi disk code (drivers/scsi/sd.c) */
-        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
-        xlscsi_hardsect_size[i] = 512;
-        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
-
-        /* we don't really know what to set these too since it depends */
-        xlvbd_blksize_size[i]  = 512;
-        xlvbd_hardsect_size[i] = 512;
-        xlvbd_max_sectors[i]   = 128;
-    }
-
-    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
-    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
-
-    if ( nr_vbds < 0 )
-    {
-        kfree(vbd_info);
-        vbd_info = NULL;
-        nr_vbds  = 0;
-    }
-    else
-    {
-        for ( i = 0; i < nr_vbds; i++ )
-            xlvbd_init_device(&vbd_info[i]);
-    }
-
-    return 0;
-}
-
-
-#ifdef MODULE
-module_init(xlvbd_init);
-#endif
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h

deleted file mode 100644 (file)

index 9fded2b..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/******************************************************************************
- * vblkif.h
- * 
- * Unified block-device I/O interface for Xen guest OSes.
- * 
- * Copyright (c) 2003-2004, Keir Fraser
- */
-
-#ifndef __SHARED_VBLKIF_H__
-#define __SHARED_VBLKIF_H__
-
-#define blkif_vdev_t   u16
-#define blkif_sector_t u64
-
-#define BLKIF_OP_READ      0
-#define BLKIF_OP_WRITE     1
-#define BLKIF_OP_PROBE     2
-
-/* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */
-#define BLKIF_RING_SIZE        64
-
-/*
- * Maximum scatter/gather segments per request.
- * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE.
- * NB. This could be 12 if the ring indexes weren't stored in the same page.
- */
-#define BLKIF_REQUEST_MAX_SEGMENTS 11
-
-typedef struct {
-    unsigned char  operation;        /* BLKIF_OP_???                         */
-    unsigned char  nr_segments;      /* number of segments (<= MAX_BLK_SEGS) */
-    blkif_vdev_t   device;           /* only for read/write requests         */
-    unsigned long  id;               /* private guest value, echoed in resp  */
-    xen_sector_t   sector_number;    /* start sector idx on disk (r/w only)  */
-    /* Least 9 bits is 'nr_sects'. High 23 bits is the address.      */
-    unsigned long  buffer_and_sects[MAX_BLK_SEGS];
-} blkif_request_t;
-
-typedef struct {
-    unsigned long   id;              /* copied from request */
-    unsigned char   operation;       /* copied from request */
-    int             status;          /* BLKIF_RSP_???       */
-} blkif_response_t;
-
-#define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
-#define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
-
-/*
- * We use a special capitalised type name because it is _essential_ that all 
- * arithmetic on indexes is done on an integer type of the correct size.
- */
-typedef unsigned int BLKIF_RING_IDX;
-
-/*
- * Ring indexes are 'free running'. That is, they are not stored modulo the
- * size of the ring buffer. The following macro converts a free-running counter
- * into a value that can directly index a ring-buffer array.
- */
-#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
-
-typedef struct {
-    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
-    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen.     */
-    union {
-        blkif_request_t  req;
-        blkif_response_t resp;
-    } ring[BLKIF_RING_SIZE];
-} blkif_ring_t;
-
-
-/*
- * BLKIF_OP_PROBE:
- * The request format for a probe request is constrained as follows:
- *  @operation   == BLKIF_OP_PROBE
- *  @nr_segments == size of probe buffer in pages
- *  @device      == unused (zero)
- *  @id          == any value (echoed in response message)
- *  @sector_num  == unused (zero)
- *  @buffer_and_sects == list of page-aligned, page-sized buffers.
- *                       (i.e., nr_sects == 8).
- * 
- * The response is a list of vdisk_t elements copied into the out-of-band
- * probe buffer. On success the response status field contains the number
- * of vdisk_t elements.
- */
-
-/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. */
-#define VDISK_TYPE_FLOPPY  0x00
-#define VDISK_TYPE_TAPE    0x01
-#define VDISK_TYPE_CDROM   0x05
-#define VDISK_TYPE_OPTICAL 0x07
-#define VDISK_TYPE_DISK    0x20 
-
-#define VDISK_TYPE_MASK    0x3F
-#define VDISK_TYPE(_x)     ((_x) & VDISK_TYPE_MASK) 
-
-/* The top two bits of the type field encode various flags. */
-#define VDISK_FLAG_RO      0x40
-#define VDISK_FLAG_VIRT    0x80
-#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO)
-#define VDISK_VIRTUAL(_x)  ((_x) & VDISK_FLAG_VIRT) 
-
-typedef struct {
-    blkif_vdev_t   device;       /* Device number (opaque 16 bit value). */
-    unsigned short info;         /* Device type and flags (VDISK_*).     */
-    xen_sector_t   capacity;     /* Size in terms of 512-byte sectors.   */
-} vdisk_t;
-
-#endif /* __SHARED_VBLKIF_H__ */
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile

deleted file mode 100644 (file)

index 20c8192..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-
-# Top-level Makefile for the virtual network interface driver.
-# The frontend is always built; the backend is built only when
-# CONFIG_XEN_PHYSDEV_ACCESS is enabled.
-O_TARGET := drv.o
-
-subdir-y += frontend
-obj-y    += frontend/drv.o
-
-subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
-obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
-
-include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile

deleted file mode 100644 (file)

index 032d02d..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Backend: main.o is the only object listed for this directory's drv.o.
-O_TARGET := drv.o
-obj-y := main.o
-include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c

deleted file mode 100644 (file)

index b0e77ab..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/vnetif/backend/main.c
- * 
- * Back-end of the driver for virtual network devices. This portion of the
- * driver exports a 'unified' network-device interface that can be accessed
- * by any operating system that implements a compatible front end. A 
- * reference front-end implementation can be found in:
- *  arch/xen/drivers/vnetif/frontend
- * 
- * Copyright (c) 2004, K A Fraser
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-/* Placeholder module entry point: performs no setup and reports success. */
-static int __init init_module(void)
-{
-    return 0;
-}
-
-/* Placeholder module exit point: there is nothing to tear down. */
-static void cleanup_module(void)
-{
-}
-
-module_init(init_module);
-module_exit(cleanup_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile

deleted file mode 100644 (file)

index 304c2e7..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Frontend: vnetif.o is the only object listed for this directory's drv.o.
-O_TARGET := drv.o
-obj-y := vnetif.o
-include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c

deleted file mode 100644 (file)

index d1a4b21..0000000
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
+++ /dev/null
@@ -1,565 +0,0 @@
-/******************************************************************************
- * vnetif.c
- * 
- * Virtual network driver for XenoLinux.
- * 
- * Copyright (c) 2002-2004, K A Fraser
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
-
-#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
-
-static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
-static void network_tx_buf_gc(struct net_device *dev);
-static void network_alloc_rx_buffers(struct net_device *dev);
-static void cleanup_module(void);
-
-/* Dynamically-mapped IRQs. */
-static int network_irq, debug_irq;
-
-static struct list_head dev_list;
-
-/* Per-interface driver state, stored in the net_device's priv area. */
-struct net_private
-{
-    struct list_head list;      /* Link in the global dev_list. */
-    struct net_device *dev;     /* Back-pointer to the owning net_device. */
-
-    struct net_device_stats stats;
-    /* Private consumer indexes for RX and TX responses. */
-    NET_RING_IDX rx_resp_cons, tx_resp_cons;
-    /* Fixmap slot (offset from FIX_NETRING0_BASE); TX-ring-full flag. */
-    unsigned int net_ring_fixmap_idx, tx_full;
-    net_ring_t  *net_ring;      /* Ring page, mapped through the fixmap. */
-    net_idx_t   *net_idx;       /* Ring indexes inside HYPERVISOR_shared_info. */
-    spinlock_t   tx_lock;       /* Held around transmit and TX garbage collection. */
-    unsigned int idx; /* Domain-specific index of this VIF. */
-
-    /* RX buffers posted since the last NETOP_PUSH_BUFFERS notification. */
-    unsigned int rx_bufs_to_notify;
-
-#define STATE_ACTIVE    0
-#define STATE_SUSPENDED 1
-#define STATE_CLOSED    2
-    unsigned int state;
-
-    /*
-     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
-     * array is an index into a chain of free entries.
-     */
-    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
-    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
-};
-
-/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
-#define ADD_ID_TO_FREELIST(_list, _id)             \
-    (_list)[(_id)] = (_list)[0];                   \
-    (_list)[0]     = (void *)(unsigned long)(_id);
-#define GET_ID_FROM_FREELIST(_list)                \
- ({ unsigned long _id = (unsigned long)(_list)[0]; \
-    (_list)[0]  = (_list)[_id];                    \
-    (unsigned short)_id; })
-
-
-/*
- * Print the transmit and receive ring indexes of @dev to the kernel log.
- * Nothing is printed for an interface in STATE_CLOSED.
- */
-static void _dbg_network_int(struct net_device *dev)
-{
-    struct net_private *priv = dev->priv;
-
-    if ( priv->state != STATE_CLOSED )
-    {
-        /* Transmit side. */
-        printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
-               " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
-               " tx_event=0x%08x, state=%d\n",
-               priv->tx_full,
-               priv->tx_resp_cons,
-               priv->net_idx->tx_req_prod,
-               priv->net_idx->tx_resp_prod,
-               priv->net_idx->tx_event,
-               test_bit(__LINK_STATE_XOFF, &dev->state));
-
-        /* Receive side. */
-        printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
-               " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
-               priv->rx_resp_cons,
-               priv->net_idx->rx_req_prod,
-               priv->net_idx->rx_resp_prod,
-               priv->net_idx->rx_event);
-    }
-}
-
-
-/* Debug IRQ handler: dump the ring state of every interface on dev_list. */
-static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
-{
-    struct list_head *pos;
-
-    list_for_each ( pos, &dev_list )
-        _dbg_network_int(list_entry(pos, struct net_private, list)->dev);
-}
-
-
-/*
- * network_open - bring a virtual interface up.
- *
- * Resets the ring indexes and queries the VIF through HYPERVISOR_net_io_op(),
- * copies the reported MAC address, maps the ring frame through the fixmap,
- * clears all driver and ring state, posts receive buffers and starts the
- * transmit queue.
- *
- * Returns 0 on success, or the non-zero hypercall result if either hypercall
- * fails.
- */
-static int network_open(struct net_device *dev)
-{
-    struct net_private *np = dev->priv;
-    netop_t netop;
-    int i, ret;
-
-    netop.cmd = NETOP_RESET_RINGS;
-    netop.vif = np->idx;
-    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
-    {
-        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
-        return ret;
-    }
-
-    netop.cmd = NETOP_GET_VIF_INFO;
-    netop.vif = np->idx;
-    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
-    {
-        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
-        return ret;
-    }
-
-    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
-
-    /* Map the ring frame reported by Xen into this VIF's fixmap slot. */
-    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, 
-               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
-    np->net_ring = (net_ring_t *)fix_to_virt(
-        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
-    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
-
-    np->rx_bufs_to_notify = 0;
-    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
-    memset(&np->stats, 0, sizeof(np->stats));
-    spin_lock_init(&np->tx_lock);
-    memset(np->net_ring, 0, sizeof(*np->net_ring));
-    memset(np->net_idx, 0, sizeof(*np->net_idx));
-
-    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
-        np->tx_skbs[i] = (void *)(i+1);
-    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
-        np->rx_skbs[i] = (void *)(i+1);
-
-    /* Make the initialised state visible before marking the VIF active. */
-    wmb();
-    np->state = STATE_ACTIVE;
-
-    network_alloc_rx_buffers(dev);
-
-    netif_start_queue(dev);
-
-    MOD_INC_USE_COUNT;
-
-    return 0;
-}
-
-
-/*
- * network_tx_buf_gc - reclaim transmit buffers that have been answered.
- *
- * Frees the skb belonging to every TX response produced since the last call,
- * returns its slot to the tx_skbs free list, re-arms tx_event, and wakes the
- * transmit queue if it had been marked full and there is room again.
- * Both callers in this file invoke it with np->tx_lock held.
- */
-static void network_tx_buf_gc(struct net_device *dev)
-{
-    NET_RING_IDX i, prod;
-    unsigned short id;
-    struct net_private *np = dev->priv;
-    struct sk_buff *skb;
-    tx_entry_t *tx_ring = np->net_ring->tx_ring;
-
-    do {
-        /* Snapshot the response producer; it is re-checked after the loop. */
-        prod = np->net_idx->tx_resp_prod;
-
-        for ( i = np->tx_resp_cons; i != prod; i++ )
-        {
-            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
-            skb = np->tx_skbs[id];
-            ADD_ID_TO_FREELIST(np->tx_skbs, id);
-            dev_kfree_skb_any(skb);
-        }
-        
-        np->tx_resp_cons = prod;
-        
-        /*
-         * Set a new event, then check for race with update of tx_cons. Note
-         * that it is essential to schedule a callback, no matter how few
-         * buffers are pending. Even if there is space in the transmit ring,
-         * higher layers may be blocked because too much data is outstanding:
-         * in such cases notification from Xen is likely to be the only kick
-         * that we'll get.
-         */
-        np->net_idx->tx_event = 
-            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
-        mb();
-    }
-    while ( prod != np->net_idx->tx_resp_prod );
-
-    /* Un-stall the queue once fewer than a full ring of requests is pending. */
-    if ( np->tx_full && 
-         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
-    {
-        np->tx_full = 0;
-        if ( np->state == STATE_ACTIVE )
-            netif_wake_queue(dev);
-    }
-}
-
-
-/* Walk the kernel page tables and return the PTE slot that maps @addr. */
-static inline pte_t *get_ppte(void *addr)
-{
-    unsigned long va = (unsigned long)addr;
-    pgd_t *pgd_ent = pgd_offset_k(va);
-    pmd_t *pmd_ent = pmd_offset(pgd_ent, va);
-
-    return pte_offset(pmd_ent, va);
-}
-
-
-/*
- * network_alloc_rx_buffers - refill the receive ring.
- *
- * Posts freshly allocated skbs as receive requests until the ring is full or
- * an allocation fails, then publishes the new request producer index and
- * re-arms rx_event. Xen is only notified once more than a quarter of a ring
- * of buffers has accumulated. Does nothing if the ring is already full or the
- * interface is not in STATE_ACTIVE.
- */
-static void network_alloc_rx_buffers(struct net_device *dev)
-{
-    unsigned short id;
-    struct net_private *np = dev->priv;
-    struct sk_buff *skb;
-    netop_t netop;
-    NET_RING_IDX i = np->net_idx->rx_req_prod;
-
-    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || 
-         unlikely(np->state != STATE_ACTIVE) )
-        return;
-
-    do {
-        skb = dev_alloc_skb(RX_BUF_SIZE);
-        if ( unlikely(skb == NULL) )
-            break;
-
-        skb->dev = dev;
-
-        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
-            panic("alloc_skb needs to provide us page-aligned buffers.");
-
-        id = GET_ID_FROM_FREELIST(np->rx_skbs);
-        np->rx_skbs[id] = skb;
-
-        /* The request carries the machine address of the PTE mapping skb->head. */
-        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
-        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
-            virt_to_machine(get_ppte(skb->head));
-
-        np->rx_bufs_to_notify++;
-    }
-    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
-
-    /*
-     * We may have allocated buffers which have entries outstanding in the page
-     * update queue -- make sure we flush those first!
-     */
-    flush_page_update_queue();
-
-    np->net_idx->rx_req_prod = i;
-    np->net_idx->rx_event    = np->rx_resp_cons + 1;
-        
-    /* Batch Xen notifications. */
-    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
-    {
-        netop.cmd = NETOP_PUSH_BUFFERS;
-        netop.vif = np->idx;
-        (void)HYPERVISOR_net_io_op(&netop);
-        np->rx_bufs_to_notify = 0;
-    }
-}
-
-
-/*
- * network_start_xmit - queue one packet on the transmit ring.
- * @skb: packet to send
- * @dev: interface to send it on
- *
- * A packet whose data would reach or cross the end of its page is first
- * copied into a freshly allocated buffer.
- *
- * Returns 0 once the packet has been placed on the ring, -ENOBUFS if called
- * while the ring is marked full, or 1 if the bounce buffer could not be
- * allocated (the original skb is not freed on the two failure paths).
- */
-static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-    unsigned short id;
-    struct net_private *np = (struct net_private *)dev->priv;
-    tx_req_entry_t *tx;
-    netop_t netop;
-    NET_RING_IDX i;
-
-    if ( unlikely(np->tx_full) )
-    {
-        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
-        netif_stop_queue(dev);
-        return -ENOBUFS;
-    }
-
-    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
-                  PAGE_SIZE) )
-    {
-        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
-        if ( unlikely(new_skb == NULL) )
-            return 1;
-        skb_put(new_skb, skb->len);
-        memcpy(new_skb->data, skb->data, skb->len);
-        dev_kfree_skb(skb);
-        skb = new_skb;
-    }   
-    
-    spin_lock_irq(&np->tx_lock);
-
-    /*
-     * Account for the packet before it is published. Once tx_req_prod has
-     * been advanced, network_tx_buf_gc() may free the skb as soon as a
-     * response shows up, so skb must not be dereferenced after that point.
-     */
-    np->stats.tx_bytes += skb->len;
-    np->stats.tx_packets++;
-
-    i = np->net_idx->tx_req_prod;
-
-    id = GET_ID_FROM_FREELIST(np->tx_skbs);
-    np->tx_skbs[id] = skb;
-
-    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
-
-    tx->id   = id;
-    tx->addr = phys_to_machine(virt_to_phys(skb->data));
-    tx->size = skb->len;
-
-    /* The request must be fully written before the producer index moves. */
-    wmb();
-    np->net_idx->tx_req_prod = i + 1;
-
-    network_tx_buf_gc(dev);
-
-    /* Stop the queue if this request filled the last free ring slot. */
-    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
-    {
-        np->tx_full = 1;
-        netif_stop_queue(dev);
-    }
-
-    spin_unlock_irq(&np->tx_lock);
-
-    /* Only notify Xen if there are no outstanding responses. */
-    mb();
-    if ( np->net_idx->tx_resp_prod == i )
-    {
-        netop.cmd = NETOP_PUSH_BUFFERS;
-        netop.vif = np->idx;
-        (void)HYPERVISOR_net_io_op(&netop);
-    }
-
-    return 0;
-}
-
-
-/*
- * _network_interrupt - service one interface from the network IRQ.
- *
- * Reclaims completed transmit buffers under tx_lock, then hands every new
- * receive response to the network stack via netif_rx(), refills the receive
- * ring, and loops again if more responses arrived in the meantime.
- * Does nothing for an interface in STATE_CLOSED.
- */
-static inline void _network_interrupt(struct net_device *dev)
-{
-    struct net_private *np = dev->priv;
-    unsigned long flags;
-    struct sk_buff *skb;
-    rx_resp_entry_t *rx;
-    NET_RING_IDX i;
-
-    if ( unlikely(np->state == STATE_CLOSED) )
-        return;
-    
-    spin_lock_irqsave(&np->tx_lock, flags);
-    network_tx_buf_gc(dev);
-    spin_unlock_irqrestore(&np->tx_lock, flags);
-
- again:
-    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
-    {
-        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
-
-        /* Look up the skb for this response and release its slot. */
-        skb = np->rx_skbs[rx->id];
-        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
-
-        if ( unlikely(rx->status != RING_STATUS_OK) )
-        {
-            /* Gate this error. We get a (valid) slew of them on suspend. */
-            if ( np->state == STATE_ACTIVE )
-                printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
-            dev_kfree_skb_any(skb);
-            continue;
-        }
-
-        /*
-         * Set up shinfo -- from alloc_skb This was particularily nasty:  the
-         * shared info is hidden at the back of the data area (presumably so it
-         * can be shared), but on page flip it gets very spunked.
-         */
-        atomic_set(&(skb_shinfo(skb)->dataref), 1);
-        skb_shinfo(skb)->nr_frags = 0;
-        skb_shinfo(skb)->frag_list = NULL;
-                                
-        /* Record the machine frame now found in the PTE for skb->head. */
-        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
-            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
-
-        skb->data = skb->tail = skb->head + rx->offset;
-        skb_put(skb, rx->size);
-        skb->protocol = eth_type_trans(skb, dev);
-
-        np->stats.rx_packets++;
-
-        np->stats.rx_bytes += rx->size;
-        netif_rx(skb);
-        dev->last_rx = jiffies;
-    }
-
-    np->rx_resp_cons = i;
-
-    network_alloc_rx_buffers(dev);
-    
-    /* Deal with hypervisor racing our resetting of rx_event. */
-    mb();
-    if ( np->net_idx->rx_resp_prod != i )
-        goto again;
-}
-
-
-/* Network IRQ handler: service every interface registered on dev_list. */
-static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
-{
-    struct list_head *pos;
-
-    list_for_each ( pos, &dev_list )
-        _network_interrupt(list_entry(pos, struct net_private, list)->dev);
-}
-
-
-/*
- * network_close - take a virtual interface down.
- *
- * Marks the interface suspended, stops the transmit queue, asks Xen to flush
- * its buffers and then sleeps in one-tick steps until the response consumer
- * indexes have caught up with the request producer indexes. Finally the
- * interface is marked closed and its ring mapping is removed.
- *
- * Always returns 0.
- */
-static int network_close(struct net_device *dev)
-{
-    struct net_private *np = dev->priv;
-    netop_t netop;
-
-    np->state = STATE_SUSPENDED;
-    wmb();
-
-    netif_stop_queue(np->dev);
-
-    netop.cmd = NETOP_FLUSH_BUFFERS;
-    netop.vif = np->idx;
-    (void)HYPERVISOR_net_io_op(&netop);
-
-    /* Wait for every outstanding RX and TX request to be answered. */
-    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
-            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
-    {
-        barrier();
-        current->state = TASK_INTERRUPTIBLE;
-        schedule_timeout(1);
-    }
-
-    wmb();
-    np->state = STATE_CLOSED;
-    wmb();
-
-    /* Now no longer safe to take interrupts for this device. */
-    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
-
-    MOD_DEC_USE_COUNT;
-
-    return 0;
-}
-
-
-/* Return the statistics block kept in @dev's private data. */
-static struct net_device_stats *network_get_stats(struct net_device *dev)
-{
-    return &((struct net_private *)dev->priv)->stats;
-}
-
-
-/*
- * Module entry point.
- *
- * NB. The whole probe/registration body below is currently compiled out with
- * '#if 0', so as built this function does nothing and returns 0. The disabled
- * code binds the network and debug VIRQs, requests their IRQs, and registers
- * an ethernet device for each valid VIF (at most 4).
- */
-static int __init init_module(void)
-{
-#if 0
-    int i, fixmap_idx=-1, err;
-    struct net_device *dev;
-    struct net_private *np;
-    netop_t netop;
-
-    INIT_LIST_HEAD(&dev_list);
-
-    network_irq = bind_virq_to_irq(VIRQ_NET);
-    debug_irq   = bind_virq_to_irq(VIRQ_DEBUG);
-
-    err = request_irq(network_irq, network_interrupt, 
-                      SA_SAMPLE_RANDOM, "network", NULL);
-    if ( err )
-    {
-        printk(KERN_WARNING "Could not allocate network interrupt\n");
-        goto fail;
-    }
-    
-    err = request_irq(debug_irq, dbg_network_int, 
-                      SA_SHIRQ, "net_dbg", &dbg_network_int);
-    if ( err )
-        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
-
-    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
-    {
-        /* If the VIF is invalid then the query hypercall will fail. */
-        netop.cmd = NETOP_GET_VIF_INFO;
-        netop.vif = i;
-        if ( HYPERVISOR_net_io_op(&netop) != 0 )
-            continue;
-
-        /* We actually only support up to 4 vifs right now. */
-        if ( ++fixmap_idx == 4 )
-            break;
-
-        dev = alloc_etherdev(sizeof(struct net_private));
-        if ( dev == NULL )
-        {
-            err = -ENOMEM;
-            goto fail;
-        }
-
-        np = dev->priv;
-        np->state               = STATE_CLOSED;
-        np->net_ring_fixmap_idx = fixmap_idx;
-        np->idx                 = i;
-
-        SET_MODULE_OWNER(dev);
-        dev->open            = network_open;
-        dev->hard_start_xmit = network_start_xmit;
-        dev->stop            = network_close;
-        dev->get_stats       = network_get_stats;
-
-        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
-
-        if ( (err = register_netdev(dev)) != 0 )
-        {
-            kfree(dev);
-            goto fail;
-        }
-
-        np->dev = dev;
-        list_add(&np->list, &dev_list);
-    }
-
-    return 0;
-
- fail:
-    cleanup_module();
-    return err;
-#endif
-    return 0;
-}
-
-
-/*
- * Module exit point: unregister and free every device on dev_list, then
- * release the network and debug IRQs and their VIRQ bindings.
- */
-static void cleanup_module(void)
-{
-    struct net_private *np;
-    struct net_device *dev;
-
-    /*
-     * dev_list is only set up by INIT_LIST_HEAD() in init_module(). If that
-     * never ran, the statically zero-filled head has a NULL 'next' pointer
-     * and must not be walked.
-     */
-    while ( (dev_list.next != NULL) && !list_empty(&dev_list) )
-    {
-        np = list_entry(dev_list.next, struct net_private, list);
-        list_del(&np->list);
-        dev = np->dev;
-        unregister_netdev(dev);
-        kfree(dev);
-    }
-
-    /*
-     * The dev_id passed to free_irq() must match the one given to
-     * request_irq(): NULL for the network IRQ, &dbg_network_int for the
-     * (shared) debug IRQ.
-     * NOTE(review): the IRQs are released unconditionally, even if they were
-     * never requested -- confirm this is acceptable.
-     */
-    free_irq(network_irq, NULL);
-    free_irq(debug_irq, &dbg_network_int);
-
-    unbind_virq_from_irq(VIRQ_NET);
-    unbind_virq_from_irq(VIRQ_DEBUG);
-}
-
-
-module_init(init_module);
-module_exit(cleanup_module);
author	kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
	Mon, 26 Apr 2004 10:02:49 +0000 (10:02 +0000)
committer	kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
	Mon, 26 Apr 2004 10:02:49 +0000 (10:02 +0000)
.rootkeys		patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/Makefile		patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c	[new file with mode: 0644]	patch \| blob
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile	[deleted file]	patch \| blob \| history
xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c	[deleted file]	patch \| blob \| history