#include "common.h"
-long __vbd_create(struct task_struct *p,
- unsigned short vdevice,
- unsigned char mode,
- unsigned char type)
+void vbd_create(blkif_vbd_create_t *create)
{
- vbd_t *vbd;
- rb_node_t **rb_p, *rb_parent = NULL;
- long ret = 0;
+ vbd_t *vbd;
+ rb_node_t **rb_p, *rb_parent = NULL;
+ blkif_t *blkif;
+ blkif_vdev_t vdevice = create->vdevice;
- spin_lock(&p->vbd_lock);
+ blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n",
+ create->domid, create->blkif_handle);
+ create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ spin_lock(&blkif->vbd_lock);
- rb_p = &p->vbd_rb.rb_node;
+ rb_p = &blkif->vbd_rb.rb_node;
while ( *rb_p != NULL )
{
rb_parent = *rb_p;
else
{
DPRINTK("vbd_create attempted for already existing vbd\n");
- ret = -EINVAL;
+ create->status = BLKIF_STATUS_VBD_EXISTS;
goto out;
}
}
if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
{
DPRINTK("vbd_create: out of memory\n");
- ret = -ENOMEM;
+ create->status = BLKIF_STATUS_OUT_OF_MEMORY;
goto out;
}
vbd->vdevice = vdevice;
- vbd->mode = mode;
- vbd->type = type;
+ vbd->mode = create->mode;
+ vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
vbd->extents = NULL;
rb_link_node(&vbd->rb, rb_parent, rb_p);
- rb_insert_color(&vbd->rb, &p->vbd_rb);
+ rb_insert_color(&vbd->rb, &blkif->vbd_rb);
+
+ create->status = BLKIF_STATUS_OKAY;
out:
- spin_unlock(&p->vbd_lock);
- return ret;
+ spin_unlock(&blkif->vbd_lock);
+ blkif_put(blkif);
}
-long vbd_create(vbd_create_t *create)
+/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
+void vbd_grow(blkif_vbd_grow_t *grow)
{
- struct task_struct *p;
- long rc;
-
- if ( unlikely(!IS_PRIV(current)) )
- return -EPERM;
+ blkif_t *blkif;
+ xen_extent_le_t **px, *x;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
+ blkif_vdev_t vdevice = grow->vdevice;
- if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) )
+ blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
+ if ( unlikely(blkif == NULL) )
{
- DPRINTK("vbd_create attempted for non-existent domain %llu\n",
- create->domain);
- return -EINVAL;
+ DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n",
+ grow->domid, grow->blkif_handle);
+ grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ return;
}
- rc = __vbd_create(p, create->vdevice, create->mode,
- XD_TYPE_DISK | XD_FLAG_VIRT);
-
- put_task_struct(p);
-
- return rc;
-}
-
-
-long __vbd_grow(struct task_struct *p,
- unsigned short vdevice,
- xen_extent_t *extent)
-{
- xen_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- long ret = 0;
+ spin_lock(&blkif->vbd_lock);
- spin_lock(&p->vbd_lock);
-
- rb = p->vbd_rb.rb_node;
+ rb = blkif->vbd_rb.rb_node;
while ( rb != NULL )
{
vbd = rb_entry(rb, vbd_t, rb);
if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
{
DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
- ret = -EINVAL;
+ grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
goto out;
}
if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
{
DPRINTK("vbd_grow: out of memory\n");
- ret = -ENOMEM;
+ grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
goto out;
}
- x->extent.device = extent->device;
- x->extent.start_sector = extent->start_sector;
- x->extent.nr_sectors = extent->nr_sectors;
- x->next = (xen_extent_le_t *)NULL;
+ x->extent.device = grow->extent.device;
+ x->extent.sector_start = grow->extent.sector_start;
+ x->extent.sector_length = grow->extent.sector_length;
+ x->next = (xen_extent_le_t *)NULL;
for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
continue;
*px = x;
- out:
- spin_unlock(&p->vbd_lock);
- return ret;
-}
-
-
-/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
-long vbd_grow(vbd_grow_t *grow)
-{
- struct task_struct *p;
- long rc;
-
- if ( unlikely(!IS_PRIV(current)) )
- return -EPERM;
-
- if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) )
- {
- DPRINTK("vbd_grow: attempted for non-existent domain %llu\n",
- grow->domain);
- return -EINVAL;
- }
-
- rc = __vbd_grow(p, grow->vdevice, &grow->extent);
-
- put_task_struct(p);
+ grow->status = BLKIF_STATUS_OKAY;
- return rc;
+ out:
+ spin_unlock(&blkif->vbd_lock);
+ blkif_put(blkif);
}
-long vbd_shrink(vbd_shrink_t *shrink)
+void vbd_shrink(blkif_vbd_shrink_t *shrink)
{
- struct task_struct *p;
+ blkif_t *blkif;
xen_extent_le_t **px, *x;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- long ret = 0;
-
- if ( !IS_PRIV(current) )
- return -EPERM;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
+ blkif_vdev_t vdevice = shrink->vdevice;
- if ( (p = find_domain_by_id(shrink->domain)) == NULL )
+ blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
+ if ( unlikely(blkif == NULL) )
{
- DPRINTK("vbd_shrink attempted for non-existent domain %llu\n",
- shrink->domain);
- return -EINVAL;
+ DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n",
+ shrink->domid, shrink->blkif_handle);
+ shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ return;
}
- spin_lock(&p->vbd_lock);
+ spin_lock(&blkif->vbd_lock);
- rb = p->vbd_rb.rb_node;
+ rb = blkif->vbd_rb.rb_node;
while ( rb != NULL )
{
vbd = rb_entry(rb, vbd_t, rb);
- if ( shrink->vdevice < vbd->vdevice )
+ if ( vdevice < vbd->vdevice )
rb = rb->rb_left;
- else if ( shrink->vdevice > vbd->vdevice )
+ else if ( vdevice > vbd->vdevice )
rb = rb->rb_right;
else
break;
}
- if ( unlikely(vbd == NULL) ||
- unlikely(vbd->vdevice != shrink->vdevice) ||
- unlikely(vbd->extents == NULL) )
+ if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
+ {
+ shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
+ goto out;
+ }
+
+ if ( unlikely(vbd->extents == NULL) )
{
- DPRINTK("vbd_shrink: attempt to remove non-existent extent.\n");
- ret = -EINVAL;
+ shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
goto out;
}
*px = x->next;
kfree(x);
- out:
- spin_unlock(&p->vbd_lock);
- put_task_struct(p);
- return ret;
-}
-
-
-long vbd_setextents(vbd_setextents_t *setextents)
-{
- struct task_struct *p;
- xen_extent_t e;
- xen_extent_le_t *new_extents, *x, *t;
- vbd_t *vbd = NULL;
- rb_node_t *rb;
- int i;
- long ret = 0;
-
- if ( !IS_PRIV(current) )
- return -EPERM;
-
- if ( (p = find_domain_by_id(setextents->domain)) == NULL )
- {
- DPRINTK("vbd_setextents attempted for non-existent domain %llu\n",
- setextents->domain);
- return -EINVAL;
- }
-
- spin_lock(&p->vbd_lock);
-
- rb = p->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, vbd_t, rb);
- if ( setextents->vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( setextents->vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- break;
- }
-
- if ( unlikely(vbd == NULL) ||
- unlikely(vbd->vdevice != setextents->vdevice) )
- {
- DPRINTK("vbd_setextents: attempt to modify non-existent VBD.\n");
- ret = -EINVAL;
- goto out;
- }
-
- /* Construct the new extent list. */
- new_extents = NULL;
- for ( i = setextents->nr_extents - 1; i >= 0; i-- )
- {
- if ( unlikely(copy_from_user(&e,
- &setextents->extents[i],
- sizeof(e)) != 0) )
- {
- DPRINTK("vbd_setextents: copy_from_user failed\n");
- ret = -EFAULT;
- goto free_and_out;
- }
-
- if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL))
- == NULL) )
- {
- DPRINTK("vbd_setextents: out of memory\n");
- ret = -ENOMEM;
- goto free_and_out;
- }
-
- x->extent = e;
- x->next = new_extents;
-
- new_extents = x;
- }
-
- /* Delete the old extent list _after_ successfully creating the new. */
- for ( x = vbd->extents; x != NULL; x = t )
- {
- t = x->next;
- kfree(x);
- }
-
- /* Make the new list visible. */
- vbd->extents = new_extents;
+ shrink->status = BLKIF_STATUS_OKAY;
out:
- spin_unlock(&p->vbd_lock);
- put_task_struct(p);
- return ret;
-
- free_and_out:
- /* Failed part-way through the new list. Delete all that we managed. */
- for ( x = new_extents; x != NULL; x = t )
- {
- t = x->next;
- kfree(x);
- }
- goto out;
+ spin_unlock(&blkif->vbd_lock);
+ blkif_put(blkif);
}
-long vbd_delete(vbd_delete_t *delete)
+void vbd_destroy(blkif_vbd_destroy_t *destroy)
{
- struct task_struct *p;
- vbd_t *vbd;
- rb_node_t *rb;
+ blkif_t *blkif;
+ vbd_t *vbd;
+ rb_node_t *rb;
xen_extent_le_t *x, *t;
+ blkif_vdev_t vdevice = destroy->vdevice;
- if( !IS_PRIV(current) )
- return -EPERM;
-
- if ( (p = find_domain_by_id(delete->domain)) == NULL )
+ blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
+ if ( unlikely(blkif == NULL) )
{
- DPRINTK("vbd_delete attempted for non-existent domain %llu\n",
- delete->domain);
- return -EINVAL;
+ DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n",
+ destroy->domid, destroy->blkif_handle);
+ destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
+ return;
}
- spin_lock(&p->vbd_lock);
+ spin_lock(&blkif->vbd_lock);
- rb = p->vbd_rb.rb_node;
+ rb = blkif->vbd_rb.rb_node;
while ( rb != NULL )
{
vbd = rb_entry(rb, vbd_t, rb);
- if ( delete->vdevice < vbd->vdevice )
+ if ( vdevice < vbd->vdevice )
rb = rb->rb_left;
- else if ( delete->vdevice > vbd->vdevice )
+ else if ( vdevice > vbd->vdevice )
rb = rb->rb_right;
else
goto found;
}
- DPRINTK("vbd_delete attempted for non-existing VBD.\n");
-
- spin_unlock(&p->vbd_lock);
- put_task_struct(p);
- return -EINVAL;
+ destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
+ goto out;
found:
- rb_erase(rb, &p->vbd_rb);
+ rb_erase(rb, &blkif->vbd_rb);
x = vbd->extents;
kfree(vbd);
x = t;
}
- spin_unlock(&p->vbd_lock);
- put_task_struct(p);
- return 0;
+ out:
+ spin_unlock(&blkif->vbd_lock);
+ blkif_put(blkif);
}
-void destroy_all_vbds(struct task_struct *p)
+void destroy_all_vbds(blkif_t *blkif)
{
vbd_t *vbd;
rb_node_t *rb;
xen_extent_le_t *x, *t;
- spin_lock(&p->vbd_lock);
+ spin_lock(&blkif->vbd_lock);
- while ( (rb = p->vbd_rb.rb_node) != NULL )
+ while ( (rb = blkif->vbd_rb.rb_node) != NULL )
{
vbd = rb_entry(rb, vbd_t, rb);
- rb_erase(rb, &p->vbd_rb);
+ rb_erase(rb, &blkif->vbd_rb);
x = vbd->extents;
kfree(vbd);
}
}
- spin_unlock(&p->vbd_lock);
+ spin_unlock(&blkif->vbd_lock);
}
--- /dev/null
+/******************************************************************************
+ * vblkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __SHARED_VBLKIF_H__
+#define __SHARED_VBLKIF_H__
+
+#define blkif_vdev_t u16
+#define blkif_sector_t u64
+
+#define BLKIF_OP_READ 0
+#define BLKIF_OP_WRITE 1
+#define BLKIF_OP_PROBE 2
+
+/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
+#define BLKIF_RING_SIZE 64
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_REQUEST_MAX_SEGMENTS 11
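+
+/*
+ * Editorial sizing sketch (not part of the interface): assuming a 32-bit
+ * guest ABI where 64-bit fields are 4-byte aligned (as on x86-32), a ring
+ * entry is roughly 1+1+2 (operation, nr_segments, device) + 4 (id)
+ * + 8 (sector_number) + 11*4 (buffer_and_sects) = 60 bytes, so the whole
+ * ring is about 2*4 + 64*60 = 3848 bytes <= PAGE_SIZE. With the two ring
+ * indexes held elsewhere, 12 segments would give exactly 64*64 = 4096,
+ * which is the "could be 12" case noted above.
+ */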
+
+typedef struct {
+ unsigned char operation; /* BLKIF_OP_??? */
+    unsigned char  nr_segments;  /* <= BLKIF_REQUEST_MAX_SEGMENTS        */
+ blkif_vdev_t device; /* only for read/write requests */
+ unsigned long id; /* private guest value, echoed in resp */
+    blkif_sector_t sector_number;  /* start sector idx on disk (r/w only)  */
+    /* Low 9 bits are 'nr_sects'; high 23 bits are the buffer address.     */
+    unsigned long  buffer_and_sects[BLKIF_REQUEST_MAX_SEGMENTS];
+} blkif_request_t;
+
+typedef struct {
+ unsigned long id; /* copied from request */
+ unsigned char operation; /* copied from request */
+ int status; /* BLKIF_RSP_??? */
+} blkif_response_t;
+
+#define BLKIF_RSP_ERROR -1 /* non-specific 'error' */
+#define BLKIF_RSP_OKAY 0 /* non-specific 'okay' */
+
+/*
+ * We use a special capitalised type name because it is _essential_ that all
+ * arithmetic on indexes is done on an integer type of the correct size.
+ */
+typedef unsigned int BLKIF_RING_IDX;
+
+/*
+ * Ring indexes are 'free running'. That is, they are not stored modulo the
+ * size of the ring buffer. The following macro converts a free-running counter
+ * into a value that can directly index a ring-buffer array.
+ */
+#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
+
+typedef struct {
+ BLKIF_RING_IDX req_prod; /* Request producer. Updated by guest OS. */
+ BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen. */
+ union {
+ blkif_request_t req;
+ blkif_response_t resp;
+ } ring[BLKIF_RING_SIZE];
+} blkif_ring_t;
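+
+/*
+ * Editorial usage sketch (not part of the interface; 'ring', 'req' and
+ * wmb() are placeholders for whatever the producer actually uses):
+ *
+ *     BLKIF_RING_IDX i = ring->req_prod;
+ *     blkif_request_t *req = &ring->ring[MASK_BLKIF_IDX(i)].req;
+ *     (fill in *req, then make it visible before publishing the index)
+ *     wmb();
+ *     ring->req_prod = i + 1;
+ *
+ * Note that req_prod is stored unmasked: indexes only wrap modulo the ring
+ * size at array-access time, via MASK_BLKIF_IDX(). The consumer keeps its
+ * own private free-running index and compares it with req_prod to see how
+ * many requests are outstanding.
+ */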
+
+
+/*
+ * BLKIF_OP_PROBE:
+ * The request format for a probe request is constrained as follows:
+ * @operation == BLKIF_OP_PROBE
+ * @nr_segments == size of probe buffer in pages
+ * @device == unused (zero)
+ * @id == any value (echoed in response message)
+ *  @sector_number == unused (zero)
+ * @buffer_and_sects == list of page-aligned, page-sized buffers.
+ * (i.e., nr_sects == 8).
+ *
+ * The response is a list of vdisk_t elements copied into the out-of-band
+ * probe buffer. On success the response status field contains the number
+ * of vdisk_t elements.
+ */
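+
+/*
+ * Editorial sketch of a probe request (not part of the interface; 'req',
+ * 'my_id' and 'buf_addr' are placeholders, where 'buf_addr' is a
+ * page-aligned buffer address and the address space it must live in is
+ * not specified here):
+ *
+ *     req->operation           = BLKIF_OP_PROBE;
+ *     req->nr_segments         = 1;            (one page-sized buffer)
+ *     req->device              = 0;
+ *     req->id                  = my_id;        (echoed in the response)
+ *     req->sector_number       = 0;
+ *     req->buffer_and_sects[0] = buf_addr | 8; (page = 8 * 512-byte sectors)
+ */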
+
+/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. */
+#define VDISK_TYPE_FLOPPY 0x00
+#define VDISK_TYPE_TAPE 0x01
+#define VDISK_TYPE_CDROM 0x05
+#define VDISK_TYPE_OPTICAL 0x07
+#define VDISK_TYPE_DISK 0x20
+
+#define VDISK_TYPE_MASK 0x3F
+#define VDISK_TYPE(_x) ((_x) & VDISK_TYPE_MASK)
+
+/* The top two bits of the type field encode various flags. */
+#define VDISK_FLAG_RO 0x40
+#define VDISK_FLAG_VIRT 0x80
+#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO)
+#define VDISK_VIRTUAL(_x) ((_x) & VDISK_FLAG_VIRT)
+
+typedef struct {
+ blkif_vdev_t device; /* Device number (opaque 16 bit value). */
+ unsigned short info; /* Device type and flags (VDISK_*). */
+    blkif_sector_t capacity;     /* Size in terms of 512-byte sectors.   */
+} vdisk_t;
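+
+/*
+ * Editorial sketch of decoding one probe result (not part of the
+ * interface; 'probe_buffer', 'i' and 'register_vbd' are placeholders):
+ *
+ *     vdisk_t *d = &probe_buffer[i];
+ *     if ( VDISK_VIRTUAL(d->info) && (VDISK_TYPE(d->info) == VDISK_TYPE_DISK) )
+ *         register_vbd(d->device, d->capacity, VDISK_READONLY(d->info));
+ */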
+
+#endif /* __SHARED_VBLKIF_H__ */