xc.domain_destroy ( dom=id )
sys.exit()
- # will the domain have IO privileges?
- if pci_device_list != []: io_priv = True
- else: io_priv = False
-
if restore:
- ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1, io_priv=%d )' % (builder_fn, io_priv))
+ ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1 )' % builder_fn )
if ret < 0:
print "Error restoring domain"
print "Return code = " + str(ret)
sys.exit()
else:
- ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"], io_priv=%d )' % (builder_fn, io_priv) )
+ ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn )
if ret < 0:
print "Error building Linux guest OS: "
print "Return code = " + str(ret)
const char *image_name,
const char *ramdisk_name,
const char *cmdline,
- unsigned int control_evtchn,
- int io_priv);
+ unsigned int control_evtchn);
int xc_netbsd_build(int xc_handle,
u64 domid,
const char *image_name,
const char *cmdline,
- unsigned int control_evtchn,
- int io_priv);
+ unsigned int control_evtchn);
int xc_bvtsched_global_set(int xc_handle,
unsigned long ctx_allow);
dom0_builddomain_t *builddomain,
const char *cmdline,
unsigned long shared_info_frame,
- unsigned int control_evtchn,
- int io_priv)
+ unsigned int control_evtchn)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
memset(start_info, 0, sizeof(*start_info));
start_info->nr_pages = nr_pages;
start_info->shared_info = shared_info_frame << PAGE_SHIFT;
- start_info->flags = io_priv ? SIF_PRIVILEGED : 0;
+ start_info->flags = 0;
start_info->pt_base = vpt_start;
start_info->nr_pt_frames = nr_pt_pages;
start_info->mfn_list = vphysmap_start;
const char *image_name,
const char *ramdisk_name,
const char *cmdline,
- unsigned int control_evtchn,
- int io_priv)
+ unsigned int control_evtchn)
{
dom0_op_t launch_op, op;
int initrd_fd = -1;
&vstartinfo_start, &vkern_entry,
&launch_op.u.builddomain, cmdline,
op.u.getdomaininfo.shared_info_frame,
- control_evtchn, io_priv) < 0 )
+ control_evtchn) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
dom0_builddomain_t *builddomain,
const char *cmdline,
unsigned long shared_info_frame,
- unsigned int control_evtchn,
- int io_priv)
+ unsigned int control_evtchn)
{
l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
start_info->mod_len = symtab_len;
start_info->nr_pages = tot_pages;
start_info->shared_info = shared_info_frame << PAGE_SHIFT;
- start_info->flags = io_priv ? SIF_PRIVILEGED : 0;
+ start_info->flags = 0;
start_info->domain_controller_evtchn = control_evtchn;
strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
start_info->cmd_line[MAX_CMDLINE-1] = '\0';
u64 domid,
const char *image_name,
const char *cmdline,
- unsigned int control_evtchn,
- int io_priv)
+ unsigned int control_evtchn)
{
dom0_op_t launch_op, op;
unsigned long load_addr;
&virt_startinfo_addr,
&load_addr, &launch_op.u.builddomain, cmdline,
op.u.getdomaininfo.shared_info_frame,
- control_evtchn, io_priv) < 0 )
+ control_evtchn) < 0 )
{
ERROR("Error constructing guest OS");
goto error_out;
u64 dom;
char *image, *ramdisk = NULL, *cmdline = "";
- int control_evtchn, io_priv = 0;
+ int control_evtchn;
static char *kwd_list[] = { "dom", "control_evtchn",
- "image", "ramdisk", "cmdline", "io_priv",
- NULL };
+ "image", "ramdisk", "cmdline", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list,
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list,
&dom, &control_evtchn,
- &image, &ramdisk, &cmdline, &io_priv) )
+ &image, &ramdisk, &cmdline) )
return NULL;
if ( xc_linux_build(xc->xc_handle, dom, image,
- ramdisk, cmdline, control_evtchn, io_priv) != 0 )
+ ramdisk, cmdline, control_evtchn) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
u64 dom;
char *image, *ramdisk = NULL, *cmdline = "";
- int control_evtchn, io_priv = 0;
+ int control_evtchn;
static char *kwd_list[] = { "dom", "control_evtchn",
- "image", "ramdisk", "cmdline", "io_priv",
- NULL };
+ "image", "ramdisk", "cmdline", NULL };
if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list,
&dom, &control_evtchn,
- &image, &ramdisk, &cmdline, &io_priv) )
+ &image, &ramdisk, &cmdline) )
return NULL;
if ( xc_netbsd_build(xc->xc_handle, dom, image,
- cmdline, control_evtchn, io_priv) != 0 )
+ cmdline, control_evtchn) != 0 )
return PyErr_SetFromErrno(xc_error);
Py_INCREF(zero);
" dom [long]: Identifier of domain to build into.\n"
" image [str]: Name of kernel image file. May be gzipped.\n"
" ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
- " cmdline [str, n/a]: Kernel parameters, if any.\n"
- " io_priv [boolean]: Does the domain have IO privileges?\n\n"
+ " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "netbsd_build",
"Build a new NetBSD guest OS.\n"
" dom [long]: Identifier of domain to build into.\n"
" image [str]: Name of kernel image file. May be gzipped.\n"
- " cmdline [str, n/a]: Kernel parameters, if any.\n"
- " io_priv [boolean]: Does the domain have IO privileges?\n\n"
+ " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "bvtsched_global_set",
(PyCFunction)pyxc_bvtsched_global_set,
METH_VARARGS | METH_KEYWORDS, "\n"
"Set global tuning parameters for Borrowed Virtual Time scheduler.\n"
- " ctx_allow [int]: Minimal guaranteed quantum (I think!).\n\n"
+ " ctx_allow [int]: Minimal guaranteed quantum.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "bvtsched_global_get",
METH_VARARGS | METH_KEYWORDS, "\n"
"Set per-domain tuning parameters for Borrowed Virtual Time scheduler.\n"
" dom [long]: Identifier of domain to be tuned.\n"
- " mcuadv [int]: Internal BVT parameter.\n"
- " warp [int]: Internal BVT parameter.\n"
- " warpl [int]: Internal BVT parameter.\n"
- " warpu [int]: Internal BVT parameter.\n\n"
+ " mcuadv [int]: Proportional to the inverse of the domain's weight.\n"
+ " warp [int]: How far to warp domain's EVT on unblock.\n"
+ " warpl [int]: How long the domain can run warped.\n"
+ " warpu [int]: How long before the domain can warp again.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
{ "bvtsched_domain_get",
irq_guest_action_t *action;
int rc = 0;
- if ( !IS_PRIV(p) )
+ if ( !IS_CAPABLE_PHYSDEV(p) )
return -EPERM;
spin_lock_irqsave(&desc->lock, flags);
: /* no output */ \
:"r" (thread->debugreg[register]))
+
void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *next = &next_p->thread;
struct tss_struct *tss = init_tss + smp_processor_id();
execution_context_t *stack_ec = get_execution_context();
-
+ int i;
+
__cli();
/* Switch guest general-register state. */
}
}
+ if ( ( prev_p->io_bitmap != NULL ) || ( next_p->io_bitmap != NULL ) ) {
+ if ( next_p->io_bitmap != NULL ) {
+ /* Copy in the appropriate parts of the IO bitmap. We use the
+ * selector to copy only the interesting parts of the bitmap. */
+
+ u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */
+
+ if ( prev_p->io_bitmap != NULL)
+ {
+ old_sel = prev_p->io_bitmap_sel;
+
+ /* Replace any areas of the IO bitmap that had bits cleared. */
+ for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+ if ( !test_bit(i, &prev_p->io_bitmap_sel) )
+ memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+ }
+
+ /* Copy in any regions of the new task's bitmap that have bits
+ * clear and we haven't already dealt with. */
+ for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+ {
+ if ( test_bit(i, &old_sel)
+ && !test_bit(i, &next_p->io_bitmap_sel) )
+ memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+ }
+
+ tss->bitmap = IO_BITMAP_OFFSET;
+
+ } else {
+ /* In this case, we're switching FROM a task with IO port access,
+ * to a task that doesn't use the IO bitmap. We set any TSS bits
+ * that might have been cleared, ready for future use. */
+ for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+ if ( !test_bit(i, &prev_p->io_bitmap_sel) )
+ memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+ 0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+
+ /*
+ * a bitmap offset pointing outside of the TSS limit
+ * causes a nicely controllable SIGSEGV if a process
+ * tries to use a port IO instruction. The first
+ * sys_ioperm() call sets up the bitmap properly.
+ */
+ tss->bitmap = INVALID_IO_BITMAP_OFFSET;
+ }
+ }
+
+
/* Switch page tables. */
write_ptbase(&next_p->mm);
tlb_clocktick();
void set_tss_desc(unsigned int n, void *addr)
{
- _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89);
+ _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 8299, 0x89);
}
void __init trap_init(void)
{
struct task_struct *p;
struct pci_dev *pdev;
- int rc = 0;
+ int i, j, rc = 0;
if ( !IS_PRIV(current) )
BUG();
return -ESRCH;
/* Make the domain privileged. */
- set_bit(PF_PRIVILEGED, &p->flags);
+ set_bit(PF_PHYSDEV, &p->flags);
/* Grant write access to the specified device. */
if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL )
INFO("XXX can't give access to bridge devices yet\n");
+ /* Now, setup access to the IO ports and memory regions for the device. */
+
+ if ( p->io_bitmap == NULL )
+ {
+ p->io_bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ if ( p->io_bitmap == NULL )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(p->io_bitmap, 0xFF, IO_BITMAP_BYTES);
+
+ p->io_bitmap_sel = ~0ULL;
+ }
+
+ for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
+ {
+ struct resource *r = &pdev->resource[i];
+
+ if ( r->flags & IORESOURCE_IO )
+ {
+ /* Give the domain access to the IO ports it needs. Currently,
+ * this will allow all processes in that domain access to those
+ * ports as well. This will do for now, since driver domains don't
+ * run untrusted processes! */
+ INFO("Giving domain %llu IO resources (%lx - %lx) "
+ "for device %s\n", dom, r->start, r->end, pdev->slot_name);
+ for ( j = r->start; j < r->end + 1; j++ )
+ {
+ clear_bit(j, p->io_bitmap);
+ /* Record that we cleared a bit using bit n of the selector:
+ * n = (j / (4 bytes in a word * 8 bits in a byte))
+ * / number of words per selector bit
+ */
+ clear_bit((j / (8 * 4)) / IOBMP_SELBIT_LWORDS,
+ &p->io_bitmap_sel);
+ }
+ }
+ else if ( r->flags & IORESOURCE_MEM )
+ {
+ /* allow domain to map IO memory for this device */
+ INFO("Giving domain %llu memory resources (%lx - %lx) "
+ "for device %s\n", dom, r->start, r->end, pdev->slot_name);
+ for ( j = r->start; j < r->end + 1; j += PAGE_SIZE )
+ SHARE_PFN_WITH_DOMAIN(frame_table + (j >> PAGE_SHIFT), p);
+ }
+ }
+
+
out:
put_task_struct(p);
return rc;
*pdev = NULL;
- if ( !IS_PRIV(p) )
- return -EPERM; /* no pci acces permission */
+ if ( !IS_CAPABLE_PHYSDEV(p) )
+ return -EPERM; /* no pci access permission */
if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX )
return -EINVAL;
dev->slot_name);
}
}
+
+ set_bit(PF_PHYSDEV, &p->flags);
}
#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
/*
- * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ * Size of io_bitmap in longwords: 2048 is ports 0-0xffff.
+ * For Xen we support the full 8kbyte IO bitmap but use the io_bitmap_sel field
+ * of the task_struct to avoid a full 8kbyte copy when switching to / from
+ * domains with bits cleared.
*/
-#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_SIZE 2048
#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4)
#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
#define INVALID_IO_BITMAP_OFFSET 0x8000
0,0,0,0,0,0, /* ds,fs,gs */ \
0,0, /* ldt */ \
0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \
- {~0, } /* ioperm */ \
+ { [0 ... IO_BITMAP_SIZE] = ~0UL }, /* ioperm */ \
}
struct mm_struct {
#define PF_IDLETASK 4 /* Is this one of the per-CPU idle domains? */
#define PF_PRIVILEGED 5 /* Is this domain privileged? */
#define PF_CONSOLEWRITEBUG 6 /* Has this domain used the obsolete console? */
+#define PF_PHYSDEV 7 /* May this domain do IO to physical devices? */
#include <xen/vif.h>
#include <xen/vbd.h>
#define IS_PRIV(_p) (test_bit(PF_PRIVILEGED, &(_p)->flags))
+#define IS_CAPABLE_PHYSDEV(_p) (test_bit(PF_PHYSDEV, &(_p)->flags))
struct task_struct;
spinlock_t pcidev_lock;
struct list_head pcidev_list;
+ /* The following IO bitmap stuff is x86-dependent. */
+ u64 io_bitmap_sel; /* Selector to tell us which part of the IO bitmap are
+ * "interesting" (i.e. have clear bits) */
+
+ /* Handy macro - number of longwords of the IO bitmap, per selector bit. */
+#define IOBMP_SELBIT_LWORDS ( IO_BITMAP_SIZE / 64 )
+ unsigned long *io_bitmap; /* Pointer to task's IO bitmap or NULL */
+
unsigned long flags;
atomic_t refcnt;