bitkeeper revision 1.1236.25.9 (42335c497Bt0QbOvYK3fGa02eZ4_Sw)
author cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
Sat, 12 Mar 2005 21:16:57 +0000 (21:16 +0000)
committer cl349@firebug.cl.cam.ac.uk <cl349@firebug.cl.cam.ac.uk>
Sat, 12 Mar 2005 21:16:57 +0000 (21:16 +0000)
Update to Linux 2.6.11.
Signed-off-by: Christian Limpach <chris@xensource.com>
39 files changed:
.rootkeys
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c [deleted file]
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c [deleted file]
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c [deleted file]
linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c [deleted file]
linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h [deleted file]
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h [deleted file]
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h [deleted file]
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c [new file with mode: 0644]
linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h [new file with mode: 0644]
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h [new file with mode: 0644]
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h [new file with mode: 0644]

index be7ae7e789382b1bc5810a40b8d25ba1abc982c7..6f0b7f4aeac8c5a08ac2a7ceb656392d13c469e3 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.29-xen-sparse/mm/swapfile.c
 41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.29-xen-sparse/mm/vmalloc.c
 41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.29-xen-sparse/net/core/skbuff.c
-41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
-41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
-41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
-418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
-41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
-41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
-41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
-41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
-41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
-41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
-41ee5e8bYDQkjRVKnFn5uFyy0KreCw linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h
-41ee5e8bt7xeBUJqG5XJS-ofukdsgA linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c
-41ee5e8bSs3BGC7yegM_ek2Tn0Ahvw linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c
-41ee5e8bglvqKvZSY5uJ5JGQejEwyQ linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c
-41ee5e8ckZ9xVNvu9NHIZDK7JqApmQ linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c
-41ee5e8ck9scpGirfqEZRARbGDyTXA linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h
-41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
-4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
 40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.11-xen-sparse/arch/xen/Kconfig
 40f56237utH41NPukqHksuNf29IC9A linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers
 40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.11-xen-sparse/arch/xen/Makefile
 40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S
 40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
 40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c
+41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
 40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
 41d54a76YMCA67S8J-TBT3J62Wx6yA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/microcode.c
 4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
 40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
 40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c
 40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c
+41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
+41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
 40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c
 40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile
 40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
 412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
 40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c
 414c113396tK1HTVeUalm3u-1DF16g linux-2.6.11-xen-sparse/arch/xen/kernel/skbuff.c
+418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c
 3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c
 41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.11-xen-sparse/drivers/Makefile
 4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.11-xen-sparse/drivers/char/mem.c
 40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
 40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
 40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
+41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
+41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c
+41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h
+41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
+41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
+41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
 40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.11-xen-sparse/drivers/xen/console/Makefile
 3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.11-xen-sparse/drivers/xen/console/console.c
 40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.11-xen-sparse/drivers/xen/evtchn/Makefile
 405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
 4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.11-xen-sparse/drivers/xen/privcmd/Makefile
 3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
+41ee5e8bYDQkjRVKnFn5uFyy0KreCw linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h
+41ee5e8bt7xeBUJqG5XJS-ofukdsgA linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c
+41ee5e8bSs3BGC7yegM_ek2Tn0Ahvw linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c
+41ee5e8bglvqKvZSY5uJ5JGQejEwyQ linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
+41ee5e8ckZ9xVNvu9NHIZDK7JqApmQ linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c
+41ee5e8ck9scpGirfqEZRARbGDyTXA linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
 412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h
 40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h
 4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
 40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
 40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
 40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
+41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
 4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
 40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
 40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/param.h
 412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h
 40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h
 40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/setup.h
+4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
 40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
 40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
 40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
deleted file mode 100644 (file)
index 6cd16cc..0000000
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- *     linux/arch/i386/kernel/irq.c
- *
- *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the lowest level x86-specific interrupt
- * entry, irq-stacks and irq statistics code. All the remaining
- * irq logic is done by the generic kernel/irq/ code and
- * by the x86-specific irq controller code. (e.g. i8259.c and
- * io_apic.c.)
- */
-
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-
-#ifndef CONFIG_X86_LOCAL_APIC
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-       printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-#endif
-
-#ifdef CONFIG_4KSTACKS
-/*
- * per-CPU IRQ handling contexts (thread information and stack)
- */
-union irq_ctx {
-       struct thread_info      tinfo;
-       u32                     stack[THREAD_SIZE/sizeof(u32)];
-};
-
-static union irq_ctx *hardirq_ctx[NR_CPUS];
-static union irq_ctx *softirq_ctx[NR_CPUS];
-#endif
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
-{      
-       /* high bits used in ret_from_ code */
-       int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
-#ifdef CONFIG_4KSTACKS
-       union irq_ctx *curctx, *irqctx;
-       u32 *isp;
-#endif
-
-       irq_enter();
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-       /* Debugging check for stack overflow: is there less than 1KB free? */
-       {
-               long esp;
-
-               __asm__ __volatile__("andl %%esp,%0" :
-                                       "=r" (esp) : "0" (THREAD_SIZE - 1));
-               if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
-                       printk("do_IRQ: stack overflow: %ld\n",
-                               esp - sizeof(struct thread_info));
-                       dump_stack();
-               }
-       }
-#endif
-
-#ifdef CONFIG_4KSTACKS
-
-       curctx = (union irq_ctx *) current_thread_info();
-       irqctx = hardirq_ctx[smp_processor_id()];
-
-       /*
-        * this is where we switch to the IRQ stack. However, if we are
-        * already using the IRQ stack (because we interrupted a hardirq
-        * handler) we can't do that and just have to keep using the
-        * current stack (which is the irq stack already after all)
-        */
-       if (curctx != irqctx) {
-               int arg1, arg2, ebx;
-
-               /* build the stack frame on the IRQ stack */
-               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
-               irqctx->tinfo.task = curctx->tinfo.task;
-               irqctx->tinfo.previous_esp = current_stack_pointer;
-
-               asm volatile(
-                       "       xchgl   %%ebx,%%esp      \n"
-                       "       call    __do_IRQ         \n"
-                       "       movl   %%ebx,%%esp      \n"
-                       : "=a" (arg1), "=d" (arg2), "=b" (ebx)
-                       :  "0" (irq),   "1" (regs),  "2" (isp)
-                       : "memory", "cc", "ecx"
-               );
-       } else
-#endif
-               __do_IRQ(irq, regs);
-
-       irq_exit();
-
-       return 1;
-}
-
-#ifdef CONFIG_4KSTACKS
-
-/*
- * These should really be __section__(".bss.page_aligned") as well, but
- * gcc's 3.0 and earlier don't handle that correctly.
- */
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
-               __attribute__((__aligned__(THREAD_SIZE)));
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
-               __attribute__((__aligned__(THREAD_SIZE)));
-
-/*
- * allocate per-cpu stacks for hardirq and for softirq processing
- */
-void irq_ctx_init(int cpu)
-{
-       union irq_ctx *irqctx;
-
-       if (hardirq_ctx[cpu])
-               return;
-
-       irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
-       irqctx->tinfo.task              = NULL;
-       irqctx->tinfo.exec_domain       = NULL;
-       irqctx->tinfo.cpu               = cpu;
-       irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
-       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
-
-       hardirq_ctx[cpu] = irqctx;
-
-       irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
-       irqctx->tinfo.task              = NULL;
-       irqctx->tinfo.exec_domain       = NULL;
-       irqctx->tinfo.cpu               = cpu;
-       irqctx->tinfo.preempt_count     = SOFTIRQ_OFFSET;
-       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
-
-       softirq_ctx[cpu] = irqctx;
-
-       printk("CPU %u irqstacks, hard=%p soft=%p\n",
-               cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
-}
-
-extern asmlinkage void __do_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
-       unsigned long flags;
-       struct thread_info *curctx;
-       union irq_ctx *irqctx;
-       u32 *isp;
-
-       if (in_interrupt())
-               return;
-
-       local_irq_save(flags);
-
-       if (local_softirq_pending()) {
-               curctx = current_thread_info();
-               irqctx = softirq_ctx[smp_processor_id()];
-               irqctx->tinfo.task = curctx->task;
-               irqctx->tinfo.previous_esp = current_stack_pointer;
-
-               /* build the stack frame on the softirq stack */
-               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
-
-               asm volatile(
-                       "       xchgl   %%ebx,%%esp     \n"
-                       "       call    __do_softirq    \n"
-                       "       movl    %%ebx,%%esp     \n"
-                       : "=b"(isp)
-                       : "0"(isp)
-                       : "memory", "cc", "edx", "ecx", "eax"
-               );
-       }
-
-       local_irq_restore(flags);
-}
-
-EXPORT_SYMBOL(do_softirq);
-#endif
-
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-       int i = *(loff_t *) v, j;
-       struct irqaction * action;
-       unsigned long flags;
-
-       if (i == 0) {
-               seq_printf(p, "           ");
-               for (j=0; j<NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "CPU%d       ",j);
-               seq_putc(p, '\n');
-       }
-
-       if (i < NR_IRQS) {
-               spin_lock_irqsave(&irq_desc[i].lock, flags);
-               action = irq_desc[i].action;
-               if (!action)
-                       goto skip;
-               seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-               seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
-               seq_printf(p, " %14s", irq_desc[i].handler->typename);
-               seq_printf(p, "  %s", action->name);
-
-               for (action=action->next; action; action = action->next)
-                       seq_printf(p, ", %s", action->name);
-
-               seq_putc(p, '\n');
-skip:
-               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-       } else if (i == NR_IRQS) {
-               seq_printf(p, "NMI: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ", nmi_count(j));
-               seq_putc(p, '\n');
-#ifdef CONFIG_X86_LOCAL_APIC
-               seq_printf(p, "LOC: ");
-               for (j = 0; j < NR_CPUS; j++)
-                       if (cpu_online(j))
-                               seq_printf(p, "%10u ",
-                                       irq_stat[j].apic_timer_irqs);
-               seq_putc(p, '\n');
-#endif
-               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
-               seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
-       }
-       return 0;
-}
diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
deleted file mode 100644 (file)
index 9fabbfe..0000000
+++ /dev/null
@@ -1,599 +0,0 @@
-/*
- *     Intel SMP support routines.
- *
- *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- *     (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *     This code is released under the GNU General Public License version 2 or
- *     later.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/irq.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#if 0
-#include <mach_apic.h>
-#endif
-#include <asm-xen/evtchn.h>
-
-#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
-
-/*
- *     Some notes on x86 processor bugs affecting SMP operation:
- *
- *     Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
- *     The Linux implications for SMP are handled as follows:
- *
- *     Pentium III / [Xeon]
- *             None of the E1AP-E3AP errata are visible to the user.
- *
- *     E1AP.   see PII A1AP
- *     E2AP.   see PII A2AP
- *     E3AP.   see PII A3AP
- *
- *     Pentium II / [Xeon]
- *             None of the A1AP-A3AP errata are visible to the user.
- *
- *     A1AP.   see PPro 1AP
- *     A2AP.   see PPro 2AP
- *     A3AP.   see PPro 7AP
- *
- *     Pentium Pro
- *             None of 1AP-9AP errata are visible to the normal user,
- *     except occasional delivery of 'spurious interrupt' as trap #15.
- *     This is very rare and a non-problem.
- *
- *     1AP.    Linux maps APIC as non-cacheable
- *     2AP.    worked around in hardware
- *     3AP.    fixed in C0 and above steppings microcode update.
- *             Linux does not use excessive STARTUP_IPIs.
- *     4AP.    worked around in hardware
- *     5AP.    symmetric IO mode (normal Linux operation) not affected.
- *             'noapic' mode has vector 0xf filled out properly.
- *     6AP.    'noapic' mode might be affected - fixed in later steppings
- *     7AP.    We do not assume writes to the LVT deassering IRQs
- *     8AP.    We do not enable low power mode (deep sleep) during MP bootup
- *     9AP.    We do not use mixed mode
- *
- *     Pentium
- *             There is a marginal case where REP MOVS on 100MHz SMP
- *     machines with B stepping processors can fail. XXX should provide
- *     an L1cache=Writethrough or L1cache=off option.
- *
- *             B stepping CPUs may hang. There are hardware work arounds
- *     for this. We warn about it in case your board doesn't have the work
- *     arounds. Basically thats so I can tell anyone with a B stepping
- *     CPU and SMP problems "tough".
- *
- *     Specific items [From Pentium Processor Specification Update]
- *
- *     1AP.    Linux doesn't use remote read
- *     2AP.    Linux doesn't trust APIC errors
- *     3AP.    We work around this
- *     4AP.    Linux never generated 3 interrupts of the same priority
- *             to cause a lost local interrupt.
- *     5AP.    Remote read is never used
- *     6AP.    not affected - worked around in hardware
- *     7AP.    not affected - worked around in hardware
- *     8AP.    worked around in hardware - we get explicit CS errors if not
- *     9AP.    only 'noapic' mode affected. Might generate spurious
- *             interrupts, we log only the first one and count the
- *             rest silently.
- *     10AP.   not affected - worked around in hardware
- *     11AP.   Linux reads the APIC between writes to avoid this, as per
- *             the documentation. Make sure you preserve this as it affects
- *             the C stepping chips too.
- *     12AP.   not affected - worked around in hardware
- *     13AP.   not affected - worked around in hardware
- *     14AP.   we always deassert INIT during bootup
- *     15AP.   not affected - worked around in hardware
- *     16AP.   not affected - worked around in hardware
- *     17AP.   not affected - worked around in hardware
- *     18AP.   not affected - worked around in hardware
- *     19AP.   not affected - worked around in BIOS
- *
- *     If this sounds worrying believe me these bugs are either ___RARE___,
- *     or are signal timing bugs worked around in hardware and there's
- *     about nothing of note with C stepping upwards.
- */
-
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline int __prepare_ICR (unsigned int shortcut, int vector)
-{
-       return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
-}
-
-static inline int __prepare_ICR2 (unsigned int mask)
-{
-       return SET_APIC_DEST_FIELD(mask);
-}
-
-DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
-
-static inline void __send_IPI_one(unsigned int cpu, int vector)
-{
-       unsigned int evtchn;
-
-       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
-       // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
-       if (evtchn) {
-#if 0
-               shared_info_t *s = HYPERVISOR_shared_info;
-               while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
-                      synch_test_bit(evtchn, &s->evtchn_mask[0]))
-                       ;
-#endif
-               notify_via_evtchn(evtchn);
-       } else
-               printk("send_IPI to unbound port %d/%d",
-                      cpu, vector);
-}
-
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
-{
-       int cpu;
-
-       switch (shortcut) {
-       case APIC_DEST_SELF:
-               __send_IPI_one(smp_processor_id(), vector);
-               break;
-       case APIC_DEST_ALLBUT:
-               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-                       if (cpu == smp_processor_id())
-                               continue;
-                       if (cpu_isset(cpu, cpu_online_map)) {
-                               __send_IPI_one(cpu, vector);
-                       }
-               }
-               break;
-       default:
-               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
-                      vector);
-               break;
-       }
-}
-
-void fastcall send_IPI_self(int vector)
-{
-       __send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
-/*
- * This is only used on smaller machines.
- */
-void send_IPI_mask_bitmask(cpumask_t mask, int vector)
-{
-       unsigned long flags;
-       unsigned int cpu;
-
-       local_irq_save(flags);
-
-       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-               if (cpu_isset(cpu, mask)) {
-                       __send_IPI_one(cpu, vector);
-               }
-       }
-
-       local_irq_restore(flags);
-}
-
-inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
-{
-
-       send_IPI_mask_bitmask(mask, vector);
-}
-
-#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
-
-/*
- *     Smarter SMP flushing macros. 
- *             c/o Linus Torvalds.
- *
- *     These mean you can really definitely utterly forget about
- *     writing to user space from interrupts. (Its not allowed anyway).
- *
- *     Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
-#define FLUSH_ALL      0xffffffff
-
-/*
- * We cannot call mmdrop() because we are in interrupt context, 
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-static inline void leave_mm (unsigned long cpu)
-{
-       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
-               BUG();
-       cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
-       load_cr3(swapper_pg_dir);
-}
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *     Stop ipi delivery for the old mm. This is not synchronized with
- *     the other cpus, but smp_invalidate_interrupt ignore flush ipis
- *     for the wrong mm, and in the worst case we perform a superflous
- *     tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- *     Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *     was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- *     Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- *     Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *     cpu_tlbstate[].active_mm is correct, cpu0 already handles
- *     flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *     Atomically set the bit [other cpus will start sending flush ipis],
- *     and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
-                                    struct pt_regs *regs)
-{
-       unsigned long cpu;
-
-       cpu = get_cpu();
-
-       if (!cpu_isset(cpu, flush_cpumask))
-               goto out;
-               /* 
-                * This was a BUG() but until someone can quote me the
-                * line from the intel manual that guarantees an IPI to
-                * multiple CPUs is retried _only_ on the erroring CPUs
-                * its staying as a return
-                *
-                * BUG();
-                */
-                
-       if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
-               if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
-                       if (flush_va == FLUSH_ALL)
-                               local_flush_tlb();
-                       else
-                               __flush_tlb_one(flush_va);
-               } else
-                       leave_mm(cpu);
-       }
-       smp_mb__before_clear_bit();
-       cpu_clear(cpu, flush_cpumask);
-       smp_mb__after_clear_bit();
-out:
-       put_cpu_no_resched();
-
-       return IRQ_HANDLED;
-}
-
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
-                                               unsigned long va)
-{
-       cpumask_t tmp;
-       /*
-        * A couple of (to be removed) sanity checks:
-        *
-        * - we do not send IPIs to not-yet booted CPUs.
-        * - current CPU must not be in mask
-        * - mask must exist :)
-        */
-       BUG_ON(cpus_empty(cpumask));
-
-       cpus_and(tmp, cpumask, cpu_online_map);
-       BUG_ON(!cpus_equal(cpumask, tmp));
-       BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-       BUG_ON(!mm);
-
-       /*
-        * i'm not happy about this global shared spinlock in the
-        * MM hot path, but we'll see how contended it is.
-        * Temporarily this turns IRQs off, so that lockups are
-        * detected by the NMI watchdog.
-        */
-       spin_lock(&tlbstate_lock);
-       
-       flush_mm = mm;
-       flush_va = va;
-#if NR_CPUS <= BITS_PER_LONG
-       atomic_set_mask(cpumask, &flush_cpumask);
-#else
-       {
-               int k;
-               unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
-               unsigned long *cpu_mask = (unsigned long *)&cpumask;
-               for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
-                       atomic_set_mask(cpu_mask[k], &flush_mask[k]);
-       }
-#endif
-       /*
-        * We have to send the IPI only to
-        * CPUs affected.
-        */
-       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
-       while (!cpus_empty(flush_cpumask))
-               /* nothing. lockup detection does not belong here */
-               mb();
-
-       flush_mm = NULL;
-       flush_va = 0;
-       spin_unlock(&tlbstate_lock);
-}
-       
-void flush_tlb_current_task(void)
-{
-       struct mm_struct *mm = current->mm;
-       cpumask_t cpu_mask;
-
-       preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
-
-       local_flush_tlb();
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-       preempt_enable();
-}
-
-void flush_tlb_mm (struct mm_struct * mm)
-{
-       cpumask_t cpu_mask;
-
-       preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
-
-       if (current->active_mm == mm) {
-               if (current->mm)
-                       local_flush_tlb();
-               else
-                       leave_mm(smp_processor_id());
-       }
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
-       preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       cpumask_t cpu_mask;
-
-       preempt_disable();
-       cpu_mask = mm->cpu_vm_mask;
-       cpu_clear(smp_processor_id(), cpu_mask);
-
-       if (current->active_mm == mm) {
-               if(current->mm)
-                       __flush_tlb_one(va);
-               else
-                       leave_mm(smp_processor_id());
-       }
-
-       if (!cpus_empty(cpu_mask))
-               flush_tlb_others(cpu_mask, mm, va);
-
-       preempt_enable();
-}
-
-static void do_flush_tlb_all(void* info)
-{
-       unsigned long cpu = smp_processor_id();
-
-       __flush_tlb_all();
-       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
-               leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
-       on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-}
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-void smp_send_reschedule(int cpu)
-{
-       send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
-
-struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       atomic_t started;
-       atomic_t finished;
-       int wait;
-};
-
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-                       int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-{
-       struct call_data_struct data;
-       int cpus = num_online_cpus()-1;
-
-       if (!cpus)
-               return 0;
-
-       /* Can deadlock when called with interrupts disabled */
-       WARN_ON(irqs_disabled());
-
-       data.func = func;
-       data.info = info;
-       atomic_set(&data.started, 0);
-       data.wait = wait;
-       if (wait)
-               atomic_set(&data.finished, 0);
-
-       spin_lock(&call_lock);
-       call_data = &data;
-       mb();
-       
-       /* Send a message to all other CPUs and wait for them to respond */
-       send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
-       /* Wait for response */
-       while (atomic_read(&data.started) != cpus)
-               barrier();
-
-       if (wait)
-               while (atomic_read(&data.finished) != cpus)
-                       barrier();
-       spin_unlock(&call_lock);
-
-       return 0;
-}
-
-static void stop_this_cpu (void * dummy)
-{
-       /*
-        * Remove this CPU:
-        */
-       cpu_clear(smp_processor_id(), cpu_online_map);
-       local_irq_disable();
-#if 1
-       xxprint("stop_this_cpu disable_local_APIC\n");
-#else
-       disable_local_APIC();
-#endif
-       if (cpu_data[smp_processor_id()].hlt_works_ok)
-               for(;;) __asm__("hlt");
-       for (;;);
-}
-
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
-void smp_send_stop(void)
-{
-       smp_call_function(stop_this_cpu, NULL, 1, 0);
-
-       local_irq_disable();
-#if 1
-       xxprint("smp_send_stop disable_local_APIC\n");
-#else
-       disable_local_APIC();
-#endif
-       local_irq_enable();
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
-                                    struct pt_regs *regs)
-{
-
-       return IRQ_HANDLED;
-}
-
-#include <linux/kallsyms.h>
-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
-                                       struct pt_regs *regs)
-{
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
-       int wait = call_data->wait;
-
-       /*
-        * Notify initiating CPU that I've grabbed the data and am
-        * about to execute the function
-        */
-       mb();
-       atomic_inc(&call_data->started);
-       /*
-        * At this point the info structure may be out of scope unless wait==1
-        */
-       irq_enter();
-       (*func)(info);
-       irq_exit();
-
-       if (wait) {
-               mb();
-               atomic_inc(&call_data->finished);
-       }
-
-       return IRQ_HANDLED;
-}
-
diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
deleted file mode 100644 (file)
index 4dd03ba..0000000
+++ /dev/null
@@ -1,1364 +0,0 @@
-/*
- *     x86 SMP booting functions
- *
- *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *     Much of the core SMP work is based on previous work by Thomas Radke, to
- *     whom a great many thanks are extended.
- *
- *     Thanks to Intel for making available several different Pentium,
- *     Pentium Pro and Pentium-II/Xeon MP machines.
- *     Original development of Linux SMP code supported by Caldera.
- *
- *     This code is released under the GNU General Public License version 2 or
- *     later.
- *
- *     Fixes
- *             Felix Koop      :       NR_CPUS used properly
- *             Jose Renau      :       Handle single CPU case.
- *             Alan Cox        :       By repeated request 8) - Total BogoMIPS report.
- *             Greg Wright     :       Fix for kernel stacks panic.
- *             Erich Boleyn    :       MP v1.4 and additional changes.
- *     Matthias Sattler        :       Changes for 2.1 kernel map.
- *     Michel Lespinasse       :       Changes for 2.1 kernel map.
- *     Michael Chastain        :       Change trampoline.S to gnu as.
- *             Alan Cox        :       Dumb bug: 'B' step PPro's are fine
- *             Ingo Molnar     :       Added APIC timers, based on code
- *                                     from Jose Renau
- *             Ingo Molnar     :       various cleanups and rewrites
- *             Tigran Aivazian :       fixed "0.00 in /proc/uptime on SMP" bug.
- *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs
- *             Martin J. Bligh :       Added support for multi-quad systems
- *             Dave Jones      :       Report invalid combinations of Athlon CPUs.
-*              Rusty Russell   :       Hacked into shape for new "hotplug" boot process. */
-
-#include <linux/module.h>
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
-#include <linux/irq.h>
-#include <linux/bootmem.h>
-
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <asm/tlbflush.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-
-#if 1
-#define Dprintk(args...)
-#else
-#include <mach_apic.h>
-#endif
-#include <mach_wakecpu.h>
-#include <smpboot_hooks.h>
-
-/* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
-
-/* bitmap of online cpus */
-cpumask_t cpu_online_map;
-
-static cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-static cpumask_t smp_commenced_mask;
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-
-u8 x86_cpu_to_apicid[NR_CPUS] =
-                       { [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
-#if 0
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end  [];
-static unsigned char *trampoline_base;
-static int trampoline_exec;
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __init setup_trampoline(void)
-{
-       memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
-       return virt_to_phys(trampoline_base);
-}
-#endif
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
-#if 1
-       int cpu;
-
-       for (cpu = 1; cpu < NR_CPUS; cpu++) {
-               cpu_gdt_descr[cpu].address = (unsigned long)
-                       alloc_bootmem_low_pages(PAGE_SIZE);
-               /* XXX free unused pages later */
-       }
-#else
-       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
-       /*
-        * Has to be in very low memory so we can execute
-        * real-mode AP code.
-        */
-       if (__pa(trampoline_base) >= 0x9F000)
-               BUG();
-       /*
-        * Make the SMP trampoline executable:
-        */
-       trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
-#endif
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __init smp_store_cpu_info(int id)
-{
-       struct cpuinfo_x86 *c = cpu_data + id;
-
-       *c = boot_cpu_data;
-       if (id!=0)
-               identify_cpu(c);
-       /*
-        * Mask B, Pentium, but not Pentium MMX
-        */
-       if (c->x86_vendor == X86_VENDOR_INTEL &&
-           c->x86 == 5 &&
-           c->x86_mask >= 1 && c->x86_mask <= 4 &&
-           c->x86_model <= 3)
-               /*
-                * Remember we have B step Pentia with bugs
-                */
-               smp_b_stepping = 1;
-
-       /*
-        * Certain Athlons might work (for various values of 'work') in SMP
-        * but they are not certified as MP capable.
-        */
-       if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
-               /* Athlon 660/661 is valid. */  
-               if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
-                       goto valid_k7;
-
-               /* Duron 670 is valid */
-               if ((c->x86_model==7) && (c->x86_mask==0))
-                       goto valid_k7;
-
-               /*
-                * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
-                * It's worth noting that the A5 stepping (662) of some Athlon XP's
-                * have the MP bit set.
-                * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
-                */
-               if (((c->x86_model==6) && (c->x86_mask>=2)) ||
-                   ((c->x86_model==7) && (c->x86_mask>=1)) ||
-                    (c->x86_model> 7))
-                       if (cpu_has_mp)
-                               goto valid_k7;
-
-               /* If we get here, it's not a certified SMP capable AMD system. */
-               tainted |= TAINT_UNSAFE_SMP;
-       }
-
-valid_k7:
-       ;
-}
-
-#if 0
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
-{
-       int i;
-       unsigned long long t0;
-       unsigned long long sum, avg;
-       long long delta;
-       unsigned long one_usec;
-       int buggy = 0;
-
-       printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
-       /* convert from kcyc/sec to cyc/usec */
-       one_usec = cpu_khz / 1000;
-
-       atomic_set(&tsc_start_flag, 1);
-       wmb();
-
-       /*
-        * We loop a few times to get a primed instruction cache,
-        * then the last pass is more or less synchronized and
-        * the BP and APs set their cycle counters to zero all at
-        * once. This reduces the chance of having random offsets
-        * between the processors, and guarantees that the maximum
-        * delay between the cycle counters is never bigger than
-        * the latency of information-passing (cachelines) between
-        * two CPUs.
-        */
-       for (i = 0; i < NR_LOOPS; i++) {
-               /*
-                * all APs synchronize but they loop on '== num_cpus'
-                */
-               while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
-                       mb();
-               atomic_set(&tsc_count_stop, 0);
-               wmb();
-               /*
-                * this lets the APs save their current TSC:
-                */
-               atomic_inc(&tsc_count_start);
-
-               rdtscll(tsc_values[smp_processor_id()]);
-               /*
-                * We clear the TSC in the last loop:
-                */
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               /*
-                * Wait for all APs to leave the synchronization point:
-                */
-               while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
-                       mb();
-               atomic_set(&tsc_count_start, 0);
-               wmb();
-               atomic_inc(&tsc_count_stop);
-       }
-
-       sum = 0;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_isset(i, cpu_callout_map)) {
-                       t0 = tsc_values[i];
-                       sum += t0;
-               }
-       }
-       avg = sum;
-       do_div(avg, num_booting_cpus());
-
-       sum = 0;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_isset(i, cpu_callout_map))
-                       continue;
-               delta = tsc_values[i] - avg;
-               if (delta < 0)
-                       delta = -delta;
-               /*
-                * We report bigger than 2 microseconds clock differences.
-                */
-               if (delta > 2*one_usec) {
-                       long realdelta;
-                       if (!buggy) {
-                               buggy = 1;
-                               printk("\n");
-                       }
-                       realdelta = delta;
-                       do_div(realdelta, one_usec);
-                       if (tsc_values[i] < avg)
-                               realdelta = -realdelta;
-
-                       printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
-               }
-
-               sum += delta;
-       }
-       if (!buggy)
-               printk("passed.\n");
-}
-
-static void __init synchronize_tsc_ap (void)
-{
-       int i;
-
-       /*
-        * Not every cpu is online at the time
-        * this gets called, so we first wait for the BP to
-        * finish SMP initialization:
-        */
-       while (!atomic_read(&tsc_start_flag)) mb();
-
-       for (i = 0; i < NR_LOOPS; i++) {
-               atomic_inc(&tsc_count_start);
-               while (atomic_read(&tsc_count_start) != num_booting_cpus())
-                       mb();
-
-               rdtscll(tsc_values[smp_processor_id()]);
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               atomic_inc(&tsc_count_stop);
-               while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
-       }
-}
-#undef NR_LOOPS
-#endif
-
-extern void calibrate_delay(void);
-
-static atomic_t init_deasserted;
-
-void __init smp_callin(void)
-{
-       int cpuid, phys_id;
-       unsigned long timeout;
-
-#if 0
-       /*
-        * If waken up by an INIT in an 82489DX configuration
-        * we may get here before an INIT-deassert IPI reaches
-        * our local APIC.  We have to wait for the IPI or we'll
-        * lock up on an APIC access.
-        */
-       wait_for_init_deassert(&init_deasserted);
-#endif
-
-       /*
-        * (This works even if the APIC is not enabled.)
-        */
-       phys_id = smp_processor_id();
-       cpuid = smp_processor_id();
-       if (cpu_isset(cpuid, cpu_callin_map)) {
-               printk("huh, phys CPU#%d, CPU#%d already present??\n",
-                                       phys_id, cpuid);
-               BUG();
-       }
-       Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
-       /*
-        * STARTUP IPIs are fragile beasts as they might sometimes
-        * trigger some glue motherboard logic. Complete APIC bus
-        * silence for 1 second, this overestimates the time the
-        * boot CPU is spending to send the up to 2 STARTUP IPIs
-        * by a factor of two. This should be enough.
-        */
-
-       /*
-        * Waiting 2s total for startup (udelay is not yet working)
-        */
-       timeout = jiffies + 2*HZ;
-       while (time_before(jiffies, timeout)) {
-               /*
-                * Has the boot CPU finished it's STARTUP sequence?
-                */
-               if (cpu_isset(cpuid, cpu_callout_map))
-                       break;
-               rep_nop();
-       }
-
-       if (!time_before(jiffies, timeout)) {
-               printk("BUG: CPU%d started up but did not get a callout!\n",
-                       cpuid);
-               BUG();
-       }
-
-#if 0
-       /*
-        * the boot CPU has finished the init stage and is spinning
-        * on callin_map until we finish. We are free to set up this
-        * CPU, first the APIC. (this is probably redundant on most
-        * boards)
-        */
-
-       Dprintk("CALLIN, before setup_local_APIC().\n");
-       smp_callin_clear_local_apic();
-       setup_local_APIC();
-#endif
-       map_cpu_to_logical_apicid();
-
-       local_irq_enable();
-
-       /*
-        * Get our bogomips.
-        */
-       calibrate_delay();
-       Dprintk("Stack at about %p\n",&cpuid);
-
-       /*
-        * Save our processor parameters
-        */
-       smp_store_cpu_info(cpuid);
-
-#if 0
-       disable_APIC_timer();
-#endif
-       local_irq_disable();
-       /*
-        * Allow the master to continue.
-        */
-       cpu_set(cpuid, cpu_callin_map);
-
-#if 0
-       /*
-        *      Synchronize the TSC with the BP
-        */
-       if (cpu_has_tsc && cpu_khz)
-               synchronize_tsc_ap();
-#endif
-}
-
-int cpucount;
-
-extern int cpu_idle(void);
-
-
-static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
-                                        struct pt_regs *regs)
-{
-
-       return IRQ_HANDLED;
-}
-
-static struct irqaction local_irq_debug = {
-       local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
-       NULL, NULL
-};
-
-void local_setup_debug(void)
-{
-       (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
-}
-
-
-extern void local_setup_timer(void);
-
-/*
- * Activate a secondary processor.
- */
-int __init start_secondary(void *unused)
-{
-       /*
-        * Dont put anything before smp_callin(), SMP
-        * booting is too fragile that we want to limit the
-        * things done here to the most necessary things.
-        */
-       cpu_init();
-       smp_callin();
-       while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
-               rep_nop();
-       local_setup_timer();
-       local_setup_debug();    /* XXX */
-       smp_intr_init();
-       local_irq_enable();
-       /*
-        * low-memory mappings have been cleared, flush them from
-        * the local TLBs too.
-        */
-       local_flush_tlb();
-       cpu_set(smp_processor_id(), cpu_online_map);
-       wmb();
-       if (0) {
-               char *msg2 = "delay2\n";
-               int timeout;
-               for (timeout = 0; timeout < 50000; timeout++) {
-                       udelay(1000);
-                       if (timeout == 2000) {
-                               (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
-                               timeout = 0;
-                       }
-               }
-       }
-       return cpu_idle();
-}
-
-/*
- * Everything has been set up for the secondary
- * CPUs - they just need to reload everything
- * from the task structure
- * This function must not return.
- */
-void __init initialize_secondary(void)
-{
-       /*
-        * We don't actually need to load the full TSS,
-        * basically just the stack pointer and the eip.
-        */
-
-       asm volatile(
-               "movl %0,%%esp\n\t"
-               "jmp *%1"
-               :
-               :"r" (current->thread.esp),"r" (current->thread.eip));
-}
-
-extern struct {
-       void * esp;
-       unsigned short ss;
-} stack_start;
-
-#ifdef CONFIG_NUMA
-
-/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
-                               { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_2_node);
-
-/* set up a mapping between cpu and node. */
-static inline void map_cpu_to_node(int cpu, int node)
-{
-       printk("Mapping cpu %d to node %d\n", cpu, node);
-       cpu_set(cpu, node_2_cpu_mask[node]);
-       cpu_2_node[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static inline void unmap_cpu_to_node(int cpu)
-{
-       int node;
-
-       printk("Unmapping cpu %d from all nodes\n", cpu);
-       for (node = 0; node < MAX_NUMNODES; node ++)
-               cpu_clear(cpu, node_2_cpu_mask[node]);
-       cpu_2_node[cpu] = 0;
-}
-#else /* !CONFIG_NUMA */
-
-#define map_cpu_to_node(cpu, node)     ({})
-#define unmap_cpu_to_node(cpu) ({})
-
-#endif /* CONFIG_NUMA */
-
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-void map_cpu_to_logical_apicid(void)
-{
-       int cpu = smp_processor_id();
-       int apicid = smp_processor_id();
-
-       cpu_2_logical_apicid[cpu] = apicid;
-       map_cpu_to_node(cpu, apicid_to_node(apicid));
-}
-
-void unmap_cpu_to_logical_apicid(int cpu)
-{
-       cpu_2_logical_apicid[cpu] = BAD_APICID;
-       unmap_cpu_to_node(cpu);
-}
-
-#if APIC_DEBUG
-static inline void __inquire_remote_apic(int apicid)
-{
-       int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
-       char *names[] = { "ID", "VERSION", "SPIV" };
-       int timeout, status;
-
-       printk("Inquiring remote APIC #%d...\n", apicid);
-
-       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
-               printk("... APIC #%d %s: ", apicid, names[i]);
-
-               /*
-                * Wait for idle.
-                */
-               apic_wait_icr_idle();
-
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-               apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
-               timeout = 0;
-               do {
-                       udelay(100);
-                       status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
-               } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
-               switch (status) {
-               case APIC_ICR_RR_VALID:
-                       status = apic_read(APIC_RRR);
-                       printk("%08x\n", status);
-                       break;
-               default:
-                       printk("failed\n");
-               }
-       }
-}
-#endif
-
-#if 0
-#ifdef WAKE_SECONDARY_VIA_NMI
-/* 
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
- */
-static int __init
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
-{
-       unsigned long send_status = 0, accept_status = 0;
-       int timeout, maxlvt;
-
-       /* Target chip */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
-       /* Boot on the stack */
-       /* Kick the second */
-       apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
-
-       Dprintk("Waiting for send to finish...\n");
-       timeout = 0;
-       do {
-               Dprintk("+");
-               udelay(100);
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-       } while (send_status && (timeout++ < 1000));
-
-       /*
-        * Give the other CPU some time to accept the IPI.
-        */
-       udelay(200);
-       /*
-        * Due to the Pentium erratum 3AP.
-        */
-       maxlvt = get_maxlvt();
-       if (maxlvt > 3) {
-               apic_read_around(APIC_SPIV);
-               apic_write(APIC_ESR, 0);
-       }
-       accept_status = (apic_read(APIC_ESR) & 0xEF);
-       Dprintk("NMI sent.\n");
-
-       if (send_status)
-               printk("APIC never delivered???\n");
-       if (accept_status)
-               printk("APIC delivery error (%lx).\n", accept_status);
-
-       return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
-       unsigned long send_status = 0, accept_status = 0;
-       int maxlvt, timeout, num_starts, j;
-
-       /*
-        * Be paranoid about clearing APIC errors.
-        */
-       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
-               apic_read_around(APIC_SPIV);
-               apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-       }
-
-       Dprintk("Asserting INIT.\n");
-
-       /*
-        * Turn INIT on target chip
-        */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-       /*
-        * Send IPI
-        */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
-                               | APIC_DM_INIT);
-
-       Dprintk("Waiting for send to finish...\n");
-       timeout = 0;
-       do {
-               Dprintk("+");
-               udelay(100);
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-       } while (send_status && (timeout++ < 1000));
-
-       mdelay(10);
-
-       Dprintk("Deasserting INIT.\n");
-
-       /* Target chip */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-       /* Send IPI */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
-       Dprintk("Waiting for send to finish...\n");
-       timeout = 0;
-       do {
-               Dprintk("+");
-               udelay(100);
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-       } while (send_status && (timeout++ < 1000));
-
-       atomic_set(&init_deasserted, 1);
-
-       /*
-        * Should we send STARTUP IPIs ?
-        *
-        * Determine this based on the APIC version.
-        * If we don't have an integrated APIC, don't send the STARTUP IPIs.
-        */
-       if (APIC_INTEGRATED(apic_version[phys_apicid]))
-               num_starts = 2;
-       else
-               num_starts = 0;
-
-       /*
-        * Run STARTUP IPI loop.
-        */
-       Dprintk("#startup loops: %d.\n", num_starts);
-
-       maxlvt = get_maxlvt();
-
-       for (j = 1; j <= num_starts; j++) {
-               Dprintk("Sending STARTUP #%d.\n",j);
-               apic_read_around(APIC_SPIV);
-               apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-               Dprintk("After apic_write.\n");
-
-               /*
-                * STARTUP IPI
-                */
-
-               /* Target chip */
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
-               /* Boot on the stack */
-               /* Kick the second */
-               apic_write_around(APIC_ICR, APIC_DM_STARTUP
-                                       | (start_eip >> 12));
-
-               /*
-                * Give the other CPU some time to accept the IPI.
-                */
-               udelay(300);
-
-               Dprintk("Startup point 1.\n");
-
-               Dprintk("Waiting for send to finish...\n");
-               timeout = 0;
-               do {
-                       Dprintk("+");
-                       udelay(100);
-                       send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-               } while (send_status && (timeout++ < 1000));
-
-               /*
-                * Give the other CPU some time to accept the IPI.
-                */
-               udelay(200);
-               /*
-                * Due to the Pentium erratum 3AP.
-                */
-               if (maxlvt > 3) {
-                       apic_read_around(APIC_SPIV);
-                       apic_write(APIC_ESR, 0);
-               }
-               accept_status = (apic_read(APIC_ESR) & 0xEF);
-               if (send_status || accept_status)
-                       break;
-       }
-       Dprintk("After Startup.\n");
-
-       if (send_status)
-               printk("APIC never delivered???\n");
-       if (accept_status)
-               printk("APIC delivery error (%lx).\n", accept_status);
-
-       return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_INIT */
-#endif
-
-extern cpumask_t cpu_initialized;
-
-static int __init do_boot_cpu(int apicid)
-/*
- * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
- * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
- */
-{
-       struct task_struct *idle;
-       unsigned long boot_error;
-       int timeout, cpu;
-       unsigned long start_eip;
-#if 0
-       unsigned short nmi_high = 0, nmi_low = 0;
-#endif
-       full_execution_context_t ctxt;
-       extern void startup_32_smp(void);
-       extern void hypervisor_callback(void);
-       extern void failsafe_callback(void);
-       extern int smp_trap_init(trap_info_t *);
-       int i;
-
-       cpu = ++cpucount;
-       /*
-        * We can't use kernel_thread since we must avoid to
-        * reschedule the child.
-        */
-       idle = fork_idle(cpu);
-       if (IS_ERR(idle))
-               panic("failed fork for CPU %d", cpu);
-       idle->thread.eip = (unsigned long) start_secondary;
-       /* start_eip had better be page-aligned! */
-       start_eip = (unsigned long)startup_32_smp;
-
-       /* So we see what's up   */
-       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
-       /* Stack for startup_32 can be just as for start_secondary onwards */
-       stack_start.esp = (void *) idle->thread.esp;
-
-       irq_ctx_init(cpu);
-
-       /*
-        * This grunge runs the startup process for
-        * the targeted processor.
-        */
-
-       atomic_set(&init_deasserted, 0);
-
-#if 1
-       if (cpu_gdt_descr[0].size > PAGE_SIZE)
-               BUG();
-       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
-       memcpy((void *)cpu_gdt_descr[cpu].address,
-              (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
-               memset((char *)cpu_gdt_descr[cpu].address +
-                      FIRST_RESERVED_GDT_ENTRY * 8, 0,
-                      NR_RESERVED_GDT_ENTRIES * 8);
-
-       memset(&ctxt, 0, sizeof(ctxt));
-
-       ctxt.cpu_ctxt.ds = __USER_DS;
-       ctxt.cpu_ctxt.es = __USER_DS;
-       ctxt.cpu_ctxt.fs = 0;
-       ctxt.cpu_ctxt.gs = 0;
-       ctxt.cpu_ctxt.ss = __KERNEL_DS;
-       ctxt.cpu_ctxt.cs = __KERNEL_CS;
-       ctxt.cpu_ctxt.eip = start_eip;
-       ctxt.cpu_ctxt.esp = idle->thread.esp;
-       ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
-
-       /* FPU is set up to default initial state. */
-       memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
-       /* Virtual IDT is empty at start-of-day. */
-       for ( i = 0; i < 256; i++ )
-       {
-               ctxt.trap_ctxt[i].vector = i;
-               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
-       }
-       ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
-
-       /* No LDT. */
-       ctxt.ldt_ents = 0;
-
-       {
-               unsigned long va;
-               int f;
-
-               for (va = cpu_gdt_descr[cpu].address, f = 0;
-                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
-                    va += PAGE_SIZE, f++) {
-                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
-                       make_page_readonly((void *)va);
-               }
-               ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
-               flush_page_update_queue();
-       }
-
-       /* Ring 1 stack is the initial stack. */
-       ctxt.kernel_ss  = __KERNEL_DS;
-       ctxt.kernel_esp = idle->thread.esp;
-
-       /* Callback handlers. */
-       ctxt.event_callback_cs     = __KERNEL_CS;
-       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
-       ctxt.failsafe_callback_cs  = __KERNEL_CS;
-       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
-       ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
-
-       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
-
-       if (!boot_error) {
-               /*
-                * allow APs to start initializing.
-                */
-               Dprintk("Before Callout %d.\n", cpu);
-               cpu_set(cpu, cpu_callout_map);
-               Dprintk("After Callout %d.\n", cpu);
-
-               /*
-                * Wait 5s total for a response
-                */
-               for (timeout = 0; timeout < 50000; timeout++) {
-                       if (cpu_isset(cpu, cpu_callin_map))
-                               break;  /* It has booted */
-                       udelay(100);
-               }
-
-               if (cpu_isset(cpu, cpu_callin_map)) {
-                       /* number CPUs logically, starting from 1 (BSP is 0) */
-                       Dprintk("OK.\n");
-                       printk("CPU%d: ", cpu);
-                       print_cpu_info(&cpu_data[cpu]);
-                       Dprintk("CPU has booted.\n");
-               } else {
-                       boot_error= 1;
-               }
-       }
-       x86_cpu_to_apicid[cpu] = apicid;
-       if (boot_error) {
-               /* Try to put things back the way they were before ... */
-               unmap_cpu_to_logical_apicid(cpu);
-               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
-               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
-               cpucount--;
-       }
-
-#else
-       Dprintk("Setting warm reset code and vector.\n");
-
-       store_NMI_vector(&nmi_high, &nmi_low);
-
-       smpboot_setup_warm_reset_vector(start_eip);
-
-       /*
-        * Starting actual IPI sequence...
-        */
-       boot_error = wakeup_secondary_cpu(apicid, start_eip);
-
-       if (!boot_error) {
-               /*
-                * allow APs to start initializing.
-                */
-               Dprintk("Before Callout %d.\n", cpu);
-               cpu_set(cpu, cpu_callout_map);
-               Dprintk("After Callout %d.\n", cpu);
-
-               /*
-                * Wait 5s total for a response
-                */
-               for (timeout = 0; timeout < 50000; timeout++) {
-                       if (cpu_isset(cpu, cpu_callin_map))
-                               break;  /* It has booted */
-                       udelay(100);
-               }
-
-               if (cpu_isset(cpu, cpu_callin_map)) {
-                       /* number CPUs logically, starting from 1 (BSP is 0) */
-                       Dprintk("OK.\n");
-                       printk("CPU%d: ", cpu);
-                       print_cpu_info(&cpu_data[cpu]);
-                       Dprintk("CPU has booted.\n");
-               } else {
-                       boot_error= 1;
-                       if (*((volatile unsigned char *)trampoline_base)
-                                       == 0xA5)
-                               /* trampoline started but...? */
-                               printk("Stuck ??\n");
-                       else
-                               /* trampoline code not run */
-                               printk("Not responding.\n");
-                       inquire_remote_apic(apicid);
-               }
-       }
-       x86_cpu_to_apicid[cpu] = apicid;
-       if (boot_error) {
-               /* Try to put things back the way they were before ... */
-               unmap_cpu_to_logical_apicid(cpu);
-               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
-               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
-               cpucount--;
-       }
-
-       /* mark "stuck" area as not stuck */
-       *((volatile unsigned long *)trampoline_base) = 0;
-#endif
-
-       return boot_error;
-}
-
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
-
-static void smp_tune_scheduling (void)
-{
-       unsigned long cachesize;       /* kB   */
-       unsigned long bandwidth = 350; /* MB/s */
-       /*
-        * Rough estimation for SMP scheduling, this is the number of
-        * cycles it takes for a fully memory-limited process to flush
-        * the SMP-local cache.
-        *
-        * (For a P5 this pretty much means we will choose another idle
-        *  CPU almost always at wakeup time (this is due to the small
-        *  L1 cache), on PIIs it's around 50-100 usecs, depending on
-        *  the cache size)
-        */
-
-       if (!cpu_khz) {
-               /*
-                * this basically disables processor-affinity
-                * scheduling on SMP without a TSC.
-                */
-               cacheflush_time = 0;
-               return;
-       } else {
-               cachesize = boot_cpu_data.x86_cache_size;
-               if (cachesize == -1) {
-                       cachesize = 16; /* Pentiums, 2x8kB cache */
-                       bandwidth = 100;
-               }
-
-               cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
-       }
-
-       cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
-
-       printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
-               (long)cacheflush_time/(cpu_khz/1000),
-               ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
-       printk("task migration cache decay timeout: %ld msecs.\n",
-               cache_decay_ticks);
-}
-
-/*
- * Cycle through the processors sending APIC IPIs to boot each.
- */
-
-#if 0
-static int boot_cpu_logical_apicid;
-#endif
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-
-static void __init smp_boot_cpus(unsigned int max_cpus)
-{
-       int cpu, kicked;
-       unsigned long bogosum = 0;
-#if 0
-       int apicid, bit;
-#endif
-
-       /*
-        * Setup boot CPU information
-        */
-       smp_store_cpu_info(0); /* Final full version of the data */
-       printk("CPU%d: ", 0);
-       print_cpu_info(&cpu_data[0]);
-
-#if 0
-       boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-       boot_cpu_logical_apicid = logical_smp_processor_id();
-       x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
-#else
-       // boot_cpu_physical_apicid = 0;
-       // boot_cpu_logical_apicid = 0;
-       x86_cpu_to_apicid[0] = 0;
-#endif
-
-       current_thread_info()->cpu = 0;
-       smp_tune_scheduling();
-       cpus_clear(cpu_sibling_map[0]);
-       cpu_set(0, cpu_sibling_map[0]);
-
-       /*
-        * If we couldn't find an SMP configuration at boot time,
-        * get out of here now!
-        */
-       if (!smp_found_config /* && !acpi_lapic) */) {
-               printk(KERN_NOTICE "SMP motherboard not detected.\n");
-               smpboot_clear_io_apic_irqs();
-#if 0
-               phys_cpu_present_map = physid_mask_of_physid(0);
-               if (APIC_init_uniprocessor())
-                       printk(KERN_NOTICE "Local APIC not detected."
-                                          " Using dummy APIC emulation.\n");
-#endif
-               map_cpu_to_logical_apicid();
-               return;
-       }
-
-#if 0
-       /*
-        * Should not be necessary because the MP table should list the boot
-        * CPU too, but we do it for the sake of robustness anyway.
-        * Makes no sense to do this check in clustered apic mode, so skip it
-        */
-       if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
-               printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
-                               boot_cpu_physical_apicid);
-               physid_set(hard_smp_processor_id(), phys_cpu_present_map);
-       }
-
-       /*
-        * If we couldn't find a local APIC, then get out of here now!
-        */
-       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
-               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-                       boot_cpu_physical_apicid);
-               printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
-               smpboot_clear_io_apic_irqs();
-               phys_cpu_present_map = physid_mask_of_physid(0);
-               return;
-       }
-
-       verify_local_APIC();
-#endif
-
-       /*
-        * If SMP should be disabled, then really disable it!
-        */
-       if (!max_cpus) {
-               HYPERVISOR_shared_info->n_vcpu = 1;
-               printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
-               smpboot_clear_io_apic_irqs();
-#if 0
-               phys_cpu_present_map = physid_mask_of_physid(0);
-#endif
-               return;
-       }
-
-       smp_intr_init();
-
-#if 0
-       connect_bsp_APIC();
-       setup_local_APIC();
-#endif
-       map_cpu_to_logical_apicid();
-#if 0
-
-
-       setup_portio_remap();
-
-       /*
-        * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
-        *
-        * In clustered apic mode, phys_cpu_present_map is a constructed thus:
-        * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
-        * clustered apic ID.
-        */
-       Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
-#endif
-       Dprintk("CPU present map: %lx\n",
-               (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
-
-       kicked = 1;
-       for (cpu = 1; kicked < NR_CPUS &&
-                    cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
-               if (max_cpus <= cpucount+1)
-                       continue;
-
-               if (do_boot_cpu(cpu))
-                       printk("CPU #%d not responding - cannot use it.\n",
-                                                               cpu);
-               else
-                       ++kicked;
-       }
-
-#if 0
-       /*
-        * Cleanup possible dangling ends...
-        */
-       smpboot_restore_warm_reset_vector();
-#endif
-
-       /*
-        * Allow the user to impress friends.
-        */
-       Dprintk("Before bogomips.\n");
-       for (cpu = 0; cpu < NR_CPUS; cpu++)
-               if (cpu_isset(cpu, cpu_callout_map))
-                       bogosum += cpu_data[cpu].loops_per_jiffy;
-       printk(KERN_INFO
-               "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
-               cpucount+1,
-               bogosum/(500000/HZ),
-               (bogosum/(5000/HZ))%100);
-       
-       Dprintk("Before bogocount - setting activated=1.\n");
-
-       if (smp_b_stepping)
-               printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
-
-       /*
-        * Don't taint if we are running SMP kernel on a single non-MP
-        * approved Athlon
-        */
-       if (tainted & TAINT_UNSAFE_SMP) {
-               if (cpucount)
-                       printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
-               else
-                       tainted &= ~TAINT_UNSAFE_SMP;
-       }
-
-       Dprintk("Boot done.\n");
-
-       /*
-        * construct cpu_sibling_map[], so that we can tell sibling CPUs
-        * efficiently.
-        */
-       for (cpu = 0; cpu < NR_CPUS; cpu++)
-               cpus_clear(cpu_sibling_map[cpu]);
-
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               int siblings = 0;
-               int i;
-               if (!cpu_isset(cpu, cpu_callout_map))
-                       continue;
-
-               if (smp_num_siblings > 1) {
-                       for (i = 0; i < NR_CPUS; i++) {
-                               if (!cpu_isset(i, cpu_callout_map))
-                                       continue;
-                               if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                                       siblings++;
-                                       cpu_set(i, cpu_sibling_map[cpu]);
-                               }
-                       }
-               } else {
-                       siblings++;
-                       cpu_set(cpu, cpu_sibling_map[cpu]);
-               }
-
-               if (siblings != smp_num_siblings)
-                       printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
-       }
-
-#if 0
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               check_nmi_watchdog();
-
-       smpboot_setup_io_apic();
-
-       setup_boot_APIC_clock();
-
-       /*
-        * Synchronize the TSC with the AP
-        */
-       if (cpu_has_tsc && cpucount && cpu_khz)
-               synchronize_tsc_bp();
-#endif
-}
-
-/* These are wrappers to interface to the new boot process.  Someone
-   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-       smp_boot_cpus(max_cpus);
-}
-
-void __devinit smp_prepare_boot_cpu(void)
-{
-       cpu_set(smp_processor_id(), cpu_online_map);
-       cpu_set(smp_processor_id(), cpu_callout_map);
-}
-
-int __devinit __cpu_up(unsigned int cpu)
-{
-       /* This only works at boot for x86.  See "rewrite" above. */
-       if (cpu_isset(cpu, smp_commenced_mask)) {
-               local_irq_enable();
-               return -ENOSYS;
-       }
-
-       /* In case one didn't come up */
-       if (!cpu_isset(cpu, cpu_callin_map)) {
-               local_irq_enable();
-               return -EIO;
-       }
-
-       local_irq_enable();
-       /* Unleash the CPU! */
-       cpu_set(cpu, smp_commenced_mask);
-       while (!cpu_isset(cpu, cpu_online_map))
-               mb();
-       return 0;
-}
-
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-#if 1
-#else
-#ifdef CONFIG_X86_IO_APIC
-       setup_ioapic_dest();
-#endif
-       zap_low_mappings();
-       /*
-        * Disable executability of the SMP trampoline:
-        */
-       set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
-#endif
-}
-
-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
-
-static struct irqaction reschedule_irq = {
-       smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
-       NULL, NULL
-};
-
-extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
-
-static struct irqaction invalidate_irq = {
-       smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
-       NULL, NULL
-};
-
-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
-
-static struct irqaction call_function_irq = {
-       smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
-       "call_function", NULL, NULL
-};
-
-void __init smp_intr_init(void)
-{
-
-       (void)setup_irq(
-           bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
-           &reschedule_irq);
-       (void)setup_irq(
-           bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
-           &invalidate_irq);
-       (void)setup_irq(
-           bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
-           &call_function_irq);
-}
diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
deleted file mode 100644 (file)
index 51addc6..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright (C) 2004, Christian Limpach */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/threads.h>
-
-unsigned int __initdata maxcpus = NR_CPUS;
-
-
-/*
- * the frequency of the profiling timer can be changed
- * by writing a multiplier value into /proc/profile.
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
-       printk("setup_profiling_timer\n");
-
-       return 0;
-}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
deleted file mode 100644 (file)
index 80b7ca0..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-
-obj-y  := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
-
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
deleted file mode 100644 (file)
index a9a0067..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/******************************************************************************
- * blktap.c
- * 
- * XenLinux virtual block-device tap.
- * 
- * Copyright (c) 2004, Andrew Warfield
- *
- * Based on the original split block driver:
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- * Copyright (c) 2004, Christian Limpach
- * 
- * Note that unlike the split block driver code, this driver has been developed
- * strictly for Linux 2.6
- */
-
-#include "blktap.h"
-
-int __init xlblktap_init(void)
-{
-    ctrl_msg_t               cmsg;
-    blkif_fe_driver_status_t fe_st;
-    blkif_be_driver_status_t be_st;
-
-    printk(KERN_INFO "Initialising Xen block tap device\n");
-
-    DPRINTK("   tap - Backend connection init:\n");
-
-
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_BLKIF_FE;
-    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
-    cmsg.length    = sizeof(blkif_fe_driver_status_t);
-    fe_st.status   = BLKIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
-    DPRINTK("   tap - Frontend connection init:\n");
-    
-    active_reqs_init();
-    blkif_interface_init();
-    blkdev_schedule_init();
-    
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_BLKIF_BE;
-    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
-    cmsg.length    = sizeof(blkif_be_driver_status_t);
-    be_st.status   = BLKIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &be_st, sizeof(be_st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
-    DPRINTK("   tap - Userland channel init:\n");
-
-    blktap_init();
-
-    DPRINTK("Blkif tap device initialized.\n");
-
-    return 0;
-}
-
-#if 0 /* tap doesn't handle suspend/resume */
-void blkdev_suspend(void)
-{
-}
-
-void blkdev_resume(void)
-{
-    ctrl_msg_t               cmsg;
-    blkif_fe_driver_status_t st;    
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_BLKIF_FE;
-    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
-    cmsg.length    = sizeof(blkif_fe_driver_status_t);
-    st.status      = BLKIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &st, sizeof(st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-#endif
-
-__initcall(xlblktap_init);
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
deleted file mode 100644 (file)
index eb084e8..0000000
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * blktap.h
- * 
- * Interfaces for the Xen block tap driver.
- * 
- * (c) 2004, Andrew Warfield, University of Cambridge
- * 
- */
-
-#ifndef __BLKTAP_H__
-#define __BLKTAP_H__
-
-#include <linux/version.h>
-#include <linux/blkdev.h>
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <asm-xen/ctrl_if.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm-xen/hypervisor.h>
-#include <asm-xen/xen-public/io/blkif.h>
-#include <asm-xen/xen-public/io/ring.h>
-
-/* Used to signal to the backend that this is a tap domain. */
-#define BLKTAP_COOKIE 0xbeadfeed
-
-/* -------[ debug / pretty printing ]--------------------------------- */
-
-#if 0
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#if 1
-#define ASSERT(_p) \
-    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
-
-
-/* -------[ state descriptors ]--------------------------------------- */
-
-#define BLKIF_STATE_CLOSED       0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED    2
-
-/* -------[ connection tracking ]------------------------------------- */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-extern spinlock_t blkif_io_lock;
-
-typedef struct blkif_st {
-    /* Unique identifier for this interface. */
-    domid_t             domid;
-    unsigned int        handle;
-    /* Physical parameters of the comms window. */
-    unsigned long       shmem_frame;
-    unsigned int        evtchn;
-    int                 irq;
-    /* Comms information. */
-    blkif_back_ring_t   blk_ring;
-    
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */    
-    u8                  disconnect_rspid;
-    struct blkif_st    *hash_next;
-    struct list_head    blkdev_list;
-    spinlock_t          blk_ring_lock;
-    atomic_t            refcnt;
-    struct work_struct work;
-} blkif_t;
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-void blkif_disconnect_complete(blkif_t *blkif);
-#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define blkif_put(_b)                             \
-    do {                                          \
-        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            blkif_disconnect_complete(_b);        \
-    } while (0)
-
-
-/* -------[ active request tracking ]--------------------------------- */
-
-typedef struct {
-    blkif_t       *blkif;
-    unsigned long  id;
-    int            nr_pages;
-    unsigned long  mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    unsigned long  virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-    int            next_free;
-} active_req_t;
-
-typedef unsigned int ACTIVE_RING_IDX;
-
-active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
-
-extern inline unsigned int ID_TO_IDX(unsigned long id) 
-{ 
-    return ( id & 0x0000ffff );
-}
-
-extern inline domid_t ID_TO_DOM(unsigned long id) 
-{ 
-    return (id >> 16); 
-}
-
-void active_reqs_init(void);
-
-/* -------[ interposition -> character device interface ]------------- */
-
-/* /dev/xen/blktap resides at device number major=10, minor=200        */ 
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
-
-/* blktap IOCTLs:                                                      */
-#define BLKTAP_IOCTL_KICK_FE         1
-#define BLKTAP_IOCTL_KICK_BE         2
-#define BLKTAP_IOCTL_SETMODE         3
-#define BLKTAP_IOCTL_PRINT_IDXS      100  
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
-#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
-#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
-#define BLKTAP_MODE_COPY_FE          0x00000004
-#define BLKTAP_MODE_COPY_BE          0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
-           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
-           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
-           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
-    return (
-        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
-        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
-        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
-        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
-        );
-}
-
-
-
-/* -------[ Mappings to User VMA ]------------------------------------ */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-extern struct vm_area_struct *blktap_vma;
-
-/* The following are from blkback.c and should probably be put in a
- * header and included from there.
- * The mmap area described here is where attached data pages eill be mapped.
- */
-extern unsigned long mmap_vstart;
-#define MMAP_PAGES_PER_REQUEST \
-    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
-#define MMAP_PAGES             \
-    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                        \
-    (mmap_vstart +                                   \
-     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * PAGE_SIZE))
-
-/* immediately before the mmap area, we have a bunch of pages reserved
- * for shared memory rings.
- */
-
-#define RING_PAGES 3 /* Ctrl, Front, and Back */ 
-extern unsigned long rings_vstart;
-
-
-/* -------[ Here be globals ]----------------------------------------- */
-extern unsigned long blktap_mode;
-
-/* Connection to a single backend domain. */
-extern blkif_front_ring_t blktap_be_ring;
-extern unsigned int blktap_be_evtchn;
-extern unsigned int blktap_be_state;
-
-/* User ring status. */
-extern unsigned long blktap_ring_ok;
-
-/* -------[ ...and function prototypes. ]----------------------------- */
-
-/* init function for character device interface.                       */
-int blktap_init(void);
-
-/* init function for the blkif cache. */
-void __init blkif_interface_init(void);
-void __init blkdev_schedule_init(void);
-void blkif_deschedule(blkif_t *blkif);
-
-/* interfaces to the char driver, passing messages to and from apps.   */
-void blktap_kick_user(void);
-
-/* user ring access functions: */
-int blktap_write_fe_ring(blkif_request_t *req);
-int blktap_write_be_ring(blkif_response_t *rsp);
-int blktap_write_ctrl_ring(ctrl_msg_t *msg);
-
-/* fe/be ring access functions: */
-int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
-int write_req_to_be_ring(blkif_request_t *req);
-
-/* event notification functions */
-void kick_fe_domain(blkif_t *blkif);
-void kick_be_domain(void);
-
-/* Interrupt handlers. */
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
-                                  struct pt_regs *ptregs);
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
-
-/* Control message receiver. */
-extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
-
-/* debug */
-void print_vm_ring_idxs(void);
-        
-#define __BLKINT_H__
-#endif
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
deleted file mode 100644 (file)
index 9ce74c7..0000000
+++ /dev/null
@@ -1,517 +0,0 @@
-/******************************************************************************
- * blktap_controlmsg.c
- * 
- * XenLinux virtual block-device tap.
- * Control interfaces to the frontend and backend drivers.
- * 
- * Copyright (c) 2004, Andrew Warfield
- *
- */
-#include "blktap.h"
-
-static char *blkif_state_name[] = {
-    [BLKIF_STATE_CLOSED]       = "closed",
-    [BLKIF_STATE_DISCONNECTED] = "disconnected",
-    [BLKIF_STATE_CONNECTED]    = "connected",
-};
-
-static char * blkif_status_name[] = {
-    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
-    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
-    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
-    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
-};
-
-static unsigned blktap_be_irq;
-unsigned int    blktap_be_state = BLKIF_STATE_CLOSED;
-unsigned int    blktap_be_evtchn;
-
-/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static kmem_cache_t *blkif_cachep;
-static blkif_t      *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
-        blkif = blkif->hash_next;
-    return blkif;
-}
-
-static void __blkif_disconnect_complete(void *arg)
-{
-    blkif_t              *blkif = (blkif_t *)arg;
-    ctrl_msg_t            cmsg;
-    blkif_be_disconnect_t disc;
-
-    /*
-     * These can't be done in blkif_disconnect() because at that point there
-     * may be outstanding requests at the disc whose asynchronous responses
-     * must still be notified to the remote driver.
-     */
-    unbind_evtchn_from_irq(blkif->evtchn);
-    vfree(blkif->blk_ring.sring);
-
-    /* Construct the deferred response message. */
-    cmsg.type         = CMSG_BLKIF_BE;
-    cmsg.subtype      = CMSG_BLKIF_BE_DISCONNECT;
-    cmsg.id           = blkif->disconnect_rspid;
-    cmsg.length       = sizeof(blkif_be_disconnect_t);
-    disc.domid        = blkif->domid;
-    disc.blkif_handle = blkif->handle;
-    disc.status       = BLKIF_BE_STATUS_OKAY;
-    memcpy(cmsg.msg, &disc, sizeof(disc));
-
-    /*
-     * Make sure message is constructed /before/ status change, because
-     * after the status change the 'blkif' structure could be deallocated at
-     * any time. Also make sure we send the response /after/ status change,
-     * as otherwise a subsequent CONNECT request could spuriously fail if
-     * another CPU doesn't see the status change yet.
-     */
-    mb();
-    if ( blkif->status != DISCONNECTING )
-        BUG();
-    blkif->status = DISCONNECTED;
-    mb();
-
-    /* Send the successful response. */
-    ctrl_if_send_response(&cmsg);
-}
-
-void blkif_disconnect_complete(blkif_t *blkif)
-{
-    INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
-    schedule_work(&blkif->work);
-}
-
-void blkif_ptfe_create(blkif_be_create_t *create)
-{
-    blkif_t      *blkif, **pblkif;
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-
-
-    /* May want to store info on the connecting domain here. */
-
-    DPRINTK("PT got BE_CREATE\n");
-
-    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
-    {
-        DPRINTK("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    /* blkif struct init code from blkback.c */
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = domid;
-    blkif->handle = handle;
-    blkif->status = DISCONNECTED;  
-    spin_lock_init(&blkif->blk_ring_lock);
-    atomic_set(&blkif->refcnt, 0);
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTK("Could not create blkif: already exists\n");
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            kmem_cache_free(blkif_cachep, blkif);
-            return;
-        }
-        pblkif = &(*pblkif)->hash_next;
-    }
-
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
-
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-
-void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
-{
-    /* Clear anything that we initialized above. */
-
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    DPRINTK("PT got BE_DESTROY\n");
-    
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            if ( blkif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pblkif = blkif->hash_next;
-    kmem_cache_free(blkif_cachep, blkif);
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_ptfe_connect(blkif_be_connect_t *connect)
-{
-    domid_t        domid  = connect->domid;
-    unsigned int   handle = connect->blkif_handle;
-    unsigned int   evtchn = connect->evtchn;
-    unsigned long  shmem_frame = connect->shmem_frame;
-    struct vm_struct *vma;
-    pgprot_t       prot;
-    int            error;
-    blkif_t       *blkif;
-    blkif_sring_t *sring;
-
-    DPRINTK("PT got BE_CONNECT\n");
-
-    blkif = blkif_find_by_handle(domid, handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 
-                connect->domid, connect->blkif_handle); 
-        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        WPRINTK("BE_CONNECT: error! (%d)\n", error);
-        if ( error == -ENOMEM ) 
-            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT ) {
-            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-            WPRINTK("BE_CONNECT: MAPPING error!\n");
-        }
-        else
-            connect->status = BLKIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-
-    if ( blkif->status != DISCONNECTED )
-    {
-        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-        vfree(vma->addr);
-        return;
-    }
-
-    sring = (blkif_sring_t *)vma->addr;
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&blkif->blk_ring, sring);
-    
-    blkif->evtchn        = evtchn;
-    blkif->irq           = bind_evtchn_to_irq(evtchn);
-    blkif->shmem_frame   = shmem_frame;
-    blkif->status        = CONNECTED;
-    blkif_get(blkif);
-
-    request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
-
-    connect->status = BLKIF_BE_STATUS_OKAY;
-}
-
-int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
-{
-    domid_t       domid  = disconnect->domid;
-    unsigned int  handle = disconnect->blkif_handle;
-    blkif_t      *blkif;
-
-    DPRINTK("PT got BE_DISCONNECT\n");
-    
-    blkif = blkif_find_by_handle(domid, handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("blkif_disconnect attempted for non-existent blkif"
-                " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 
-        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return 1; /* Caller will send response error message. */
-    }
-
-    if ( blkif->status == CONNECTED )
-    {
-        blkif->status = DISCONNECTING;
-        blkif->disconnect_rspid = rsp_id;
-        wmb(); /* Let other CPUs see the status change. */
-        free_irq(blkif->irq, blkif);
-        blkif_deschedule(blkif);
-        blkif_put(blkif);
-        return 0; /* Caller should not send response message. */
-    }
-
-    disconnect->status = BLKIF_BE_STATUS_OKAY;
-    return 1;
-}
-
-/*-----[ Control Messages to/from Backend VM ]----------------------------*/
-
-/* Tell the controller to bring up the interface. */
-static void blkif_ptbe_send_interface_connect(void)
-{
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_BLKIF_FE,
-        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
-        .length  = sizeof(blkif_fe_interface_connect_t),
-    };
-    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-    msg->handle      = 0;
-    msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
-    
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void blkif_ptbe_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_ptbe_disconnect(void)
-{
-    blkif_sring_t *sring;
-    
-    sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
-    SHARED_RING_INIT(sring);
-    FRONT_RING_INIT(&blktap_be_ring, sring);
-    blktap_be_state  = BLKIF_STATE_DISCONNECTED;
-    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
-    blkif_ptbe_send_interface_connect();
-}
-
-static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
-{
-    int err = 0;
-    
-    blktap_be_evtchn = status->evtchn;
-    blktap_be_irq    = bind_evtchn_to_irq(blktap_be_evtchn);
-
-    err = request_irq(blktap_be_irq, blkif_ptbe_int, 
-                      SA_SAMPLE_RANDOM, "blkif", NULL);
-    if ( err ) {
-       WPRINTK("blkfront request_irq failed (%d)\n", err);
-        return;
-    } else {
-       /* transtion to connected in case we need to do a 
-           a partion probe on a whole disk */
-        blktap_be_state = BLKIF_STATE_CONNECTED;
-    }
-}
-
-static void unexpected(blkif_fe_interface_status_t *status)
-{
-    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", 
-           blkif_status_name[status->status],
-           blkif_state_name[blktap_be_state]);
-}
-
-static void blkif_ptbe_status(
-    blkif_fe_interface_status_t *status)
-{
-    if ( status->handle != 0 )
-    {
-        DPRINTK("Status change on unsupported blkif %d\n",
-               status->handle);
-        return;
-    }
-
-    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
-    
-    switch ( status->status )
-    {
-    case BLKIF_INTERFACE_STATUS_CLOSED:
-        switch ( blktap_be_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_ptbe_close();
-            break;
-        }
-        break;
-        
-    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
-        switch ( blktap_be_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            blkif_ptbe_disconnect();
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
-            unexpected(status);
-            break;
-        }
-        break;
-        
-    case BLKIF_INTERFACE_STATUS_CONNECTED:
-        switch ( blktap_be_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            blkif_ptbe_disconnect();
-            blkif_ptbe_connect(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-            blkif_ptbe_connect(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_ptbe_connect(status);
-            break;
-        }
-        break;
-
-   case BLKIF_INTERFACE_STATUS_CHANGED:
-        switch ( blktap_be_state )
-        {
-        case BLKIF_STATE_CLOSED:
-        case BLKIF_STATE_DISCONNECTED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            /* vbd_update(); */
-            /* tap doesn't really get state changes... */
-            unexpected(status);
-            break;
-        }
-       break;
-       
-    default:
-        DPRINTK("Status change to unknown value %d\n", status->status);
-        break;
-    }
-}
-
-/*-----[ All control messages enter here: ]-------------------------------*/
-
-void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    switch ( msg->type )
-    {
-    case CMSG_BLKIF_FE:
-
-        switch ( msg->subtype )
-        {
-        case CMSG_BLKIF_FE_INTERFACE_STATUS:
-            if ( msg->length != sizeof(blkif_fe_interface_status_t) )
-                goto parse_error;
-            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
-            break;        
-
-        default:
-            goto parse_error;
-        }
-
-    case CMSG_BLKIF_BE:
-        
-        /* send a copy of the message to user if wanted */
-        
-        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
-             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
-            
-            blktap_write_ctrl_ring(msg);
-        }
-        
-        switch ( msg->subtype )
-        {
-        case CMSG_BLKIF_BE_CREATE:
-            if ( msg->length != sizeof(blkif_be_create_t) )
-                goto parse_error;
-            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
-            break; 
-        case CMSG_BLKIF_BE_DESTROY:
-            if ( msg->length != sizeof(blkif_be_destroy_t) )
-                goto parse_error;
-            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
-            break;        
-        case CMSG_BLKIF_BE_CONNECT:
-            if ( msg->length != sizeof(blkif_be_connect_t) )
-                goto parse_error;
-            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
-            break;        
-        case CMSG_BLKIF_BE_DISCONNECT:
-            if ( msg->length != sizeof(blkif_be_disconnect_t) )
-                goto parse_error;
-            if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0],
-                    msg->id) )
-                return;
-            break;        
-
-        /* We just ignore anything to do with vbds for now. */
-        
-        case CMSG_BLKIF_BE_VBD_CREATE:
-            DPRINTK("PT got VBD_CREATE\n");
-            ((blkif_be_vbd_create_t *)&msg->msg[0])->status 
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        case CMSG_BLKIF_BE_VBD_DESTROY:
-            DPRINTK("PT got VBD_DESTROY\n");
-            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        case CMSG_BLKIF_BE_VBD_GROW:
-            DPRINTK("PT got VBD_GROW\n");
-            ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        case CMSG_BLKIF_BE_VBD_SHRINK:
-            DPRINTK("PT got VBD_SHRINK\n");
-            ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
-                = BLKIF_BE_STATUS_OKAY;
-            break;
-        default:
-            goto parse_error;
-        }
-    }
-
-    ctrl_if_send_response(msg);
-    return;
-
- parse_error:
-    msg->length = 0;
-    ctrl_if_send_response(msg);
-}
-
-/*-----[ Initialization ]-------------------------------------------------*/
-
-void __init blkif_interface_init(void)
-{
-    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
-                                     0, 0, NULL, NULL);
-    memset(blkif_hash, 0, sizeof(blkif_hash));
-    
-    blktap_be_ring.sring = NULL;
-}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
deleted file mode 100644 (file)
index e88c562..0000000
+++ /dev/null
@@ -1,472 +0,0 @@
-/******************************************************************************
- * blktap_datapath.c
- * 
- * XenLinux virtual block-device tap.
- * Block request routing data path.
- * 
- * Copyright (c) 2004, Andrew Warfield
- * -- see full header in blktap.c
- */
-#include "blktap.h"
-#include <asm-xen/evtchn.h>
-
-/*-----[ The data paths ]-------------------------------------------------*/
-
-/* Connection to a single backend domain. */
-blkif_front_ring_t blktap_be_ring;
-
-/*-----[ Tracking active requests ]---------------------------------------*/
-
-/* this must be the same as MAX_PENDING_REQS in blkback.c */
-#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
-
-active_req_t     active_reqs[MAX_ACTIVE_REQS];
-ACTIVE_RING_IDX  active_req_ring[MAX_ACTIVE_REQS];
-spinlock_t       active_req_lock = SPIN_LOCK_UNLOCKED;
-ACTIVE_RING_IDX  active_prod, active_cons;
-#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
-#define ACTIVE_IDX(_ar) (_ar - active_reqs)
-#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
-
-inline active_req_t *get_active_req(void) 
-{
-    ACTIVE_RING_IDX idx;
-    active_req_t *ar;
-    unsigned long flags;
-        
-    ASSERT(active_cons != active_prod);   
-    
-    spin_lock_irqsave(&active_req_lock, flags);
-    idx =  active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
-    ar = &active_reqs[idx];
-    spin_unlock_irqrestore(&active_req_lock, flags);
-    
-    return ar;
-}
-
-inline void free_active_req(active_req_t *ar) 
-{
-    unsigned long flags;
-        
-    spin_lock_irqsave(&active_req_lock, flags);
-    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
-    spin_unlock_irqrestore(&active_req_lock, flags);
-}
-
-active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
-{
-    return &active_reqs[idx];   
-}
-
-void active_reqs_init(void)
-{
-    ACTIVE_RING_IDX i;
-    
-    active_cons = 0;
-    active_prod = MAX_ACTIVE_REQS;
-    memset(active_reqs, 0, sizeof(active_reqs));
-    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
-        active_req_ring[i] = i;
-}
-
-/* Requests passing through the tap to the backend hijack the id field
- * in the request message.  In it we put the AR index _AND_ the fe domid.
- * the domid is used by the backend to map the pages properly.
- */
-
-static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
-    return ( (fe_dom << 16) | idx );
-}
-
-/*-----[ Ring helpers ]---------------------------------------------------*/
-
-inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
-{
-    blkif_response_t *resp_d;
-    active_req_t *ar;
-    
-    ar = &active_reqs[ID_TO_IDX(rsp->id)];
-    rsp->id = ar->id;
-            
-    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
-            blkif->blk_ring.rsp_prod_pvt);
-    memcpy(resp_d, rsp, sizeof(blkif_response_t));
-    wmb();
-    blkif->blk_ring.rsp_prod_pvt++;
-            
-    blkif_put(ar->blkif);
-    free_active_req(ar);
-    
-    return 0;
-}
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
-    blkif_request_t *req_d;
-
-    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
-        WPRINTK("Tap trying to access an unconnected backend!\n");
-        return 0;
-    }
-    
-    req_d = RING_GET_REQUEST(&blktap_be_ring,
-            blktap_be_ring.req_prod_pvt);
-    memcpy(req_d, req, sizeof(blkif_request_t));
-    wmb();
-    blktap_be_ring.req_prod_pvt++;
-            
-    return 0;
-}
-
-void kick_fe_domain(blkif_t *blkif) 
-{
-    RING_PUSH_RESPONSES(&blkif->blk_ring);
-    notify_via_evtchn(blkif->evtchn);
-    DPRINTK("notified FE(dom %u)\n", blkif->domid);
-    
-}
-
-void kick_be_domain(void)
-{
-    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) 
-        return;
-    
-    wmb(); /* Ensure that the frontend can see the requests. */
-    RING_PUSH_REQUESTS(&blktap_be_ring);
-    notify_via_evtchn(blktap_be_evtchn);
-    DPRINTK("notified BE\n");
-}
-
-/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
-
-/*-----[ Scheduler list maint -from blkback ]--- */
-
-static struct list_head blkio_schedule_list;
-static spinlock_t blkio_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
-    return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
-    unsigned long flags;
-    if ( !__on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-    if ( __on_blkdev_list(blkif) )
-    {
-        list_del(&blkif->blkdev_list);
-        blkif->blkdev_list.next = NULL;
-        blkif_put(blkif);
-    }
-    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
-    unsigned long flags;
-    if ( __on_blkdev_list(blkif) ) return;
-    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
-    {
-        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
-        blkif_get(blkif);
-    }
-    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-
-/*-----[ Scheduler functions - from blkback ]--- */
-
-static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
-
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-
-static int blkio_schedule(void *arg)
-{
-    DECLARE_WAITQUEUE(wq, current);
-
-    blkif_t          *blkif;
-    struct list_head *ent;
-
-    daemonize(
-        "xentapd"
-        );
-
-    for ( ; ; )
-    {
-        /* Wait for work to do. */
-        add_wait_queue(&blkio_schedule_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || 
-             list_empty(&blkio_schedule_list) )
-            schedule();
-        __set_current_state(TASK_RUNNING);
-        remove_wait_queue(&blkio_schedule_wait, &wq);
-
-        /* Queue up a batch of requests. */
-        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
-                !list_empty(&blkio_schedule_list) )
-        {
-            ent = blkio_schedule_list.next;
-            blkif = list_entry(ent, blkif_t, blkdev_list);
-            blkif_get(blkif);
-            remove_from_blkdev_list(blkif);
-            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
-                add_to_blkdev_list_tail(blkif);
-            blkif_put(blkif);
-        }
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-        /* Push the batch through to disc. */
-        run_task_queue(&tq_disk);
-#endif
-    }
-}
-
-static void maybe_trigger_blkio_schedule(void)
-{
-    /*
-     * Needed so that two processes, who together make the following predicate
-     * true, don't both read stale values and evaluate the predicate
-     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
-     */
-    smp_mb();
-
-    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/
-         !list_empty(&blkio_schedule_list) ) 
-        wake_up(&blkio_schedule_wait);
-}
-
-void blkif_deschedule(blkif_t *blkif)
-{
-    remove_from_blkdev_list(blkif);
-}
-
-void __init blkdev_schedule_init(void)
-{
-    spin_lock_init(&blkio_schedule_list_lock);
-    INIT_LIST_HEAD(&blkio_schedule_list);
-
-    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
-        BUG();
-}
-    
-/*-----[ Interrupt entry from a frontend ]------ */
-
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
-{
-    blkif_t *blkif = dev_id;
-
-    add_to_blkdev_list_tail(blkif);
-    maybe_trigger_blkio_schedule();
-    return IRQ_HANDLED;
-}
-
-/*-----[ Other Frontend Ring functions ]-------- */
-
-/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
-{
-    /* we have pending messages from the real frontend. */
-
-    blkif_request_t *req_s;
-    RING_IDX i, rp;
-    unsigned long flags;
-    active_req_t *ar;
-    int more_to_do = 0;
-    int notify_be = 0, notify_user = 0;
-    
-    DPRINTK("PT got FE interrupt.\n");
-
-    if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
-    
-    /* lock both rings */
-    spin_lock_irqsave(&blkif_io_lock, flags);
-
-    rp = blkif->blk_ring.sring->req_prod;
-    rmb();
-    
-    for ( i = blkif->blk_ring.req_cons; 
-         (i != rp) && 
-            !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
-          i++ )
-    {
-        
-        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) 
-        {
-            more_to_do = 1;
-            break;
-        }
-        
-        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
-        /* This is a new request:  
-         * Assign an active request record, and remap the id. 
-         */
-        ar = get_active_req();
-        ar->id = req_s->id;
-        ar->nr_pages = req_s->nr_segments; 
-        blkif_get(blkif);
-        ar->blkif = blkif;
-        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
-        /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
-
-        /* FE -> BE interposition point is here. */
-        
-        /* ------------------------------------------------------------- */
-        /* BLKIF_OP_PROBE_HACK:                                          */
-        /* Signal to the backend that we are a tap domain.               */
-
-        if (req_s->operation == BLKIF_OP_PROBE) {
-            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
-            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
-        }
-
-        /* ------------------------------------------------------------- */
-
-        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
-        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
-             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
-            
-            /* Copy the response message to UFERing */
-            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
-            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
-
-            DPRINTK("req->UFERing\n"); 
-            blktap_write_fe_ring(req_s);
-            notify_user = 1;
-        }
-
-        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
-        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
-               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
-            
-            /* be included to prevent noise from the fe when its off */
-            /* copy the request message to the BERing */
-
-            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
-                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
-                    (unsigned)blktap_be_ring.req_prod_pvt & 
-                    (RING_SIZE((&blktap_be_ring)-1)));
-            
-            write_req_to_be_ring(req_s);
-            notify_be = 1;
-        }
-    }
-
-    blkif->blk_ring.req_cons = i;
-    
-    /* unlock rings */
-    spin_unlock_irqrestore(&blkif_io_lock, flags);
-    
-    if (notify_user)
-        blktap_kick_user();
-    if (notify_be)
-        kick_be_domain();
-    
-    return more_to_do;
-}
-
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
-                                  struct pt_regs *ptregs)
-{
-    blkif_response_t  *resp_s;
-    blkif_t *blkif;
-    RING_IDX rp, i;
-    unsigned long flags;
-
-    DPRINTK("PT got BE interrupt.\n");
-
-    /* lock both rings */
-    spin_lock_irqsave(&blkif_io_lock, flags);
-    
-    rp = blktap_be_ring.sring->rsp_prod;
-    rmb();
-      
-    for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
-    {
-        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
-        
-        /* BE -> FE interposition point is here. */
-    
-        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
-        
-        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
-        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
-             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
-
-            /* Copy the response message to UBERing */
-            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
-            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
-
-            DPRINTK("rsp->UBERing\n"); 
-            blktap_write_be_ring(resp_s);
-            blktap_kick_user();
-
-        }
-       
-        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
-        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
-               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
-            
-            /* (fe included to prevent random interference from the BE) */
-            /* Copy the response message to FERing */
-         
-            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
-                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
-                    (unsigned)blkif->blk_ring.rsp_prod_pvt & 
-                    (RING_SIZE((&blkif->blk_ring)-1)));
-
-            write_resp_to_fe_ring(blkif, resp_s);
-            kick_fe_domain(blkif);
-
-        }
-    }
-    
-    blktap_be_ring.rsp_cons = i;
-    
-
-    spin_unlock_irqrestore(&blkif_io_lock, flags);
-    
-    return IRQ_HANDLED;
-}
-
-/* Debug : print the current ring indices. */
-
-void print_vm_ring_idxs(void)
-{
-    int i;
-    blkif_t *blkif;
-            
-    WPRINTK("FE Rings: \n---------\n");
-    for ( i = 0; i < 50; i++) { 
-        blkif = blkif_find_by_handle((domid_t)i, 0);
-        if (blkif != NULL) {
-            if (blkif->blk_ring.sring != NULL) {
-                WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d "
-                    "| req_prod: %2d, rsp_prod: %2d\n", i, 
-                    blkif->blk_ring.req_cons,
-                    blkif->blk_ring.rsp_prod_pvt,
-                    blkif->blk_ring.sring->req_prod,
-                    blkif->blk_ring.sring->rsp_prod);
-            } else {
-                WPRINTK("%2d: [no device channel yet]\n", i);
-            }
-        }
-    }
-    if (blktap_be_ring.sring != NULL) {
-        WPRINTK("BE Ring: \n--------\n");
-        WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
-            "| req_prod: %2d, rsp_prod: %2d\n",
-            blktap_be_ring.rsp_cons,
-            blktap_be_ring.req_prod_pvt,
-            blktap_be_ring.sring->req_prod,
-            blktap_be_ring.sring->rsp_prod);
-    }
-}        
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
deleted file mode 100644 (file)
index 3cc307f..0000000
+++ /dev/null
@@ -1,489 +0,0 @@
-/******************************************************************************
- * blktap_userdev.c
- * 
- * XenLinux virtual block-device tap.
- * Control interface between the driver and a character device.
- * 
- * Copyright (c) 2004, Andrew Warfield
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/miscdevice.h>
-#include <linux/errno.h>
-#include <linux/major.h>
-#include <linux/gfp.h>
-#include <linux/poll.h>
-#include <asm/pgalloc.h>
-#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
-
-#include "blktap.h"
-
-
-unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
-
-/* Only one process may open /dev/xen/blktap at any time. */
-static unsigned long blktap_dev_inuse;
-unsigned long blktap_ring_ok; /* make this ring->state */
-
-/* for poll: */
-static wait_queue_head_t blktap_wait;
-
-/* Where things are inside the device mapping. */
-struct vm_area_struct *blktap_vma;
-unsigned long mmap_vstart;
-unsigned long rings_vstart;
-
-/* Rings up to user space. */
-static blkif_front_ring_t blktap_ufe_ring;
-static blkif_back_ring_t  blktap_ube_ring;
-static ctrl_front_ring_t  blktap_uctrl_ring;
-
-/* local prototypes */
-static int blktap_read_fe_ring(void);
-static int blktap_read_be_ring(void);
-
-/* -------[ blktap vm ops ]------------------------------------------- */
-
-static struct page *blktap_nopage(struct vm_area_struct *vma,
-                                             unsigned long address,
-                                             int *type)
-{
-    /*
-     * if the page has not been mapped in by the driver then generate
-     * a SIGBUS to the domain.
-     */
-
-    force_sig(SIGBUS, current);
-
-    return 0;
-}
-
-struct vm_operations_struct blktap_vm_ops = {
-    nopage:   blktap_nopage,
-};
-
-/* -------[ blktap file ops ]----------------------------------------- */
-
-static int blktap_open(struct inode *inode, struct file *filp)
-{
-    blkif_sring_t *sring;
-    ctrl_sring_t *csring;
-    
-    if ( test_and_set_bit(0, &blktap_dev_inuse) )
-        return -EBUSY;
-
-    printk(KERN_ALERT "blktap open.\n");
-    
-    /* Allocate the ctrl ring. */
-    csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
-    if (csring == NULL)
-        goto fail_nomem;
-
-    SetPageReserved(virt_to_page(csring));
-    
-    SHARED_RING_INIT(csring);
-    FRONT_RING_INIT(&blktap_uctrl_ring, csring);
-
-
-    /* Allocate the fe ring. */
-    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
-    if (sring == NULL)
-        goto fail_free_ctrl;
-
-    SetPageReserved(virt_to_page(sring));
-    
-    SHARED_RING_INIT(sring);
-    FRONT_RING_INIT(&blktap_ufe_ring, sring);
-
-    /* Allocate the be ring. */
-    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
-    if (sring == NULL)
-        goto fail_free_fe;
-
-    SetPageReserved(virt_to_page(sring));
-    
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&blktap_ube_ring, sring);
-
-    DPRINTK(KERN_ALERT "blktap open.\n");
-
-    return 0;
-    
- fail_free_ctrl:
-    free_page( (unsigned long) blktap_uctrl_ring.sring);
-
- fail_free_fe:
-    free_page( (unsigned long) blktap_ufe_ring.sring);
-
- fail_nomem:
-    return -ENOMEM;
-}
-
-static int blktap_release(struct inode *inode, struct file *filp)
-{
-    blktap_dev_inuse = 0;
-    blktap_ring_ok = 0;
-
-    printk(KERN_ALERT "blktap closed.\n");
-
-    /* Free the ring page. */
-    ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
-    free_page((unsigned long) blktap_uctrl_ring.sring);
-
-    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
-    free_page((unsigned long) blktap_ufe_ring.sring);
-
-    ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
-    free_page((unsigned long) blktap_ube_ring.sring);
-    
-    return 0;
-}
-
-/* Note on mmap:
- * remap_pfn_range sets VM_IO on vma->vm_flags.  In trying to make libaio
- * work to do direct page access from userspace, this ended up being a
- * problem.  The bigger issue seems to be that there is no way to map
- * a foreign page in to user space and have the virtual address of that 
- * page map sanely down to a mfn.
- * Removing the VM_IO flag results in a loop in get_user_pages, as 
- * pfn_valid() always fails on a foreign page.
- */
-static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-    int size;
-
-    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
-           vma->vm_start, vma->vm_end);
-
-    vma->vm_ops = &blktap_vm_ops;
-
-    size = vma->vm_end - vma->vm_start;
-    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
-        printk(KERN_INFO 
-               "blktap: you _must_ map exactly %d pages!\n",
-               MMAP_PAGES + RING_PAGES);
-        return -EAGAIN;
-    }
-
-    size >>= PAGE_SHIFT;
-    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
-    
-    rings_vstart = vma->vm_start;
-    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
-    
-    /* Map the ring pages to the start of the region and reserve it. */
-
-    /* not sure if I really need to do this... */
-    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-    DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
-    if (remap_pfn_range(vma, vma->vm_start, 
-                         __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, 
-                         PAGE_SIZE, vma->vm_page_prot)) {
-        WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
-    }
-
-
-    DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
-    if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, 
-                         __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, 
-                         PAGE_SIZE, vma->vm_page_prot)) {
-        WPRINTK("be_ring: remap_pfn_range failure!\n");
-    }
-
-    DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
-    if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), 
-                         __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
-                         PAGE_SIZE, vma->vm_page_prot)) {
-        WPRINTK("fe_ring: remap_pfn_range failure!\n");
-    }
-            
-    blktap_vma = vma;
-    blktap_ring_ok = 1;
-
-    return 0;
-}
-
-static int blktap_ioctl(struct inode *inode, struct file *filp,
-                        unsigned int cmd, unsigned long arg)
-{
-    switch(cmd) {
-    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
-        return blktap_read_fe_ring();
-
-    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
-        return blktap_read_be_ring();
-
-    case BLKTAP_IOCTL_SETMODE:
-        if (BLKTAP_MODE_VALID(arg)) {
-            blktap_mode = arg;
-            /* XXX: may need to flush rings here. */
-            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
-            return 0;
-        }
-    case BLKTAP_IOCTL_PRINT_IDXS:
-        {
-            print_vm_ring_idxs();
-            WPRINTK("User Rings: \n-----------\n");
-            WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
-                            "| req_prod: %2d, rsp_prod: %2d\n",
-                            blktap_ufe_ring.rsp_cons,
-                            blktap_ufe_ring.req_prod_pvt,
-                            blktap_ufe_ring.sring->req_prod,
-                            blktap_ufe_ring.sring->rsp_prod);
-            WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
-                            "| req_prod: %2d, rsp_prod: %2d\n",
-                            blktap_ube_ring.req_cons,
-                            blktap_ube_ring.rsp_prod_pvt,
-                            blktap_ube_ring.sring->req_prod,
-                            blktap_ube_ring.sring->rsp_prod);
-            
-        }
-    }
-    return -ENOIOCTLCMD;
-}
-
-static unsigned int blktap_poll(struct file *file, poll_table *wait)
-{
-        poll_wait(file, &blktap_wait, wait);
-
-        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
-             RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
-             RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
-
-            RING_PUSH_REQUESTS(&blktap_uctrl_ring);
-            RING_PUSH_REQUESTS(&blktap_ufe_ring);
-            RING_PUSH_RESPONSES(&blktap_ube_ring);
-            return POLLIN | POLLRDNORM;
-        }
-
-        return 0;
-}
-
-void blktap_kick_user(void)
-{
-    /* blktap_ring->req_prod = blktap_req_prod; */
-    wake_up_interruptible(&blktap_wait);
-}
-
-static struct file_operations blktap_fops = {
-    owner:    THIS_MODULE,
-    poll:     blktap_poll,
-    ioctl:    blktap_ioctl,
-    open:     blktap_open,
-    release:  blktap_release,
-    mmap:     blktap_mmap,
-};
-    
-/*-----[ Data to/from user space ]----------------------------------------*/
-
-
-int blktap_write_fe_ring(blkif_request_t *req)
-{
-    blkif_request_t *target;
-    int error, i;
-
-    /*
-     * This is called to pass a request from the real frontend domain's
-     * blkif ring to the character device.
-     */
-
-    if ( ! blktap_ring_ok ) {
-        DPRINTK("blktap: ufe_ring not ready for a request!\n");
-        return 0;
-    }
-
-    if ( RING_FULL(&blktap_ufe_ring) ) {
-        DPRINTK("blktap: fe_ring is full, can't add.\n");
-        return 0;
-    }
-
-    target = RING_GET_REQUEST(&blktap_ufe_ring,
-            blktap_ufe_ring.req_prod_pvt);
-    memcpy(target, req, sizeof(*req));
-
-    /* Attempt to map the foreign pages directly in to the application */
-    for (i=0; i<target->nr_segments; i++) {
-
-        error = direct_remap_area_pages(blktap_vma->vm_mm, 
-                                        MMAP_VADDR(ID_TO_IDX(req->id), i), 
-                                        target->frame_and_sects[i] & PAGE_MASK,
-                                        PAGE_SIZE,
-                                        blktap_vma->vm_page_prot,
-                                        ID_TO_DOM(req->id));
-        if ( error != 0 ) {
-            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
-            /* the request is now dropped on the floor. */
-            return 0;
-        }
-    }
-    
-    blktap_ufe_ring.req_prod_pvt++;
-    
-    return 0;
-}
-
-int blktap_write_be_ring(blkif_response_t *rsp)
-{
-    blkif_response_t *target;
-
-    /*
-     * This is called to pass a request from the real backend domain's
-     * blkif ring to the character device.
-     */
-
-    if ( ! blktap_ring_ok ) {
-        DPRINTK("blktap: be_ring not ready for a request!\n");
-        return 0;
-    }
-
-    /* No test for fullness in the response direction. */
-
-    target = RING_GET_RESPONSE(&blktap_ube_ring,
-            blktap_ube_ring.rsp_prod_pvt);
-    memcpy(target, rsp, sizeof(*rsp));
-
-    /* no mapping -- pages were mapped in blktap_write_fe_ring() */
-
-    blktap_ube_ring.rsp_prod_pvt++;
-    
-    return 0;
-}
-
-static void blktap_fast_flush_area(int idx, int nr_pages)
-{
-    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-    int               i;
-
-    for ( i = 0; i < nr_pages; i++ )
-    {
-        mcl[i].op = __HYPERVISOR_update_va_mapping;
-        mcl[i].args[0] = MMAP_VADDR(idx, i);
-        mcl[i].args[1] = 0;
-        mcl[i].args[2] = 0;
-    }
-
-    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
-    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
-        BUG();
-}
-
-static int blktap_read_fe_ring(void)
-{
-    /* This is called to read responses from the UFE ring. */
-
-    RING_IDX i, rp;
-    blkif_response_t *resp_s;
-    blkif_t *blkif;
-    active_req_t *ar;
-
-    DPRINTK("blktap_read_fe_ring()\n");
-
-    /* if we are forwarding from UFERring to FERing */
-    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
-
-        /* for each outstanding message on the UFEring  */
-        rp = blktap_ufe_ring.sring->rsp_prod;
-        rmb();
-        
-        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
-        {
-            resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
-            
-            DPRINTK("resp->fe_ring\n");
-            ar = lookup_active_req(ID_TO_IDX(resp_s->id));
-            blkif = ar->blkif;
-            blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
-            write_resp_to_fe_ring(blkif, resp_s);
-            kick_fe_domain(blkif);
-        }
-        
-        blktap_ufe_ring.rsp_cons = i;
-    }
-    return 0;
-}
-
-static int blktap_read_be_ring(void)
-{
-    /* This is called to read requests from the UBE ring. */
-
-    RING_IDX i, rp;
-    blkif_request_t *req_s;
-
-    DPRINTK("blktap_read_be_ring()\n");
-
-    /* if we are forwarding from UFERring to FERing */
-    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
-
-        /* for each outstanding message on the UFEring  */
-        rp = blktap_ube_ring.sring->req_prod;
-        rmb();
-        for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
-        {
-            req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
-
-            DPRINTK("req->be_ring\n");
-            write_req_to_be_ring(req_s);
-            kick_be_domain();
-        }
-        
-        blktap_ube_ring.req_cons = i;
-    }
-
-    return 0;
-}
-
-int blktap_write_ctrl_ring(ctrl_msg_t *msg)
-{
-    ctrl_msg_t *target;
-
-    if ( ! blktap_ring_ok ) {
-        DPRINTK("blktap: be_ring not ready for a request!\n");
-        return 0;
-    }
-
-    /* No test for fullness in the response direction. */
-
-    target = RING_GET_REQUEST(&blktap_uctrl_ring,
-            blktap_uctrl_ring.req_prod_pvt);
-    memcpy(target, msg, sizeof(*msg));
-
-    blktap_uctrl_ring.req_prod_pvt++;
-    
-    /* currently treat the ring as unidirectional. */
-    blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
-    
-    return 0;
-       
-}
-
-/* -------[ blktap module setup ]------------------------------------- */
-
-static struct miscdevice blktap_miscdev = {
-    .minor        = BLKTAP_MINOR,
-    .name         = "blktap",
-    .fops         = &blktap_fops,
-    .devfs_name   = "misc/blktap",
-};
-
-int blktap_init(void)
-{
-    int err;
-
-    err = misc_register(&blktap_miscdev);
-    if ( err != 0 )
-    {
-        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
-        return err;
-    }
-
-    init_waitqueue_head(&blktap_wait);
-
-
-    return 0;
-}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h b/linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h
deleted file mode 100644 (file)
index bcab204..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-
-#ifndef __USBIF__BACKEND__COMMON_H__
-#define __USBIF__BACKEND__COMMON_H__
-
-#include <linux/config.h>
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/rbtree.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm-xen/ctrl_if.h>
-#include <asm-xen/hypervisor.h>
-
-#include <asm-xen/xen-public/io/usbif.h>
-
-#if 0
-#define ASSERT(_p) \
-    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
-    __LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
-                           __FILE__ , __LINE__ , ## _a )
-#else
-#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-typedef struct usbif_priv_st usbif_priv_t;
-
-struct usbif_priv_st {
-    /* Unique identifier for this interface. */
-    domid_t          domid;
-    unsigned int     handle;
-    /* Physical parameters of the comms window. */
-    unsigned long    shmem_frame;
-    unsigned int     evtchn;
-    int              irq;
-    /* Comms Information */
-    usbif_back_ring_t usb_ring;
-    /* Private fields. */
-    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-    /*
-     * DISCONNECT response is deferred until pending requests are ack'ed.
-     * We therefore need to store the id from the original request.
-     */
-    u8                   disconnect_rspid;
-    usbif_priv_t        *hash_next;
-    struct list_head     usbif_list;
-    spinlock_t           usb_ring_lock;
-    atomic_t             refcnt;
-
-    struct work_struct work;
-};
-
-void usbif_create(usbif_be_create_t *create);
-void usbif_destroy(usbif_be_destroy_t *destroy);
-void usbif_connect(usbif_be_connect_t *connect);
-int  usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id);
-void usbif_disconnect_complete(usbif_priv_t *up);
-
-void usbif_release_port(usbif_be_release_port_t *msg);
-int usbif_claim_port(usbif_be_claim_port_t *msg);
-void usbif_release_ports(usbif_priv_t *up);
-
-usbif_priv_t *usbif_find(domid_t domid);
-#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define usbif_put(_b)                             \
-    do {                                          \
-        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            usbif_disconnect_complete(_b);        \
-    } while (0)
-
-
-void usbif_interface_init(void);
-void usbif_ctrlif_init(void);
-
-void usbif_deschedule(usbif_priv_t *up);
-void remove_from_usbif_list(usbif_priv_t *up);
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-
-#endif /* __USBIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c b/linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c
deleted file mode 100644 (file)
index 899394a..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/control.c
- * 
- * Routines for interfacing with the control plane.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype);
-    
-    switch ( msg->subtype )
-    {
-    case CMSG_USBIF_BE_CREATE:
-        if ( msg->length != sizeof(usbif_be_create_t) )
-            goto parse_error;
-        usbif_create((usbif_be_create_t *)&msg->msg[0]);
-        break;        
-    case CMSG_USBIF_BE_DESTROY:
-        if ( msg->length != sizeof(usbif_be_destroy_t) )
-            goto parse_error;
-        usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]);
-        break;        
-    case CMSG_USBIF_BE_CONNECT:
-        if ( msg->length != sizeof(usbif_be_connect_t) )
-            goto parse_error;
-        usbif_connect((usbif_be_connect_t *)&msg->msg[0]);
-        break;        
-    case CMSG_USBIF_BE_DISCONNECT:
-        if ( msg->length != sizeof(usbif_be_disconnect_t) )
-            goto parse_error;
-        if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) )
-            return; /* Sending the response is deferred until later. */
-        break;        
-    case CMSG_USBIF_BE_CLAIM_PORT:
-        if ( msg->length != sizeof(usbif_be_claim_port_t) )
-            goto parse_error;
-       usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]);
-        break;
-    case CMSG_USBIF_BE_RELEASE_PORT:
-        if ( msg->length != sizeof(usbif_be_release_port_t) )
-            goto parse_error;
-        usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]);
-        break;
-    default:
-        goto parse_error;
-    }
-
-    ctrl_if_send_response(msg);
-    return;
-
- parse_error:
-    DPRINTK("Parse error while reading message subtype %d, len %d\n",
-            msg->subtype, msg->length);
-    msg->length = 0;
-    ctrl_if_send_response(msg);
-}
-
-void usbif_ctrlif_init(void)
-{
-    ctrl_msg_t                       cmsg;
-    usbif_be_driver_status_changed_t st;
-
-    (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx, 
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
-
-    /* Send a driver-UP notification to the domain controller. */
-    cmsg.type      = CMSG_USBIF_BE;
-    cmsg.subtype   = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED;
-    cmsg.length    = sizeof(usbif_be_driver_status_changed_t);
-    st.status      = USBIF_DRIVER_STATUS_UP;
-    memcpy(cmsg.msg, &st, sizeof(st));
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c b/linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c
deleted file mode 100644 (file)
index 4630da8..0000000
+++ /dev/null
@@ -1,252 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/interface.c
- * 
- * USB device interface management.
- * 
- * by Mark Williamson, Copyright (c) 2004
- */
-
-
-/******************************************************************************
- * arch/xen/drivers/blkif/backend/interface.c
- * 
- * Block-device interface management.
- * 
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-#define USBIF_HASHSZ 1024
-#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1))
-
-static kmem_cache_t      *usbif_priv_cachep;
-static usbif_priv_t      *usbif_priv_hash[USBIF_HASHSZ];
-
-usbif_priv_t *usbif_find(domid_t domid)
-{
-    usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)];
-    while ( (up != NULL ) && ( up->domid != domid ) )
-        up = up->hash_next;
-    return up;
-}
-
-static void __usbif_disconnect_complete(void *arg)
-{
-    usbif_priv_t         *usbif = (usbif_priv_t *)arg;
-    ctrl_msg_t            cmsg;
-    usbif_be_disconnect_t disc;
-
-    /*
-     * These can't be done in usbif_disconnect() because at that point there
-     * may be outstanding requests at the device whose asynchronous responses
-     * must still be notified to the remote driver.
-     */
-    unbind_evtchn_from_irq(usbif->evtchn);
-    vfree(usbif->usb_ring.sring);
-
-    /* Construct the deferred response message. */
-    cmsg.type         = CMSG_USBIF_BE;
-    cmsg.subtype      = CMSG_USBIF_BE_DISCONNECT;
-    cmsg.id           = usbif->disconnect_rspid;
-    cmsg.length       = sizeof(usbif_be_disconnect_t);
-    disc.domid        = usbif->domid;
-    disc.status       = USBIF_BE_STATUS_OKAY;
-    memcpy(cmsg.msg, &disc, sizeof(disc));
-
-    /*
-     * Make sure message is constructed /before/ status change, because
-     * after the status change the 'usbif' structure could be deallocated at
-     * any time. Also make sure we send the response /after/ status change,
-     * as otherwise a subsequent CONNECT request could spuriously fail if
-     * another CPU doesn't see the status change yet.
-     */
-    mb();
-    if ( usbif->status != DISCONNECTING )
-        BUG();
-    usbif->status = DISCONNECTED;
-    mb();
-
-    /* Send the successful response. */
-    ctrl_if_send_response(&cmsg);
-}
-
-void usbif_disconnect_complete(usbif_priv_t *up)
-{
-    INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
-    schedule_work(&up->work);
-}
-
-void usbif_create(usbif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    usbif_priv_t **pup, *up;
-
-    if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
-    {
-        DPRINTK("Could not create usbif: out of memory\n");
-        create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(up, 0, sizeof(*up));
-    up->domid  = domid;
-    up->status = DISCONNECTED;
-    spin_lock_init(&up->usb_ring_lock);
-    atomic_set(&up->refcnt, 0);
-
-    pup = &usbif_priv_hash[USBIF_HASH(domid)];
-    while ( *pup != NULL )
-    {
-        if ( (*pup)->domid == domid )
-        {
-            create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
-            kmem_cache_free(usbif_priv_cachep, up);
-            return;
-        }
-        pup = &(*pup)->hash_next;
-    }
-
-    up->hash_next = *pup;
-    *pup = up;
-
-    create->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_destroy(usbif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    usbif_priv_t  **pup, *up;
-
-    pup = &usbif_priv_hash[USBIF_HASH(domid)];
-    while ( (up = *pup) != NULL )
-    {
-        if ( up->domid == domid )
-        {
-            if ( up->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pup = &up->hash_next;
-    }
-
-    destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
-    *pup = up->hash_next;
-    usbif_release_ports(up);
-    kmem_cache_free(usbif_priv_cachep, up);
-    destroy->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_connect(usbif_be_connect_t *connect)
-{
-    domid_t       domid  = connect->domid;
-    unsigned int  evtchn = connect->evtchn;
-    unsigned long shmem_frame = connect->shmem_frame;
-    struct vm_struct *vma;
-    pgprot_t      prot;
-    int           error;
-    usbif_priv_t *up;
-    usbif_sring_t *sring;
-
-    up = usbif_find(domid);
-    if ( unlikely(up == NULL) )
-    {
-        DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n", 
-                connect->domid); 
-        connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        if ( error == -ENOMEM )
-            connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT )
-            connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
-        else
-            connect->status = USBIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-
-    if ( up->status != DISCONNECTED )
-    {
-        connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
-        vfree(vma->addr);
-        return;
-    }
-
-    sring = (usbif_sring_t *)vma->addr;
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&up->usb_ring, sring);
-
-    up->evtchn        = evtchn;
-    up->irq           = bind_evtchn_to_irq(evtchn);
-    up->shmem_frame   = shmem_frame;
-    up->status        = CONNECTED;
-    usbif_get(up);
-
-    request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
-
-    connect->status = USBIF_BE_STATUS_OKAY;
-}
-
-/* Remove URBs for this interface before destroying it. */
-void usbif_deschedule(usbif_priv_t *up)
-{
-    remove_from_usbif_list(up);
-}
-
-int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
-{
-    domid_t       domid  = disconnect->domid;
-    usbif_priv_t *up;
-
-    up = usbif_find(domid);
-    if ( unlikely(up == NULL) )
-    {
-        DPRINTK("usbif_disconnect attempted for non-existent usbif"
-                " (%u)\n", disconnect->domid); 
-        disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return 1; /* Caller will send response error message. */
-    }
-
-    if ( up->status == CONNECTED )
-    {
-        up->status = DISCONNECTING;
-        up->disconnect_rspid = rsp_id;
-        wmb(); /* Let other CPUs see the status change. */
-        free_irq(up->irq, up);
-       usbif_deschedule(up);
-        usbif_put(up);
-        return 0; /* Caller should not send response message. */
-    }
-
-    disconnect->status = USBIF_BE_STATUS_OKAY;
-    return 1;
-}
-
-void __init usbif_interface_init(void)
-{
-    usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
-                                         sizeof(usbif_priv_t), 
-                                         0, 0, NULL, NULL);
-    memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
-}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c b/linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c
deleted file mode 100644 (file)
index b039b45..0000000
+++ /dev/null
@@ -1,1070 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/main.c
- * 
- * Backend for the Xen virtual USB driver - provides an abstraction of a
- * USB host controller to the corresponding frontend driver.
- *
- * by Mark Williamson
- * Copyright (c) 2004 Intel Research Cambridge
- * Copyright (c) 2004, 2005 Mark Williamson
- *
- * Based on arch/xen/drivers/blkif/backend/main.c
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/tqueue.h>
-
-/*
- * This is rather arbitrary.
- */
-#define MAX_PENDING_REQS 4
-#define BATCH_PER_DOMAIN 1
-
-static unsigned long mmap_vstart;
-
-/* Needs to be sufficiently large that we can map the (large) buffers
- * the USB mass storage driver wants. */
-#define MMAP_PAGES_PER_REQUEST \
-    (128)
-#define MMAP_PAGES             \
-    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-
-#define MMAP_VADDR(_req,_seg)                        \
-    (mmap_vstart +                                   \
-     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * PAGE_SIZE))
-
-
-static spinlock_t owned_ports_lock;
-LIST_HEAD(owned_ports);
-
-/* A list of these structures is used to track ownership of physical USB
- * ports. */
-typedef struct 
-{
-    usbif_priv_t     *usbif_priv;
-    char             path[16];
-    int               guest_port;
-    int enabled;
-    struct list_head  list;
-    unsigned long guest_address; /* The USB device address that has been
-                                  * assigned by the guest. */
-    int               dev_present; /* Is there a device present? */
-    struct usb_device * dev;
-    unsigned long ifaces;  /* What interfaces are present on this device? */
-} owned_port_t;
-
-
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it.  The request is complete, the specified
- * domain has a response queued for it, with the saved 'id' passed back.
- */
-typedef struct {
-    usbif_priv_t       *usbif_priv;
-    unsigned long      id;
-    int                nr_pages;
-    unsigned short     operation;
-    int                status;
-} pending_req_t;
-
-/*
- * We can't allocate pending_req's in order, since they may complete out of 
- * order. We therefore maintain an allocation ring. This ring also indicates 
- * when enough work has been passed down -- at that point the allocation ring 
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock;
-
-/* NB. We use a different index type to differentiate from shared usb rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do);
-static void make_response(usbif_priv_t *usbif, unsigned long id, 
-                          unsigned short op, int st, int inband,
-                         unsigned long actual_length);
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port);
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req);    
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid);
-static owned_port_t *usbif_find_port(char *);
-
-/******************************************************************
- * PRIVATE DEBUG FUNCTIONS
- */
-
-#undef DEBUG
-#ifdef DEBUG
-
-static void dump_port(owned_port_t *p)
-{
-    printk(KERN_DEBUG "owned_port_t @ %p\n"
-          "  usbif_priv @ %p\n"
-          "  path: %s\n"
-          "  guest_port: %d\n"
-          "  guest_address: %ld\n"
-          "  dev_present: %d\n"
-          "  dev @ %p\n"
-          "  ifaces: 0x%lx\n",
-          p, p->usbif_priv, p->path, p->guest_port, p->guest_address,
-          p->dev_present, p->dev, p->ifaces);
-}
-
-
-static void dump_request(usbif_request_t *req)
-{    
-    printk(KERN_DEBUG "id = 0x%lx\n"
-          "devnum %d\n"
-          "endpoint 0x%x\n"
-          "direction %d\n"
-          "speed %d\n"
-          "pipe_type 0x%x\n"
-          "transfer_buffer 0x%lx\n"
-          "length 0x%lx\n"
-          "transfer_flags 0x%lx\n"
-          "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n"
-          "iso_schedule = 0x%lx\n"
-          "num_iso %ld\n",
-          req->id, req->devnum, req->endpoint, req->direction, req->speed,
-          req->pipe_type, req->transfer_buffer, req->length,
-          req->transfer_flags, req->setup[0], req->setup[1], req->setup[2],
-          req->setup[3], req->setup[4], req->setup[5], req->setup[6],
-          req->setup[7], req->iso_schedule, req->num_iso);
-}
-
-static void dump_urb(struct urb *urb)
-{
-    printk(KERN_DEBUG "dumping urb @ %p\n", urb);
-
-#define DUMP_URB_FIELD(name, format) \
-    printk(KERN_DEBUG "  " # name " " format "\n", urb-> name)
-    
-    DUMP_URB_FIELD(pipe, "0x%x");
-    DUMP_URB_FIELD(status, "%d");
-    DUMP_URB_FIELD(transfer_flags, "0x%x");    
-    DUMP_URB_FIELD(transfer_buffer, "%p");
-    DUMP_URB_FIELD(transfer_buffer_length, "%d");
-    DUMP_URB_FIELD(actual_length, "%d");
-}
-
-static void dump_response(usbif_response_t *resp)
-{
-    printk(KERN_DEBUG "usbback: Sending response:\n"
-          "         id = 0x%x\n"
-          "         op = %d\n"
-          "         status = %d\n"
-          "         data = %d\n"
-          "         length = %d\n",
-          resp->id, resp->op, resp->status, resp->data, resp->length);
-}
-
-#else /* DEBUG */
-
-#define dump_port(blah)     ((void)0)
-#define dump_request(blah)   ((void)0)
-#define dump_urb(blah)      ((void)0)
-#define dump_response(blah) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************
- * MEMORY MANAGEMENT
- */
-
-static void fast_flush_area(int idx, int nr_pages)
-{
-    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-    int               i;
-
-    for ( i = 0; i < nr_pages; i++ )
-    {
-        mcl[i].op = __HYPERVISOR_update_va_mapping;
-        mcl[i].args[0] = MMAP_VADDR(idx, i);
-        mcl[i].args[1] = 0;
-        mcl[i].args[2] = 0;
-    }
-
-    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
-    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
-        BUG();
-}
-
-
-/******************************************************************
- * USB INTERFACE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head usbio_schedule_list;
-static spinlock_t usbio_schedule_list_lock;
-
-static int __on_usbif_list(usbif_priv_t *up)
-{
-    return up->usbif_list.next != NULL;
-}
-
-void remove_from_usbif_list(usbif_priv_t *up)
-{
-    unsigned long flags;
-    if ( !__on_usbif_list(up) ) return;
-    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
-    if ( __on_usbif_list(up) )
-    {
-        list_del(&up->usbif_list);
-        up->usbif_list.next = NULL;
-        usbif_put(up);
-    }
-    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-static void add_to_usbif_list_tail(usbif_priv_t *up)
-{
-    unsigned long flags;
-    if ( __on_usbif_list(up) ) return;
-    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
-    if ( !__on_usbif_list(up) && (up->status == CONNECTED) )
-    {
-        list_add_tail(&up->usbif_list, &usbio_schedule_list);
-        usbif_get(up);
-    }
-    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-void free_pending(int pending_idx)
-{
-    unsigned long flags;
-
-    /* Free the pending request. */
-    spin_lock_irqsave(&pend_prod_lock, flags);
-    pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-    spin_unlock_irqrestore(&pend_prod_lock, flags);
-}
-
-/******************************************************************
- * COMPLETION CALLBACK -- Called as urb->complete()
- */
-
-static void maybe_trigger_usbio_schedule(void);
-
-static void __end_usb_io_op(struct urb *purb)
-{
-    pending_req_t *pending_req;
-    int pending_idx;
-
-    pending_req = purb->context;
-
-    pending_idx = pending_req - pending_reqs;
-
-    ASSERT(purb->actual_length <= purb->transfer_buffer_length);
-    ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE);
-    
-    /* An error fails the entire request. */
-    if ( purb->status )
-    {
-        printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status);
-    }
-
-    if ( usb_pipetype(purb->pipe) == 0 )
-    {
-        int i;
-        usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1);
-
-        /* If we're dealing with an iso pipe, we need to copy back the schedule. */
-        for ( i = 0; i < purb->number_of_packets; i++ )
-        {
-            sched[i].length = purb->iso_frame_desc[i].actual_length;
-            ASSERT(sched[i].buffer_offset ==
-                   purb->iso_frame_desc[i].offset);
-            sched[i].status = purb->iso_frame_desc[i].status;
-        }
-    }
-    
-    fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages);
-
-    kfree(purb->setup_packet);
-
-    make_response(pending_req->usbif_priv, pending_req->id,
-                 pending_req->operation, pending_req->status, 0, purb->actual_length);
-    usbif_put(pending_req->usbif_priv);
-
-    usb_free_urb(purb);
-
-    free_pending(pending_idx);
-    
-    rmb();
-
-    /* Check for anything still waiting in the rings, having freed a request... */
-    maybe_trigger_usbio_schedule();
-}
-
-/******************************************************************
- * SCHEDULER FUNCTIONS
- */
-
-static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait);
-
-static int usbio_schedule(void *arg)
-{
-    DECLARE_WAITQUEUE(wq, current);
-
-    usbif_priv_t          *up;
-    struct list_head *ent;
-
-    daemonize();
-
-    for ( ; ; )
-    {
-        /* Wait for work to do. */
-        add_wait_queue(&usbio_schedule_wait, &wq);
-        set_current_state(TASK_INTERRUPTIBLE);
-        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
-             list_empty(&usbio_schedule_list) )
-            schedule();
-        __set_current_state(TASK_RUNNING);
-        remove_wait_queue(&usbio_schedule_wait, &wq);
-
-        /* Queue up a batch of requests. */
-        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
-                !list_empty(&usbio_schedule_list) )
-        {
-            ent = usbio_schedule_list.next;
-            up = list_entry(ent, usbif_priv_t, usbif_list);
-            usbif_get(up);
-            remove_from_usbif_list(up);
-            if ( do_usb_io_op(up, BATCH_PER_DOMAIN) )
-                add_to_usbif_list_tail(up);
-            usbif_put(up);
-        }
-    }
-}
-
-static void maybe_trigger_usbio_schedule(void)
-{
-    /*
-     * Needed so that two processes, who together make the following predicate
-     * true, don't both read stale values and evaluate the predicate
-     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
-     */
-    smp_mb();
-
-    if ( !list_empty(&usbio_schedule_list) )
-        wake_up(&usbio_schedule_wait);
-}
-
-
-/******************************************************************************
- * NOTIFICATION FROM GUEST OS.
- */
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-{
-    usbif_priv_t *up = dev_id;
-
-    smp_mb();
-
-    add_to_usbif_list_tail(up); 
-
-    /* Will in fact /always/ trigger an io schedule in this case. */
-    maybe_trigger_usbio_schedule();
-
-    return IRQ_HANDLED;
-}
-
-
-
-/******************************************************************
- * DOWNWARD CALLS -- These interface with the usb-device layer proper.
- */
-
-static int do_usb_io_op(usbif_priv_t *up, int max_to_do)
-{
-    usbif_back_ring_t *usb_ring = &up->usb_ring;
-    usbif_request_t *req;
-    RING_IDX i, rp;
-    int more_to_do = 0;
-
-    rp = usb_ring->sring->req_prod;
-    rmb(); /* Ensure we see queued requests up to 'rp'. */
-    
-    /* Take items off the comms ring, taking care not to overflow. */
-    for ( i = usb_ring->req_cons; 
-          (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
-          i++ )
-    {
-        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
-        {
-            more_to_do = 1;
-            break;
-        }
-
-        req = RING_GET_REQUEST(usb_ring, i);
-        
-        switch ( req->operation )
-        {
-        case USBIF_OP_PROBE:
-            dispatch_usb_probe(up, req->id, req->port);
-            break;
-
-        case USBIF_OP_IO:
-         /* Assemble an appropriate URB. */
-         dispatch_usb_io(up, req);
-          break;
-
-       case USBIF_OP_RESET:
-         dispatch_usb_reset(up, req->port);
-          break;
-
-        default:
-            DPRINTK("error: unknown USB io operation [%d]\n",
-                    req->operation);
-            make_response(up, req->id, req->operation, -EINVAL, 0, 0);
-            break;
-        }
-    }
-
-    usb_ring->req_cons = i;
-
-    return more_to_do;
-}
-
-static owned_port_t *find_guest_port(usbif_priv_t *up, int port)
-{
-    unsigned long flags;
-    struct list_head *l;
-
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    list_for_each(l, &owned_ports)
-    {
-        owned_port_t *p = list_entry(l, owned_port_t, list);
-        if(p->usbif_priv == up && p->guest_port == port)
-        {
-            spin_unlock_irqrestore(&owned_ports_lock, flags);
-            return p;
-        }
-    }
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-
-    return NULL;
-}
-
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid)
-{
-    owned_port_t *port = find_guest_port(up, portid);
-    int ret = 0;
-
-
-    /* Allowing the guest to actually reset the device causes more problems
-     * than it's worth.  We just fake it out in software but we will do a real
-     * reset when the interface is destroyed. */
-
-    dump_port(port);
-
-    port->guest_address = 0;
-    /* If there's an attached device then the port is now enabled. */
-    if ( port->dev_present )
-        port->enabled = 1;
-    else
-        port->enabled = 0;
-
-    make_response(up, 0, USBIF_OP_RESET, ret, 0, 0);
-}
-
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid)
-{
-    owned_port_t *port = find_guest_port(up, portid);
-    int ret;
-    if ( port != NULL )
-        ret = port->dev_present;
-    else
-    {
-        ret = -EINVAL;
-        printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request "
-              "(port %ld)\n", portid);
-    }
-
-    /* Probe result is sent back in-band.  Probes don't have an associated id
-     * right now... */
-    make_response(up, id, USBIF_OP_PROBE, ret, portid, 0);
-}
-
-/**
- * check_iso_schedule - safety check the isochronous schedule for an URB
- * @purb : the URB in question
- */
-static int check_iso_schedule(struct urb *purb)
-{
-    int i;
-    unsigned long total_length = 0;
-    
-    for ( i = 0; i < purb->number_of_packets; i++ )
-    {
-        struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i];
-        
-        if ( desc->offset >= purb->transfer_buffer_length
-            || ( desc->offset + desc->length) > purb->transfer_buffer_length )
-            return -EINVAL;
-
-        total_length += desc->length;
-
-        if ( total_length > purb->transfer_buffer_length )
-            return -EINVAL;
-    }
-    
-    return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req);
-
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req)
-{
-    unsigned long buffer_mach;
-    int i = 0, offset = 0,
-        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-    pending_req_t *pending_req;
-    unsigned long  remap_prot;
-    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
-    struct urb *purb = NULL;
-    owned_port_t *port;
-    unsigned char *setup;    
-
-    dump_request(req);
-
-    if ( NR_PENDING_REQS == MAX_PENDING_REQS )
-    {
-        printk(KERN_WARNING "usbback: Max requests already queued. "
-              "Giving up!\n");
-        
-        return;
-    }
-
-    port = find_port_for_request(up, req);
-
-    if ( port == NULL )
-    {
-       printk(KERN_WARNING "No such device! (%d)\n", req->devnum);
-       dump_request(req);
-
-        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
-       return;
-    }
-    else if ( !port->dev_present )
-    {
-        /* In normal operation, we'll only get here if a device is unplugged
-         * and the frontend hasn't noticed yet. */
-        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
-       return;
-    }
-        
-
-    setup = kmalloc(8, GFP_KERNEL);
-
-    if ( setup == NULL )
-        goto no_mem;
-   
-    /* Copy request out for safety. */
-    memcpy(setup, req->setup, 8);
-
-    if( setup[0] == 0x0 && setup[1] == 0x5)
-    {
-        /* To virtualise the USB address space, we need to intercept
-         * set_address messages and emulate.  From the USB specification:
-         * bmRequestType = 0x0;
-         * Brequest = SET_ADDRESS (i.e. 0x5)
-         * wValue = device address
-         * wIndex = 0
-         * wLength = 0
-         * data = None
-         */
-        /* Store into the guest transfer buffer using cpu_to_le16 */
-        port->guest_address = le16_to_cpu(*(u16 *)(setup + 2));
-        /* Make a successful response.  That was easy! */
-
-        make_response(up, req->id, req->operation, 0, 0, 0);
-
-       kfree(setup);
-        return;
-    }
-    else if ( setup[0] == 0x0 && setup[1] == 0x9 )
-    {
-        /* The host kernel needs to know what device configuration is in use
-         * because various error checks get confused otherwise.  We just do
-         * configuration settings here, under controlled conditions.
-         */
-
-      /* Ignore configuration setting and hope that the host kernel
-        did it right. */
-        /* usb_set_configuration(port->dev, setup[2]); */
-
-        make_response(up, req->id, req->operation, 0, 0, 0);
-
-        kfree(setup);
-        return;
-    }
-    else if ( setup[0] == 0x1 && setup[1] == 0xB )
-    {
-        /* The host kernel needs to know what device interface is in use
-         * because various error checks get confused otherwise.  We just do
-         * configuration settings here, under controlled conditions.
-         */
-        usb_set_interface(port->dev, (setup[4] | setup[5] << 8),
-                          (setup[2] | setup[3] << 8) );
-
-        make_response(up, req->id, req->operation, 0, 0, 0);
-
-        kfree(setup);
-        return;
-    }
-
-    if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK)
-          + req->length )
-        > MMAP_PAGES_PER_REQUEST * PAGE_SIZE )
-    {
-        printk(KERN_WARNING "usbback: request of %lu bytes too large\n",
-              req->length);
-        make_response(up, req->id, req->operation, -EINVAL, 0, 0);
-        kfree(setup);
-        return;
-    }
-    
-    buffer_mach = req->transfer_buffer;
-
-    if( buffer_mach == 0 )
-       goto no_remap;
-
-    ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST);
-    ASSERT(buffer_mach);
-
-    /* Always map writeable for now. */
-    remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
-
-    for ( i = 0, offset = 0; offset < req->length;
-          i++, offset += PAGE_SIZE )
-    {
-       mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
-       mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
-        mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot;
-        mcl[i].args[2] = 0;
-        mcl[i].args[3] = up->domid;
-        
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
-
-        ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
-               == buffer_mach + i << PAGE_SHIFT);
-    }
-
-    if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
-    {
-        /* Map in ISO schedule, if necessary. */
-        mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
-        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
-        mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot;
-        mcl[i].args[2] = 0;
-        mcl[i].args[3] = up->domid;
-
-        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
-    
-        i++;
-    }
-
-    if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) )
-        BUG();
-    
-    {
-        int j;
-        for ( j = 0; j < i; j++ )
-        {
-            if ( unlikely(mcl[j].args[5] != 0) )
-            {
-                printk(KERN_WARNING
-                      "invalid buffer %d -- could not remap it\n", j);
-                fast_flush_area(pending_idx, i);
-                goto bad_descriptor;
-            }
-       }
-    }
-    
- no_remap:
-
-    ASSERT(i <= MMAP_PAGES_PER_REQUEST);
-    ASSERT(i * PAGE_SIZE >= req->length);
-
-    /* We have to do this because some things might complete out of order. */
-    pending_req = &pending_reqs[pending_idx];
-    pending_req->usbif_priv= up;
-    pending_req->id        = req->id;
-    pending_req->operation = req->operation;
-    pending_req->nr_pages  = i;
-
-    pending_cons++;
-
-    usbif_get(up);
-    
-    /* Fill out an actual request for the USB layer. */
-    purb = usb_alloc_urb(req->num_iso);
-
-    if ( purb == NULL )
-    {
-        usbif_put(up);
-        free_pending(pending_idx);
-        goto no_mem;
-    }
-
-    purb->dev = port->dev;
-    purb->context = pending_req;
-    purb->transfer_buffer =
-        (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK));
-    if(buffer_mach == 0)
-      purb->transfer_buffer = NULL;
-    purb->complete = __end_usb_io_op;
-    purb->transfer_buffer_length = req->length;
-    purb->transfer_flags = req->transfer_flags;
-
-    purb->pipe = 0;
-    purb->pipe |= req->direction << 7;
-    purb->pipe |= port->dev->devnum << 8;
-    purb->pipe |= req->speed << 26;
-    purb->pipe |= req->pipe_type << 30;
-    purb->pipe |= req->endpoint << 15;
-
-    purb->number_of_packets = req->num_iso;
-
-    if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE )
-        goto urb_error;
-
-    /* Make sure there's always some kind of timeout. */
-    purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000
-                    :  1000;
-
-    purb->setup_packet = setup;
-
-    if ( req->pipe_type == 0 ) /* ISO */
-    {
-        int j;
-        usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1);
-
-        /* If we're dealing with an iso pipe, we need to copy in a schedule. */
-        for ( j = 0; j < purb->number_of_packets; j++ )
-        {
-            purb->iso_frame_desc[j].length = iso_sched[j].length;
-            purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset;
-            iso_sched[j].status = 0;
-        }
-    }
-
-    if ( check_iso_schedule(purb) != 0 )
-        goto urb_error;
-
-    if ( usb_submit_urb(purb) != 0 )
-        goto urb_error;
-
-    return;
-
- urb_error:
-    dump_urb(purb);    
-    usbif_put(up);
-    free_pending(pending_idx);
-
- bad_descriptor:
-    kfree ( setup );
-    if ( purb != NULL )
-        usb_free_urb(purb);
-    make_response(up, req->id, req->operation, -EINVAL, 0, 0);
-    return;
-    
- no_mem:
-    if ( setup != NULL )
-        kfree(setup);
-    make_response(up, req->id, req->operation, -ENOMEM, 0, 0);
-    return;
-} 
-
-
-
-/******************************************************************
- * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
- */
-
-
-static void make_response(usbif_priv_t *up, unsigned long id,
-                          unsigned short op, int st, int inband,
-                         unsigned long length)
-{
-    usbif_response_t *resp;
-    unsigned long     flags;
-    usbif_back_ring_t *usb_ring = &up->usb_ring;
-
-    /* Place on the response ring for the relevant domain. */ 
-    spin_lock_irqsave(&up->usb_ring_lock, flags);
-    resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
-    resp->id        = id;
-    resp->operation = op;
-    resp->status    = st;
-    resp->data      = inband;
-    resp->length = length;
-    wmb(); /* Ensure other side can see the response fields. */
-
-    dump_response(resp);
-
-    usb_ring->rsp_prod_pvt++;
-    RING_PUSH_RESPONSES(usb_ring);
-    spin_unlock_irqrestore(&up->usb_ring_lock, flags);
-
-    /* Kick the relevant domain. */
-    notify_via_evtchn(up->evtchn);
-}
-
-/**
- * usbif_claim_port - claim devices on a port on behalf of guest
- *
- * Once completed, this will ensure that any device attached to that
- * port is claimed by this driver for use by the guest.
- */
-int usbif_claim_port(usbif_be_claim_port_t *msg)
-{
-    owned_port_t *o_p;
-    
-    /* Sanity... */
-    if ( usbif_find_port(msg->path) != NULL )
-    {
-        printk(KERN_WARNING "usbback: Attempted to claim USB port "
-               "we already own!\n");
-        return -EINVAL;
-    }
-
-    /* No need for a slab cache - this should be infrequent. */
-    o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL);
-
-    if ( o_p == NULL )
-        return -ENOMEM;
-
-    o_p->enabled = 0;
-    o_p->usbif_priv = usbif_find(msg->domid);
-    o_p->guest_port = msg->usbif_port;
-    o_p->dev_present = 0;
-    o_p->guest_address = 0; /* Default address. */
-
-    strcpy(o_p->path, msg->path);
-
-    spin_lock_irq(&owned_ports_lock);
-    
-    list_add(&o_p->list, &owned_ports);
-
-    spin_unlock_irq(&owned_ports_lock);
-
-    printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path,
-          msg->domid, msg->usbif_port);
-
-    /* Force a reprobe for unclaimed devices. */
-    usb_scan_devices();
-
-    return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req)
-{
-    unsigned long flags;
-    struct list_head *port;
-
-    /* I'm assuming this is not called from IRQ context - correct?  I think
-     * it's probably only called in response to control messages or plug events
-     * in the USB hub kernel thread, so should be OK. */
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    list_for_each(port, &owned_ports)
-    {
-        owned_port_t *p = list_entry(port, owned_port_t, list);
-        if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled )
-         {
-              dump_port(p);
-
-             spin_unlock_irqrestore(&owned_ports_lock, flags);
-              return p;
-         }
-    }
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-
-    return NULL;    
-}
-
-owned_port_t *__usbif_find_port(char *path)
-{
-    struct list_head *port;
-
-    list_for_each(port, &owned_ports)
-    {
-        owned_port_t *p = list_entry(port, owned_port_t, list);
-        if(!strcmp(path, p->path))
-        {
-            return p;
-        }
-    }
-
-    return NULL;
-}
-
-owned_port_t *usbif_find_port(char *path)
-{
-    owned_port_t *ret;
-    unsigned long flags;
-
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    ret = __usbif_find_port(path);    
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-
-    return ret;
-}
-
-
-static void *probe(struct usb_device *dev, unsigned iface,
-                   const struct usb_device_id *id)
-{
-    owned_port_t *p;
-
-    /* We don't care what the device is - if we own the port, we want it.  We
-     * don't deal with device-specifics in this driver, so we don't care what
-     * the device actually is ;-) */
-    if ( ( p = usbif_find_port(dev->devpath) ) != NULL )
-    {
-        printk(KERN_INFO "usbback: claimed device attached to owned port\n");
-
-        p->dev_present = 1;
-        p->dev = dev;
-        set_bit(iface, &p->ifaces);
-        
-        return p->usbif_priv;
-    }
-    else
-        printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n",
-              dev->devpath);
-   
-
-    return NULL;
-}
-
-static void disconnect(struct usb_device *dev, void *usbif)
-{
-    /* Note the device is removed so we can tell the guest when it probes. */
-    owned_port_t *port = usbif_find_port(dev->devpath);
-    port->dev_present = 0;
-    port->dev = NULL;
-    port->ifaces = 0;
-}
-
-
-struct usb_driver driver =
-{
-    .owner      = THIS_MODULE,
-    .name       = "Xen USB Backend",
-    .probe      = probe,
-    .disconnect = disconnect,
-    .id_table   = NULL,
-};
-
-/* __usbif_release_port - internal mechanics for releasing a port */
-void __usbif_release_port(owned_port_t *p)
-{
-    int i;
-
-    for ( i = 0; p->ifaces != 0; i++)
-        if ( p->ifaces & 1 << i )
-        {
-            usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i));
-            clear_bit(i, &p->ifaces);
-        }
-    list_del(&p->list);
-
-    /* Reset the real device.  We don't simulate disconnect / probe for other
-     * drivers in this kernel because we assume the device is completely under
-     * the control of ourselves (i.e. the guest!).  This should ensure that the
-     * device is in a sane state for the next customer ;-) */
-
-    /* MAW NB: we're not resetting the real device here.  This looks perfectly
-     * valid to me but it causes memory corruption.  We seem to get away with not
-     * resetting for now, although it'd be nice to have this tracked down. */
-/*     if ( p->dev != NULL) */
-/*         usb_reset_device(p->dev); */
-
-    kfree(p);
-}
-
-
-/**
- * usbif_release_port - stop claiming devices on a port on behalf of guest
- */
-void usbif_release_port(usbif_be_release_port_t *msg)
-{
-    owned_port_t *p;
-
-    spin_lock_irq(&owned_ports_lock);
-    p = __usbif_find_port(msg->path);
-    __usbif_release_port(p);
-    spin_unlock_irq(&owned_ports_lock);
-}
-
-void usbif_release_ports(usbif_priv_t *up)
-{
-    struct list_head *port, *tmp;
-    unsigned long flags;
-    
-    spin_lock_irqsave(&owned_ports_lock, flags);
-    list_for_each_safe(port, tmp, &owned_ports)
-    {
-        owned_port_t *p = list_entry(port, owned_port_t, list);
-        if ( p->usbif_priv == up )
-            __usbif_release_port(p);
-    }
-    spin_unlock_irqrestore(&owned_ports_lock, flags);
-}
-
-static int __init usbif_init(void)
-{
-    int i;
-
-    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
-         !(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
-        return 0;
-    
-    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
-        BUG();
-
-    pending_cons = 0;
-    pending_prod = MAX_PENDING_REQS;
-    memset(pending_reqs, 0, sizeof(pending_reqs));
-    for ( i = 0; i < MAX_PENDING_REQS; i++ )
-        pending_ring[i] = i;
-
-    spin_lock_init(&pend_prod_lock);
-
-    spin_lock_init(&owned_ports_lock);
-    INIT_LIST_HEAD(&owned_ports);
-
-    spin_lock_init(&usbio_schedule_list_lock);
-    INIT_LIST_HEAD(&usbio_schedule_list);
-
-    if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
-        BUG();
-    
-    usbif_interface_init();
-
-    usbif_ctrlif_init();
-
-    usb_register(&driver);
-
-    printk(KERN_INFO "Xen USB Backend Initialised");
-
-    return 0;
-}
-
-__initcall(usbif_init);
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c b/linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c
deleted file mode 100644 (file)
index 46cca30..0000000
+++ /dev/null
@@ -1,1664 +0,0 @@
-/*
- * Xen Virtual USB Frontend Driver 
- *
- * This file contains the first version of the Xen virtual USB hub
- * that I've managed not to delete by mistake (3rd time lucky!).
- *
- * Based on Linux's uhci.c, original copyright notices are displayed
- * below.  Portions also (c) 2004 Intel Research Cambridge
- * and (c) 2004, 2005 Mark Williamson
- *
- * Contact <mark.williamson@cl.cam.ac.uk> or
- * <xen-devel@lists.sourceforge.net> regarding this code.
- *
- * Still to be (maybe) implemented:
- * - migration / backend restart support?
- * - support for building / using as a module
- */
-
-/*
- * Universal Host Controller Interface driver for USB.
- *
- * Maintainer: Johannes Erdfelt <johannes@erdfelt.com>
- *
- * (C) Copyright 1999 Linus Torvalds
- * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@erdfelt.com
- * (C) Copyright 1999 Randy Dunlap
- * (C) Copyright 1999 Georg Acher, acher@in.tum.de
- * (C) Copyright 1999 Deti Fliegl, deti@fliegl.de
- * (C) Copyright 1999 Thomas Sailer, sailer@ife.ee.ethz.ch
- * (C) Copyright 1999 Roman Weissgaerber, weissg@vienna.at
- * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
- *               support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
- * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
- *
- * Intel documents this fairly well, and as far as I know there
- * are no royalties or anything like that, but even so there are
- * people who decided that they want to do the same thing in a
- * completely different way.
- *
- * WARNING! The USB documentation is downright evil. Most of it
- * is just crap, written by a committee. You're better off ignoring
- * most of it, the important stuff is:
- *  - the low-level protocol (fairly simple but lots of small details)
- *  - working around the horridness of the rest
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#ifdef CONFIG_USB_DEBUG
-#define DEBUG
-#else
-#undef DEBUG
-#endif
-#include <linux/usb.h>
-
-#include <asm/irq.h>
-#include <asm/system.h>
-
-#include "xhci.h"
-
-#include "../../../../../drivers/usb/hcd.h"
-
-#include <asm-xen/xen-public/io/usbif.h>
-#include <asm/ctrl_if.h>
-#include <asm/xen-public/io/domain_controller.h>
-
-/*
- * Version Information
- */
-#define DRIVER_VERSION "v1.0"
-#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
-                      "Randy Dunlap, Georg Acher, Deti Fliegl, " \
-                      "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
-#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
-
-/*
- * debug = 0, no debugging messages
- * debug = 1, dump failed URB's except for stalls
- * debug = 2, dump all failed URB's (including stalls)
- */
-#ifdef DEBUG
-static int debug = 1;
-#else
-static int debug = 0;
-#endif
-MODULE_PARM(debug, "i");
-MODULE_PARM_DESC(debug, "Debug level");
-static char *errbuf;
-#define ERRBUF_LEN    (PAGE_SIZE * 8)
-
-static int rh_submit_urb(struct urb *urb);
-static int rh_unlink_urb(struct urb *urb);
-static int xhci_unlink_urb(struct urb *urb);
-static void xhci_call_completion(struct urb *urb);
-static void xhci_drain_ring(void);
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
-static void xhci_finish_completion(void);
-
-#define MAX_URB_LOOP   2048            /* Maximum number of linked URB's */
-
-static kmem_cache_t *xhci_up_cachep;   /* urb_priv cache */
-static struct xhci *xhci;               /* XHCI structure for the interface */
-
-/******************************************************************************
- * DEBUGGING
- */
-
-#ifdef DEBUG
-
-static void dump_urb(struct urb *urb)
-{
-    printk(KERN_DEBUG "dumping urb @ %p\n"
-           "  hcpriv = %p\n"
-           "  next = %p\n"
-           "  dev = %p\n"
-           "  pipe = 0x%lx\n"
-           "  status = %d\n"
-           "  transfer_flags = 0x%lx\n"
-           "  transfer_buffer = %p\n"
-           "  transfer_buffer_length = %d\n"
-           "  actual_length = %d\n"
-           "  bandwidth = %d\n"
-           "  setup_packet = %p\n",
-           urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
-           urb->transfer_flags, urb->transfer_buffer,
-           urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
-           urb->setup_packet);
-    if ( urb->setup_packet != NULL )
-        printk(KERN_DEBUG
-               "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
-               urb->setup_packet[0], urb->setup_packet[1],
-               urb->setup_packet[2], urb->setup_packet[3],
-               urb->setup_packet[4], urb->setup_packet[5],
-               urb->setup_packet[6], urb->setup_packet[7]);
-    printk(KERN_DEBUG "complete = %p\n"
-           "interval = %d\n", urb->complete, urb->interval);
-        
-}
-
-static void xhci_show_resp(usbif_response_t *r)
-{
-        printk(KERN_DEBUG "dumping response @ %p\n"
-               "  id=0x%lx\n"
-               "  op=0x%x\n"
-               "  data=0x%x\n"
-               "  status=0x%x\n"
-               "  length=0x%lx\n",
-               r->id, r->operation, r->data, r->status, r->length);
-}
-
-#define DPRINK(...) printk(KERN_DEBUG __VA_ARGS__)
-
-#else /* DEBUG */
-
-#define dump_urb(blah) ((void)0)
-#define xhci_show_resp(blah) ((void)0)
-#define DPRINTK(blah,...) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************************
- * RING REQUEST HANDLING
- */
-
-/**
- * xhci_construct_isoc - add isochronous information to a request
- */
-static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb)
-{
-        usbif_iso_t *schedule;
-        int i;
-        struct urb_priv *urb_priv = urb->hcpriv;
-        
-        req->num_iso = urb->number_of_packets;
-        schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL);
-
-        if ( schedule == NULL )
-            return -ENOMEM;
-
-        for ( i = 0; i < req->num_iso; i++ )
-        {
-                schedule[i].buffer_offset = urb->iso_frame_desc[i].offset;
-                schedule[i].length = urb->iso_frame_desc[i].length;
-        }
-
-        urb_priv->schedule = schedule;
-       req->iso_schedule = virt_to_machine(schedule);
-
-        return 0;
-}
-
-/**
- * xhci_queue_req - construct and queue request for an URB
- */
-static int xhci_queue_req(struct urb *urb)
-{
-        usbif_request_t *req;
-        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
-        printk(KERN_DEBUG
-               "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
-               usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
-               usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-        
-
-        if ( RING_FULL(usb_ring) )
-        {
-                printk(KERN_WARNING
-                       "xhci_queue_req(): USB ring full, not queuing request\n");
-                return -ENOBUFS;
-        }
-
-        /* Stick something in the shared communications ring. */
-       req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-        req->operation       = USBIF_OP_IO;
-        req->port            = 0; /* We don't care what the port is. */
-        req->id              = (unsigned long) urb->hcpriv;
-        req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
-       req->devnum          = usb_pipedevice(urb->pipe);
-        req->direction       = usb_pipein(urb->pipe);
-       req->speed           = usb_pipeslow(urb->pipe);
-        req->pipe_type       = usb_pipetype(urb->pipe);
-        req->length          = urb->transfer_buffer_length;
-        req->transfer_flags  = urb->transfer_flags;
-       req->endpoint        = usb_pipeendpoint(urb->pipe);
-       req->speed           = usb_pipeslow(urb->pipe);
-       req->timeout         = urb->timeout * (1000 / HZ);
-
-        if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
-        {
-            int ret = xhci_construct_isoc(req, urb);
-            if ( ret != 0 )
-                return ret;
-        }
-
-       if(urb->setup_packet != NULL)
-                memcpy(req->setup, urb->setup_packet, 8);
-        else
-                memset(req->setup, 0, 8);
-        
-        usb_ring->req_prod_pvt++;
-        RING_PUSH_REQUESTS(usb_ring);
-
-       notify_via_evtchn(xhci->evtchn);
-
-        DPRINTK("Queued request for an URB.\n");
-        dump_urb(urb);
-
-        return -EINPROGRESS;
-}
-
-/**
- * xhci_queue_probe - queue a probe request for a particular port
- */
-static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
-{
-        usbif_request_t *req;
-        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
-       printk(KERN_DEBUG
-               "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
-               "resp_cons = %d\n", usbif->req_prod,
-               virt_to_machine(&usbif->req_prod),
-              usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-        
-        if ( RING_FULL(usb_ring) )
-        {
-                printk(KERN_WARNING
-                       "xhci_queue_probe(): ring full, not queuing request\n");
-                return NULL;
-        }
-
-        /* Stick something in the shared communications ring. */
-        req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-        memset(req, sizeof(*req), 0);
-
-        req->operation       = USBIF_OP_PROBE;
-        req->port            = port;
-
-        usb_ring->req_prod_pvt++;
-        RING_PUSH_REQUESTS(usb_ring);
-
-       notify_via_evtchn(xhci->evtchn);
-
-        return req;
-}
-
-/**
- * xhci_port_reset - queue a reset request for a particular port
- */
-static int xhci_port_reset(usbif_vdev_t port)
-{
-        usbif_request_t *req;
-        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-        /* We only reset one port at a time, so we only need one variable per
-         * hub. */
-        xhci->awaiting_reset = 1;
-        
-        /* Stick something in the shared communications ring. */
-       req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-        memset(req, sizeof(*req), 0);
-
-        req->operation       = USBIF_OP_RESET;
-        req->port            = port;
-        
-        usb_ring->req_prod_pvt++;
-       RING_PUSH_REQUESTS(usb_ring);
-
-       notify_via_evtchn(xhci->evtchn);
-
-        while ( xhci->awaiting_reset > 0 )
-        {
-                mdelay(1);
-                xhci_drain_ring();
-        }
-
-       xhci->rh.ports[port].pe = 1;
-       xhci->rh.ports[port].pe_chg = 1;
-
-        return xhci->awaiting_reset;
-}
-
-
-/******************************************************************************
- * RING RESPONSE HANDLING
- */
-
-static void receive_usb_reset(usbif_response_t *resp)
-{
-    xhci->awaiting_reset = resp->status;
-    rmb();
-    
-}
-
-static void receive_usb_probe(usbif_response_t *resp)
-{
-    spin_lock(&xhci->rh.port_state_lock);
-
-    if ( resp->status > 0 )
-    {
-        if ( resp->status == 1 )
-        {
-            /* If theres a device there and there wasn't one before there must
-             * have been a connection status change. */
-            if( xhci->rh.ports[resp->data].cs == 0 )
-           {
-                xhci->rh.ports[resp->data].cs = 1;
-                xhci->rh.ports[resp->data].ccs = 1;
-                xhci->rh.ports[resp->data].cs_chg = 1;
-           }
-        }
-        else
-            printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
-                   "for port %d\n", resp->status, resp->data);
-    }
-    else if ( resp->status < 0)
-        printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
-               resp->status);
-
-    spin_unlock(&xhci->rh.port_state_lock);
-}
-
-static void receive_usb_io(usbif_response_t *resp)
-{
-        struct urb_priv *urbp = (struct urb_priv *)resp->id;
-        struct urb *urb = urbp->urb;
-
-        urb->actual_length = resp->length;
-        urbp->in_progress = 0;
-
-        if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
-        {
-                int i;
-              
-                /* Copy ISO schedule results back in. */
-                for ( i = 0; i < urb->number_of_packets; i++ )
-                {
-                        urb->iso_frame_desc[i].status
-                                = urbp->schedule[i].status;
-                        urb->iso_frame_desc[i].actual_length
-                                = urbp->schedule[i].length;
-                }
-                free_page((unsigned long)urbp->schedule);
-        }
-
-        /* Only set status if it's not been changed since submission.  It might
-         * have been changed if the URB has been unlinked asynchronously, for
-         * instance. */
-       if ( urb->status == -EINPROGRESS )
-                urbp->status = urb->status = resp->status;
-}
-
-/**
- * xhci_drain_ring - drain responses from the ring, calling handlers
- *
- * This may be called from interrupt context when an event is received from the
- * backend domain, or sometimes in process context whilst waiting for a port
- * reset or URB completion.
- */
-static void xhci_drain_ring(void)
-{
-       struct list_head *tmp, *head;
-       usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-       usbif_response_t *resp;
-        RING_IDX i, rp;
-
-        /* Walk the ring here to get responses, updating URBs to show what
-         * completed. */
-        
-        rp = usb_ring->sring->rsp_prod;
-        rmb(); /* Ensure we see queued requests up to 'rp'. */
-
-        /* Take items off the comms ring, taking care not to overflow. */
-        for ( i = usb_ring->rsp_cons; i != rp; i++ )
-        {
-            resp = RING_GET_RESPONSE(usb_ring, i);
-            
-            /* May need to deal with batching and with putting a ceiling on
-               the number dispatched for performance and anti-dos reasons */
-
-            xhci_show_resp(resp);
-
-            switch ( resp->operation )
-            {
-            case USBIF_OP_PROBE:
-                receive_usb_probe(resp);
-                break;
-                
-            case USBIF_OP_IO:
-                receive_usb_io(resp);
-                break;
-
-            case USBIF_OP_RESET:
-                receive_usb_reset(resp);
-                break;
-
-            default:
-                printk(KERN_WARNING
-                       "error: unknown USB io operation response [%d]\n",
-                       resp->operation);
-                break;
-            }
-        }
-
-        usb_ring->rsp_cons = i;
-
-       /* Walk the list of pending URB's to see which ones completed and do
-         * callbacks, etc. */
-       spin_lock(&xhci->urb_list_lock);
-       head = &xhci->urb_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *urb = list_entry(tmp, struct urb, urb_list);
-
-               tmp = tmp->next;
-
-               /* Checks the status and does all of the magic necessary */
-               xhci_transfer_result(xhci, urb);
-       }
-       spin_unlock(&xhci->urb_list_lock);
-
-       xhci_finish_completion();
-}
-
-
-static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs)
-{
-        xhci_drain_ring();
-}
-
-/******************************************************************************
- * HOST CONTROLLER FUNCTIONALITY
- */
-
-/**
- * no-op implementation of private device alloc / free routines
- */
-static int xhci_do_nothing_dev(struct usb_device *dev)
-{
-       return 0;
-}
-
-static inline void xhci_add_complete(struct urb *urb)
-{
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-       unsigned long flags;
-
-       spin_lock_irqsave(&xhci->complete_list_lock, flags);
-       list_add_tail(&urbp->complete_list, &xhci->complete_list);
-       spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-/* When this returns, the owner of the URB may free its
- * storage.
- *
- * We spin and wait for the URB to complete before returning.
- *
- * Call with urb->lock acquired.
- */
-static void xhci_delete_urb(struct urb *urb)
-{
-        struct urb_priv *urbp;
-
-       urbp = urb->hcpriv;
-
-        /* If there's no urb_priv structure for this URB then it can't have
-         * been submitted at all. */
-       if ( urbp == NULL )
-               return;
-
-       /* For now we just spin until the URB completes.  It shouldn't take too
-         * long and we don't expect to have to do this very often. */
-       while ( urb->status == -EINPROGRESS )
-        {
-            xhci_drain_ring();
-            mdelay(1);
-        }
-
-       /* Now we know that further transfers to the buffer won't
-        * occur, so we can safely return. */
-}
-
-static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb)
-{
-       struct urb_priv *urbp;
-
-       urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC);
-       if (!urbp) {
-               err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n");
-               return NULL;
-       }
-
-       memset((void *)urbp, 0, sizeof(*urbp));
-
-       urbp->inserttime = jiffies;
-       urbp->urb = urb;
-       urbp->dev = urb->dev;
-       
-       INIT_LIST_HEAD(&urbp->complete_list);
-
-       urb->hcpriv = urbp;
-
-       return urbp;
-}
-
-/*
- * MUST be called with urb->lock acquired
- */
-/* When is this called?  Do we need to stop the transfer (as we
- * currently do)? */
-static void xhci_destroy_urb_priv(struct urb *urb)
-{
-    struct urb_priv *urbp;
-    
-    urbp = (struct urb_priv *)urb->hcpriv;
-    if (!urbp)
-        return;
-
-    if (!list_empty(&urb->urb_list))
-        warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb);
-    
-    if (!list_empty(&urbp->complete_list))
-        warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb);
-    
-    kmem_cache_free(xhci_up_cachep, urb->hcpriv);
-
-    urb->hcpriv = NULL;
-}
-
-/**
- * Try to find URBs in progress on the same pipe to the same device.
- *
- * MUST be called with xhci->urb_list_lock acquired
- */
-static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb)
-{
-       struct list_head *tmp, *head;
-
-       /* We don't match Isoc transfers since they are special */
-       if (usb_pipeisoc(urb->pipe))
-               return NULL;
-
-       head = &xhci->urb_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *u = list_entry(tmp, struct urb, urb_list);
-
-               tmp = tmp->next;
-
-               if (u->dev == urb->dev && u->pipe == urb->pipe &&
-                   u->status == -EINPROGRESS)
-                       return u;
-       }
-
-       return NULL;
-}
-
-static int xhci_submit_urb(struct urb *urb)
-{
-       int ret = -EINVAL;
-       unsigned long flags;
-       struct urb *eurb;
-       int bustime;
-
-        DPRINTK("URB submitted to XHCI driver.\n");
-        dump_urb(urb);
-
-       if (!urb)
-               return -EINVAL;
-
-       if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) {
-               warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb);
-               return -ENODEV;
-       }
-
-        if ( urb->dev->devpath == NULL )
-                BUG();
-
-       usb_inc_dev_use(urb->dev);
-
-       spin_lock_irqsave(&xhci->urb_list_lock, flags);
-       spin_lock(&urb->lock);
-
-       if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET ||
-           urb->status == -ECONNABORTED) {
-               dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status);
-               /* Since we can have problems on the out path */
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-               usb_dec_dev_use(urb->dev);
-
-               return ret;
-       }
-
-       INIT_LIST_HEAD(&urb->urb_list);
-       if (!xhci_alloc_urb_priv(urb)) {
-               ret = -ENOMEM;
-
-               goto out;
-       }
-
-        ( (struct urb_priv *)urb->hcpriv )->in_progress = 1;
-
-       eurb = xhci_find_urb_ep(xhci, urb);
-       if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) {
-               ret = -ENXIO;
-
-               goto out;
-       }
-
-       /* Short circuit the virtual root hub */
-       if (urb->dev == xhci->rh.dev) {
-               ret = rh_submit_urb(urb);
-
-               goto out;
-       }
-
-       switch (usb_pipetype(urb->pipe)) {
-       case PIPE_CONTROL:
-       case PIPE_BULK:
-               ret = xhci_queue_req(urb);
-               break;
-
-       case PIPE_INTERRUPT:
-               if (urb->bandwidth == 0) {      /* not yet checked/allocated */
-                       bustime = usb_check_bandwidth(urb->dev, urb);
-                       if (bustime < 0)
-                               ret = bustime;
-                       else {
-                               ret = xhci_queue_req(urb);
-                               if (ret == -EINPROGRESS)
-                                       usb_claim_bandwidth(urb->dev, urb,
-                                                            bustime, 0);
-                       }
-               } else          /* bandwidth is already set */
-                       ret = xhci_queue_req(urb);
-               break;
-
-       case PIPE_ISOCHRONOUS:
-               if (urb->bandwidth == 0) {      /* not yet checked/allocated */
-                       if (urb->number_of_packets <= 0) {
-                               ret = -EINVAL;
-                               break;
-                       }
-                       bustime = usb_check_bandwidth(urb->dev, urb);
-                       if (bustime < 0) {
-                               ret = bustime;
-                               break;
-                       }
-
-                       ret = xhci_queue_req(urb);
-                       if (ret == -EINPROGRESS)
-                               usb_claim_bandwidth(urb->dev, urb, bustime, 1);
-               } else          /* bandwidth is already set */
-                       ret = xhci_queue_req(urb);
-               break;
-       }
-out:
-       urb->status = ret;
-
-       if (ret == -EINPROGRESS) {
-               /* We use _tail to make find_urb_ep more efficient */
-               list_add_tail(&urb->urb_list, &xhci->urb_list);
-
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-               return 0;
-       }
-
-       xhci_delete_urb(urb);
-
-       spin_unlock(&urb->lock);
-       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-       /* Only call completion if it was successful */
-       if (!ret)
-               xhci_call_completion(urb);
-
-       return ret;
-}
-
-/*
- * Return the result of a transfer
- *
- * MUST be called with urb_list_lock acquired
- */
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
-{
-       int ret = 0;
-       unsigned long flags;
-       struct urb_priv *urbp;
-
-       /* The root hub is special */
-       if (urb->dev == xhci->rh.dev)
-               return;
-
-       spin_lock_irqsave(&urb->lock, flags);
-
-       urbp = (struct urb_priv *)urb->hcpriv;
-
-        if ( ( (struct urb_priv *)urb->hcpriv )->in_progress )
-                ret = -EINPROGRESS;
-
-        if (urb->actual_length < urb->transfer_buffer_length) {
-                if (urb->transfer_flags & USB_DISABLE_SPD) {
-                        ret = -EREMOTEIO;
-                }
-        }
-
-       if (urb->status == -EPIPE)
-        {
-                ret = urb->status;
-               /* endpoint has stalled - mark it halted */
-               usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
-                                  usb_pipeout(urb->pipe));
-        }
-
-       if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
-            (ret != 0 && debug > 1)) {
-               /* Some debugging code */
-               dbg("xhci_result_interrupt/bulk() failed with status %x",
-                       status);
-       }
-
-       if (ret == -EINPROGRESS)
-               goto out;
-
-       switch (usb_pipetype(urb->pipe)) {
-       case PIPE_CONTROL:
-       case PIPE_BULK:
-       case PIPE_ISOCHRONOUS:
-               /* Release bandwidth for Interrupt or Isoc. transfers */
-               /* Spinlock needed ? */
-               if (urb->bandwidth)
-                       usb_release_bandwidth(urb->dev, urb, 1);
-               xhci_delete_urb(urb);
-               break;
-       case PIPE_INTERRUPT:
-               /* Interrupts are an exception */
-               if (urb->interval)
-                       goto out_complete;
-
-               /* Release bandwidth for Interrupt or Isoc. transfers */
-               /* Spinlock needed ? */
-               if (urb->bandwidth)
-                       usb_release_bandwidth(urb->dev, urb, 0);
-               xhci_delete_urb(urb);
-               break;
-       default:
-               info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
-                     usb_pipetype(urb->pipe), urb);
-       }
-
-       /* Remove it from xhci->urb_list */
-       list_del_init(&urb->urb_list);
-
-out_complete:
-       xhci_add_complete(urb);
-
-out:
-       spin_unlock_irqrestore(&urb->lock, flags);
-}
-
-static int xhci_unlink_urb(struct urb *urb)
-{
-       unsigned long flags;
-       struct urb_priv *urbp = urb->hcpriv;
-
-       if (!urb)
-               return -EINVAL;
-
-       if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
-               return -ENODEV;
-
-       spin_lock_irqsave(&xhci->urb_list_lock, flags);
-       spin_lock(&urb->lock);
-
-       /* Release bandwidth for Interrupt or Isoc. transfers */
-       /* Spinlock needed ? */
-       if (urb->bandwidth) {
-               switch (usb_pipetype(urb->pipe)) {
-               case PIPE_INTERRUPT:
-                       usb_release_bandwidth(urb->dev, urb, 0);
-                       break;
-               case PIPE_ISOCHRONOUS:
-                       usb_release_bandwidth(urb->dev, urb, 1);
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       if (urb->status != -EINPROGRESS) {
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-               return 0;
-       }
-
-       list_del_init(&urb->urb_list);
-
-       /* Short circuit the virtual root hub */
-       if (urb->dev == xhci->rh.dev) {
-               rh_unlink_urb(urb);
-
-               spin_unlock(&urb->lock);
-               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-               xhci_call_completion(urb);
-       } else {
-               if (urb->transfer_flags & USB_ASYNC_UNLINK) {
-                        /* We currently don't currently attempt to cancel URBs
-                         * that have been queued in the ring.  We handle async
-                         * unlinked URBs when they complete. */
-                       urbp->status = urb->status = -ECONNABORTED;
-                       spin_unlock(&urb->lock);
-                       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-               } else {
-                       urb->status = -ENOENT;
-
-                       spin_unlock(&urb->lock);
-                       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-                       if (in_interrupt()) {   /* wait at least 1 frame */
-                               static int errorcount = 10;
-
-                               if (errorcount--)
-                                       dbg("xhci_unlink_urb called from interrupt for urb %p", urb);
-                               udelay(1000);
-                       } else
-                               schedule_timeout(1+1*HZ/1000); 
-
-                        xhci_delete_urb(urb);
-
-                       xhci_call_completion(urb);
-               }
-       }
-
-       return 0;
-}
-
-static void xhci_call_completion(struct urb *urb)
-{
-       struct urb_priv *urbp;
-       struct usb_device *dev = urb->dev;
-       int is_ring = 0, killed, resubmit_interrupt, status;
-       struct urb *nurb;
-       unsigned long flags;
-
-       spin_lock_irqsave(&urb->lock, flags);
-
-       urbp = (struct urb_priv *)urb->hcpriv;
-       if (!urbp || !urb->dev) {
-               spin_unlock_irqrestore(&urb->lock, flags);
-               return;
-       }
-
-       killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
-                       urb->status == -ECONNRESET);
-       resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
-                       urb->interval);
-
-       nurb = urb->next;
-       if (nurb && !killed) {
-               int count = 0;
-
-               while (nurb && nurb != urb && count < MAX_URB_LOOP) {
-                       if (nurb->status == -ENOENT ||
-                           nurb->status == -ECONNABORTED ||
-                           nurb->status == -ECONNRESET) {
-                               killed = 1;
-                               break;
-                       }
-
-                       nurb = nurb->next;
-                       count++;
-               }
-
-               if (count == MAX_URB_LOOP)
-                       err("xhci_call_completion: too many linked URB's, loop? (first loop)");
-
-               /* Check to see if chain is a ring */
-               is_ring = (nurb == urb);
-       }
-
-       status = urbp->status;
-       if (!resubmit_interrupt || killed)
-               /* We don't need urb_priv anymore */
-               xhci_destroy_urb_priv(urb);
-
-       if (!killed)
-               urb->status = status;
-
-       spin_unlock_irqrestore(&urb->lock, flags);
-
-       if (urb->complete)
-               urb->complete(urb);
-
-       if (resubmit_interrupt)
-               /* Recheck the status. The completion handler may have */
-               /*  unlinked the resubmitting interrupt URB */
-               killed = (urb->status == -ENOENT ||
-                         urb->status == -ECONNABORTED ||
-                         urb->status == -ECONNRESET);
-
-       if (resubmit_interrupt && !killed) {
-                if ( urb->dev != xhci->rh.dev )
-                        xhci_queue_req(urb); /* XXX What if this fails? */
-                /* Don't need to resubmit URBs for the virtual root dev. */
-       } else {
-               if (is_ring && !killed) {
-                       urb->dev = dev;
-                       xhci_submit_urb(urb);
-               } else {
-                       /* We decrement the usage count after we're done */
-                       /*  with everything */
-                       usb_dec_dev_use(dev);
-               }
-       }
-}
-
-static void xhci_finish_completion(void)
-{
-       struct list_head *tmp, *head;
-       unsigned long flags;
-
-       spin_lock_irqsave(&xhci->complete_list_lock, flags);
-       head = &xhci->complete_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
-                                                   complete_list);
-               struct urb *urb = urbp->urb;
-
-               list_del_init(&urbp->complete_list);
-               spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-
-               xhci_call_completion(urb);
-
-               spin_lock_irqsave(&xhci->complete_list_lock, flags);
-               head = &xhci->complete_list;
-               tmp = head->next;
-       }
-       spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-static struct usb_operations xhci_device_operations = {
-       .allocate = xhci_do_nothing_dev,
-       .deallocate = xhci_do_nothing_dev,
-        /* It doesn't look like any drivers actually care what the frame number
-        * is at the moment!  If necessary, we could approximate the current
-        * frame nubmer by passing it from the backend in response messages. */
-       .get_frame_number = NULL,
-       .submit_urb = xhci_submit_urb,
-       .unlink_urb = xhci_unlink_urb
-};
-
-/******************************************************************************
- * VIRTUAL ROOT HUB EMULATION
- */
-
-static __u8 root_hub_dev_des[] =
-{
-       0x12,                   /*  __u8  bLength; */
-       0x01,                   /*  __u8  bDescriptorType; Device */
-       0x00,                   /*  __u16 bcdUSB; v1.0 */
-       0x01,
-       0x09,                   /*  __u8  bDeviceClass; HUB_CLASSCODE */
-       0x00,                   /*  __u8  bDeviceSubClass; */
-       0x00,                   /*  __u8  bDeviceProtocol; */
-       0x08,                   /*  __u8  bMaxPacketSize0; 8 Bytes */
-       0x00,                   /*  __u16 idVendor; */
-       0x00,
-       0x00,                   /*  __u16 idProduct; */
-       0x00,
-       0x00,                   /*  __u16 bcdDevice; */
-       0x00,
-       0x00,                   /*  __u8  iManufacturer; */
-       0x02,                   /*  __u8  iProduct; */
-       0x01,                   /*  __u8  iSerialNumber; */
-       0x01                    /*  __u8  bNumConfigurations; */
-};
-
-
-/* Configuration descriptor */
-static __u8 root_hub_config_des[] =
-{
-       0x09,                   /*  __u8  bLength; */
-       0x02,                   /*  __u8  bDescriptorType; Configuration */
-       0x19,                   /*  __u16 wTotalLength; */
-       0x00,
-       0x01,                   /*  __u8  bNumInterfaces; */
-       0x01,                   /*  __u8  bConfigurationValue; */
-       0x00,                   /*  __u8  iConfiguration; */
-       0x40,                   /*  __u8  bmAttributes;
-                                       Bit 7: Bus-powered, 6: Self-powered,
-                                       Bit 5 Remote-wakeup, 4..0: resvd */
-       0x00,                   /*  __u8  MaxPower; */
-
-       /* interface */
-       0x09,                   /*  __u8  if_bLength; */
-       0x04,                   /*  __u8  if_bDescriptorType; Interface */
-       0x00,                   /*  __u8  if_bInterfaceNumber; */
-       0x00,                   /*  __u8  if_bAlternateSetting; */
-       0x01,                   /*  __u8  if_bNumEndpoints; */
-       0x09,                   /*  __u8  if_bInterfaceClass; HUB_CLASSCODE */
-       0x00,                   /*  __u8  if_bInterfaceSubClass; */
-       0x00,                   /*  __u8  if_bInterfaceProtocol; */
-       0x00,                   /*  __u8  if_iInterface; */
-
-       /* endpoint */
-       0x07,                   /*  __u8  ep_bLength; */
-       0x05,                   /*  __u8  ep_bDescriptorType; Endpoint */
-       0x81,                   /*  __u8  ep_bEndpointAddress; IN Endpoint 1 */
-       0x03,                   /*  __u8  ep_bmAttributes; Interrupt */
-       0x08,                   /*  __u16 ep_wMaxPacketSize; 8 Bytes */
-       0x00,
-       0xff                    /*  __u8  ep_bInterval; 255 ms */
-};
-
-static __u8 root_hub_hub_des[] =
-{
-       0x09,                   /*  __u8  bLength; */
-       0x29,                   /*  __u8  bDescriptorType; Hub-descriptor */
-       0x02,                   /*  __u8  bNbrPorts; */
-       0x00,                   /* __u16  wHubCharacteristics; */
-       0x00,
-       0x01,                   /*  __u8  bPwrOn2pwrGood; 2ms */
-       0x00,                   /*  __u8  bHubContrCurrent; 0 mA */
-       0x00,                   /*  __u8  DeviceRemovable; *** 7 Ports max *** */
-       0xff                    /*  __u8  PortPwrCtrlMask; *** 7 ports max *** */
-};
-
-/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */
-static int rh_send_irq(struct urb *urb)
-{
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-        xhci_port_t *ports = xhci->rh.ports;
-       unsigned long flags;
-       int i, len = 1;
-       __u16 data = 0;
-
-       spin_lock_irqsave(&urb->lock, flags);
-       for (i = 0; i < xhci->rh.numports; i++) {
-                /* Set a bit if anything at all has changed on the port, as per
-                * USB spec 11.12 */
-               data |= (ports[i].cs_chg || ports[i].pe_chg )
-                        ? (1 << (i + 1))
-                        : 0;
-
-               len = (i + 1) / 8 + 1;
-       }
-
-       *(__u16 *) urb->transfer_buffer = cpu_to_le16(data);
-       urb->actual_length = len;
-       urbp->status = 0;
-
-       spin_unlock_irqrestore(&urb->lock, flags);
-
-       if ((data > 0) && (xhci->rh.send != 0)) {
-               dbg("root-hub INT complete: data: %x", data);
-               xhci_call_completion(urb);
-       }
-
-       return 0;
-}
-
-/* Virtual Root Hub INTs are polled by this timer every "interval" ms */
-static int rh_init_int_timer(struct urb *urb);
-
-static void rh_int_timer_do(unsigned long ptr)
-{
-       struct urb *urb = (struct urb *)ptr;
-       struct list_head list, *tmp, *head;
-       unsigned long flags;
-       int i;
-
-       for ( i = 0; i < xhci->rh.numports; i++)
-                xhci_queue_probe(i);
-
-       if (xhci->rh.send)
-               rh_send_irq(urb);
-
-       INIT_LIST_HEAD(&list);
-
-       spin_lock_irqsave(&xhci->urb_list_lock, flags);
-       head = &xhci->urb_list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *u = list_entry(tmp, struct urb, urb_list);
-               struct urb_priv *up = (struct urb_priv *)u->hcpriv;
-
-               tmp = tmp->next;
-
-               spin_lock(&u->lock);
-
-               /* Check if the URB timed out */
-               if (u->timeout && time_after_eq(jiffies,
-                                                up->inserttime + u->timeout)) {
-                       list_del(&u->urb_list);
-                       list_add_tail(&u->urb_list, &list);
-               }
-
-               spin_unlock(&u->lock);
-       }
-       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
-       head = &list;
-       tmp = head->next;
-       while (tmp != head) {
-               struct urb *u = list_entry(tmp, struct urb, urb_list);
-
-               tmp = tmp->next;
-
-               u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED;
-               xhci_unlink_urb(u);
-       }
-
-       rh_init_int_timer(urb);
-}
-
-/* Root Hub INTs are polled by this timer */
-static int rh_init_int_timer(struct urb *urb)
-{
-       xhci->rh.interval = urb->interval;
-       init_timer(&xhci->rh.rh_int_timer);
-       xhci->rh.rh_int_timer.function = rh_int_timer_do;
-       xhci->rh.rh_int_timer.data = (unsigned long)urb;
-       xhci->rh.rh_int_timer.expires = jiffies
-                + (HZ * (urb->interval < 30 ? 30 : urb->interval)) / 1000;
-       add_timer(&xhci->rh.rh_int_timer);
-
-       return 0;
-}
-
-#define OK(x)                  len = (x); break
-
-/* Root Hub Control Pipe */
-static int rh_submit_urb(struct urb *urb)
-{
-       unsigned int pipe = urb->pipe;
-       struct usb_ctrlrequest *cmd =
-                (struct usb_ctrlrequest *)urb->setup_packet;
-       void *data = urb->transfer_buffer;
-       int leni = urb->transfer_buffer_length;
-       int len = 0;
-       xhci_port_t *status;
-       int stat = 0;
-       int i;
-       int retstatus;
-        unsigned long flags;
-        
-       __u16 cstatus;
-       __u16 bmRType_bReq;
-       __u16 wValue;
-       __u16 wIndex;
-       __u16 wLength;
-
-       if (usb_pipetype(pipe) == PIPE_INTERRUPT) {
-               xhci->rh.urb = urb;
-               xhci->rh.send = 1;
-               xhci->rh.interval = urb->interval;
-               rh_init_int_timer(urb);
-
-               return -EINPROGRESS;
-       }
-
-       bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8;
-       wValue = le16_to_cpu(cmd->wValue);
-       wIndex = le16_to_cpu(cmd->wIndex);
-       wLength = le16_to_cpu(cmd->wLength);
-
-       for (i = 0; i < 8; i++)
-               xhci->rh.c_p_r[i] = 0;
-
-        status = &xhci->rh.ports[wIndex - 1];
-
-        spin_lock_irqsave(&xhci->rh.port_state_lock, flags);
-
-       switch (bmRType_bReq) {
-               /* Request Destination:
-                  without flags: Device,
-                  RH_INTERFACE: interface,
-                  RH_ENDPOINT: endpoint,
-                  RH_CLASS means HUB here,
-                  RH_OTHER | RH_CLASS  almost ever means HUB_PORT here
-               */
-
-       case RH_GET_STATUS:
-               *(__u16 *)data = cpu_to_le16(1);
-               OK(2);
-       case RH_GET_STATUS | RH_INTERFACE:
-               *(__u16 *)data = cpu_to_le16(0);
-               OK(2);
-       case RH_GET_STATUS | RH_ENDPOINT:
-               *(__u16 *)data = cpu_to_le16(0);
-               OK(2);
-       case RH_GET_STATUS | RH_CLASS:
-               *(__u32 *)data = cpu_to_le32(0);
-               OK(4);          /* hub power */
-       case RH_GET_STATUS | RH_OTHER | RH_CLASS:
-               cstatus = (status->cs_chg) |
-                       (status->pe_chg << 1) |
-                       (xhci->rh.c_p_r[wIndex - 1] << 4);
-               retstatus = (status->ccs) |
-                       (status->pe << 1) |
-                       (status->susp << 2) |
-                       (status->pr << 8) |
-                       (1 << 8) |      /* power on */
-                       (status->lsda << 9);
-               *(__u16 *)data = cpu_to_le16(retstatus);
-               *(__u16 *)(data + 2) = cpu_to_le16(cstatus);
-               OK(4);
-       case RH_CLEAR_FEATURE | RH_ENDPOINT:
-               switch (wValue) {
-               case RH_ENDPOINT_STALL:
-                       OK(0);
-               }
-               break;
-       case RH_CLEAR_FEATURE | RH_CLASS:
-               switch (wValue) {
-               case RH_C_HUB_OVER_CURRENT:
-                       OK(0);  /* hub power over current */
-               }
-               break;
-       case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS:
-               switch (wValue) {
-               case RH_PORT_ENABLE:
-                        status->pe     = 0;
-                       OK(0);
-               case RH_PORT_SUSPEND:
-                        status->susp   = 0;
-                       OK(0);
-               case RH_PORT_POWER:
-                       OK(0);  /* port power */
-               case RH_C_PORT_CONNECTION:
-                        status->cs_chg = 0;
-                       OK(0);
-               case RH_C_PORT_ENABLE:
-                        status->pe_chg = 0;
-                       OK(0);
-               case RH_C_PORT_SUSPEND:
-                       /*** WR_RH_PORTSTAT(RH_PS_PSSC); */
-                       OK(0);
-               case RH_C_PORT_OVER_CURRENT:
-                       OK(0);  /* port power over current */
-               case RH_C_PORT_RESET:
-                       xhci->rh.c_p_r[wIndex - 1] = 0;
-                       OK(0);
-               }
-               break;
-       case RH_SET_FEATURE | RH_OTHER | RH_CLASS:
-               switch (wValue) {
-               case RH_PORT_SUSPEND:
-                        status->susp = 1;      
-                       OK(0);
-               case RH_PORT_RESET:
-                {
-                        int ret;
-                        xhci->rh.c_p_r[wIndex - 1] = 1;
-                        status->pr = 0;
-                        status->pe = 1;
-                        ret = xhci_port_reset(wIndex - 1);
-                        /* XXX MAW: should probably cancel queued transfers during reset... *\/ */
-                        if ( ret == 0 ) { OK(0); }
-                        else { return ret; }
-                }
-                break;
-               case RH_PORT_POWER:
-                       OK(0); /* port power ** */
-               case RH_PORT_ENABLE:
-                        status->pe = 1;
-                       OK(0);
-               }
-               break;
-       case RH_SET_ADDRESS:
-               xhci->rh.devnum = wValue;
-               OK(0);
-       case RH_GET_DESCRIPTOR:
-               switch ((wValue & 0xff00) >> 8) {
-               case 0x01:      /* device descriptor */
-                       len = min_t(unsigned int, leni,
-                                 min_t(unsigned int,
-                                     sizeof(root_hub_dev_des), wLength));
-                       memcpy(data, root_hub_dev_des, len);
-                       OK(len);
-               case 0x02:      /* configuration descriptor */
-                       len = min_t(unsigned int, leni,
-                                 min_t(unsigned int,
-                                     sizeof(root_hub_config_des), wLength));
-                       memcpy (data, root_hub_config_des, len);
-                       OK(len);
-               case 0x03:      /* string descriptors */
-                       len = usb_root_hub_string (wValue & 0xff,
-                               0, "XHCI-alt",
-                               data, wLength);
-                       if (len > 0) {
-                               OK(min_t(int, leni, len));
-                       } else 
-                               stat = -EPIPE;
-               }
-               break;
-       case RH_GET_DESCRIPTOR | RH_CLASS:
-               root_hub_hub_des[2] = xhci->rh.numports;
-               len = min_t(unsigned int, leni,
-                         min_t(unsigned int, sizeof(root_hub_hub_des), wLength));
-               memcpy(data, root_hub_hub_des, len);
-               OK(len);
-       case RH_GET_CONFIGURATION:
-               *(__u8 *)data = 0x01;
-               OK(1);
-       case RH_SET_CONFIGURATION:
-               OK(0);
-       case RH_GET_INTERFACE | RH_INTERFACE:
-               *(__u8 *)data = 0x00;
-               OK(1);
-       case RH_SET_INTERFACE | RH_INTERFACE:
-               OK(0);
-       default:
-               stat = -EPIPE;
-       }
-
-        spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags);
-
-       urb->actual_length = len;
-
-       return stat;
-}
-
-/*
- * MUST be called with urb->lock acquired
- */
-static int rh_unlink_urb(struct urb *urb)
-{
-       if (xhci->rh.urb == urb) {
-               urb->status = -ENOENT;
-               xhci->rh.send = 0;
-               xhci->rh.urb = NULL;
-               del_timer(&xhci->rh.rh_int_timer);
-       }
-       return 0;
-}
-
-/******************************************************************************
- * CONTROL PLANE FUNCTIONALITY
- */
-
-/**
- * alloc_xhci - initialise a new virtual root hub for a new USB device channel
- */
-static int alloc_xhci(void)
-{
-       int retval;
-       struct usb_bus *bus;
-
-       retval = -EBUSY;
-
-       xhci = kmalloc(sizeof(*xhci), GFP_KERNEL);
-       if (!xhci) {
-               err("couldn't allocate xhci structure");
-               retval = -ENOMEM;
-               goto err_alloc_xhci;
-       }
-
-       xhci->state = USBIF_STATE_CLOSED;
-
-       spin_lock_init(&xhci->urb_list_lock);
-       INIT_LIST_HEAD(&xhci->urb_list);
-
-       spin_lock_init(&xhci->complete_list_lock);
-       INIT_LIST_HEAD(&xhci->complete_list);
-
-       spin_lock_init(&xhci->frame_list_lock);
-
-       bus = usb_alloc_bus(&xhci_device_operations);
-
-       if (!bus) {
-               err("unable to allocate bus");
-               goto err_alloc_bus;
-       }
-
-       xhci->bus = bus;
-       bus->bus_name = "XHCI";
-       bus->hcpriv = xhci;
-
-       usb_register_bus(xhci->bus);
-
-       /* Initialize the root hub */
-
-       xhci->rh.numports = 0;
-
-       xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus);
-       if (!xhci->rh.dev) {
-               err("unable to allocate root hub");
-               goto err_alloc_root_hub;
-       }
-
-       xhci->state = 0;
-
-       return 0;
-
-/*
- * error exits:
- */
-err_alloc_root_hub:
-        usb_deregister_bus(xhci->bus);
-       usb_free_bus(xhci->bus);
-       xhci->bus = NULL;
-
-err_alloc_bus:
-       kfree(xhci);
-
-err_alloc_xhci:
-       return retval;
-}
-
-/**
- * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message
- */
-static void usbif_status_change(usbif_fe_interface_status_changed_t *status)
-{
-    ctrl_msg_t                   cmsg;
-    usbif_fe_interface_connect_t up;
-    long rc;
-    usbif_sring_t *sring;
-
-    switch ( status->status )
-    {
-    case USBIF_INTERFACE_STATUS_DESTROYED:
-        printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n",
-               xhci->state);
-        break;
-
-    case USBIF_INTERFACE_STATUS_DISCONNECTED:
-        if ( xhci->state != USBIF_STATE_CLOSED )
-        {
-            printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message"
-                   " in state %d\n", xhci->state);
-            break;
-            /* Not bothering to do recovery here for now.  Keep things
-             * simple. */
-        }
-
-        /* Move from CLOSED to DISCONNECTED state. */
-        sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
-        SHARED_RING_INIT(sring);
-        FRONT_RING_INIT(&xhci->usb_ring, sring);
-        xhci->state  = USBIF_STATE_DISCONNECTED;
-
-        /* Construct an interface-CONNECT message for the domain controller. */
-        cmsg.type      = CMSG_USBIF_FE;
-        cmsg.subtype   = CMSG_USBIF_FE_INTERFACE_CONNECT;
-        cmsg.length    = sizeof(usbif_fe_interface_connect_t);
-        up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
-        memcpy(cmsg.msg, &up, sizeof(up));
-        
-        /* Tell the controller to bring up the interface. */
-        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-        break;
-
-    case USBIF_INTERFACE_STATUS_CONNECTED:
-        if ( xhci->state == USBIF_STATE_CLOSED )
-        {
-            printk(KERN_WARNING "Unexpected usbif-CONNECTED message"
-                   " in state %d\n", xhci->state);
-            break;
-        }
-
-        xhci->evtchn = status->evtchn;
-        xhci->irq = bind_evtchn_to_irq(xhci->evtchn);
-       xhci->bandwidth = status->bandwidth;
-       xhci->rh.numports = status->num_ports;
-
-        xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL);
-        memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports);
-
-       usb_connect(xhci->rh.dev);
-
-       if (usb_new_device(xhci->rh.dev) != 0) {
-               err("unable to start root hub");
-       }
-
-       /* Allocate the appropriate USB bandwidth here...  Need to
-         * somehow know what the total available is thought to be so we
-         * can calculate the reservation correctly. */
-       usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
-                           1000 - xhci->bandwidth, 0);
-
-        if ( (rc = request_irq(xhci->irq, xhci_interrupt, 
-                               SA_SAMPLE_RANDOM, "usbif", xhci)) )
-                printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
-
-       DPRINTK(KERN_INFO __FILE__
-                ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n",
-                xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
-                xhci->evtchn, xhci->irq);
-
-        xhci->state = USBIF_STATE_CONNECTED;
-        
-        break;
-
-    default:
-        printk(KERN_WARNING "Status change to unknown value %d\n", 
-               status->status);
-        break;
-    }
-}
-
-/**
- * usbif_ctrlif_rx - demux control messages by subtype
- */
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    switch ( msg->subtype )
-    {
-    case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
-        if ( msg->length != sizeof(usbif_fe_interface_status_changed_t) )
-            goto parse_error;
-        usbif_status_change((usbif_fe_interface_status_changed_t *)
-                            &msg->msg[0]);
-        break;        
-
-        /* New interface...? */
-    default:
-        goto parse_error;
-    }
-
-    ctrl_if_send_response(msg);
-    return;
-
- parse_error:
-    msg->length = 0;
-    ctrl_if_send_response(msg);
-}
-
-
-static int __init xhci_hcd_init(void)
-{
-       int retval = -ENOMEM, i;
-        usbif_fe_interface_status_changed_t st;
-        control_msg_t cmsg;
-
-       if ( (xen_start_info.flags & SIF_INITDOMAIN)
-            || (xen_start_info.flags & SIF_USB_BE_DOMAIN) )
-                return 0;
-
-       info(DRIVER_DESC " " DRIVER_VERSION);
-
-       if (debug) {
-               errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL);
-               if (!errbuf)
-                       goto errbuf_failed;
-       }
-
-       xhci_up_cachep = kmem_cache_create("xhci_urb_priv",
-               sizeof(struct urb_priv), 0, 0, NULL, NULL);
-       if (!xhci_up_cachep)
-               goto up_failed;
-
-        /* Let the domain controller know we're here.  For now we wait until
-         * connection, as for the block and net drivers.  This is only strictly
-         * necessary if we're going to boot off a USB device. */
-        printk(KERN_INFO "Initialising Xen virtual USB hub\n");
-    
-        (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx,
-                                        CALLBACK_IN_BLOCKING_CONTEXT);
-        
-       alloc_xhci();
-
-        /* Send a driver-UP notification to the domain controller. */
-        cmsg.type      = CMSG_USBIF_FE;
-        cmsg.subtype   = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED;
-        cmsg.length    = sizeof(usbif_fe_driver_status_changed_t);
-        st.status      = USBIF_DRIVER_STATUS_UP;
-        memcpy(cmsg.msg, &st, sizeof(st));
-        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-        
-        /*
-         * We should read 'nr_interfaces' from response message and wait
-         * for notifications before proceeding. For now we assume that we
-         * will be notified of exactly one interface.
-         */
-        for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
-        {
-            set_current_state(TASK_INTERRUPTIBLE);
-            schedule_timeout(1);
-        }
-        
-        if (xhci->state != USBIF_STATE_CONNECTED)
-            printk(KERN_WARNING "Timeout connecting USB frontend driver!\n");
-       
-       return 0;
-
-up_failed:
-       if (errbuf)
-               kfree(errbuf);
-
-errbuf_failed:
-       return retval;
-}
-
-module_init(xhci_hcd_init);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h b/linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h
deleted file mode 100644 (file)
index f503e59..0000000
+++ /dev/null
@@ -1,180 +0,0 @@
-/******************************************************************************
- * xhci.h
- *
- * Private definitions for the Xen Virtual USB Controller.  Based on
- * drivers/usb/host/uhci.h from Linux.  Copyright for the imported content is
- * retained by the original authors.
- *
- * Modifications are:
- * Copyright (C) 2004 Intel Research Cambridge
- * Copyright (C) 2004, 2005 Mark Williamson
- */
-
-#ifndef __LINUX_XHCI_H
-#define __LINUX_XHCI_H
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <asm-xen/xen-public/io/usbif.h>
-#include <linux/spinlock.h>
-
-/* xhci_port_t - current known state of a virtual hub ports */
-typedef struct {
-        unsigned int cs     :1; /* Connection status.  do we really need this /and/ ccs? */
-        unsigned int cs_chg :1; /* Connection status change.  */
-        unsigned int pe     :1; /* Port enable.               */
-        unsigned int pe_chg :1; /* Port enable change.        */
-        unsigned int ccs    :1; /* Current connect status.    */
-        unsigned int susp   :1; /* Suspended.                 */
-        unsigned int lsda   :1; /* Low speed device attached. */
-        unsigned int pr     :1; /* Port reset.                */
-} xhci_port_t;
-
-/* struct virt_root_hub - state related to the virtual root hub */
-struct virt_root_hub {
-       struct usb_device *dev;
-       int devnum;             /* Address of Root Hub endpoint */
-       struct urb *urb;
-       void *int_addr;
-       int send;
-       int interval;
-       int numports;
-       int c_p_r[8];
-       struct timer_list rh_int_timer;
-        spinlock_t port_state_lock;
-        xhci_port_t *ports;
-};
-
-/* struct xhci - contains the state associated with a single USB interface */
-struct xhci {
-
-#ifdef CONFIG_PROC_FS
-       /* procfs */
-       int num;
-       struct proc_dir_entry *proc_entry;
-#endif
-
-        int evtchn;                        /* Interdom channel to backend */
-        int irq;                           /* Bound to evtchn */
-        enum { USBIF_STATE_CONNECTED    = 2,
-               USBIF_STATE_DISCONNECTED = 1,
-               USBIF_STATE_CLOSED       = 0
-        } state; /* State of this USB interface */
-        unsigned long bandwidth;
-
-       struct usb_bus *bus;
-
-       /* Main list of URB's currently controlled by this HC */
-       spinlock_t urb_list_lock;
-       struct list_head urb_list;              /* P: xhci->urb_list_lock */
-
-       /* List of URB's awaiting completion callback */
-       spinlock_t complete_list_lock;
-       struct list_head complete_list;         /* P: xhci->complete_list_lock */
-
-       struct virt_root_hub rh;        /* private data of the virtual root hub */
-
-        usbif_front_ring_t usb_ring;
-
-        int awaiting_reset;
-};
-
-/* per-URB private data structure for the host controller */
-struct urb_priv {
-       struct urb *urb;
-        usbif_iso_t *schedule;
-       struct usb_device *dev;
-
-        int in_progress : 1;           /* QH was queued (not linked in) */
-       int short_control_packet : 1;   /* If we get a short packet during */
-                                       /*  a control transfer, retrigger */
-                                       /*  the status phase */
-
-       int status;                     /* Final status */
-
-       unsigned long inserttime;       /* In jiffies */
-
-       struct list_head complete_list; /* P: xhci->complete_list_lock */
-};
-
-/*
- * Locking in xhci.c
- *
- * spinlocks are used extensively to protect the many lists and data
- * structures we have. It's not that pretty, but it's necessary. We
- * need to be done with all of the locks (except complete_list_lock) when
- * we call urb->complete. I've tried to make it simple enough so I don't
- * have to spend hours racking my brain trying to figure out if the
- * locking is safe.
- *
- * Here's the safe locking order to prevent deadlocks:
- *
- * #1 xhci->urb_list_lock
- * #2 urb->lock
- * #3 xhci->urb_remove_list_lock
- * #4 xhci->complete_list_lock
- *
- * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
- * at the lowest level FIRST and NEVER grab locks at the same level at the
- * same time.
- * 
- * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
- */
-
-/* -------------------------------------------------------------------------
-   Virtual Root HUB
-   ------------------------------------------------------------------------- */
-/* destination of request */
-#define RH_DEVICE              0x00
-#define RH_INTERFACE           0x01
-#define RH_ENDPOINT            0x02
-#define RH_OTHER               0x03
-
-#define RH_CLASS               0x20
-#define RH_VENDOR              0x40
-
-/* Requests: bRequest << 8 | bmRequestType */
-#define RH_GET_STATUS          0x0080
-#define RH_CLEAR_FEATURE       0x0100
-#define RH_SET_FEATURE         0x0300
-#define RH_SET_ADDRESS         0x0500
-#define RH_GET_DESCRIPTOR      0x0680
-#define RH_SET_DESCRIPTOR      0x0700
-#define RH_GET_CONFIGURATION   0x0880
-#define RH_SET_CONFIGURATION   0x0900
-#define RH_GET_STATE           0x0280
-#define RH_GET_INTERFACE       0x0A80
-#define RH_SET_INTERFACE       0x0B00
-#define RH_SYNC_FRAME          0x0C80
-/* Our Vendor Specific Request */
-#define RH_SET_EP              0x2000
-
-/* Hub port features */
-#define RH_PORT_CONNECTION     0x00
-#define RH_PORT_ENABLE         0x01
-#define RH_PORT_SUSPEND                0x02
-#define RH_PORT_OVER_CURRENT   0x03
-#define RH_PORT_RESET          0x04
-#define RH_PORT_POWER          0x08
-#define RH_PORT_LOW_SPEED      0x09
-#define RH_C_PORT_CONNECTION   0x10
-#define RH_C_PORT_ENABLE       0x11
-#define RH_C_PORT_SUSPEND      0x12
-#define RH_C_PORT_OVER_CURRENT 0x13
-#define RH_C_PORT_RESET                0x14
-
-/* Hub features */
-#define RH_C_HUB_LOCAL_POWER   0x00
-#define RH_C_HUB_OVER_CURRENT  0x01
-#define RH_DEVICE_REMOTE_WAKEUP        0x00
-#define RH_ENDPOINT_STALL      0x01
-
-/* Our Vendor Specific feature */
-#define RH_REMOVE_EP           0x00
-
-#define RH_ACK                 0x01
-#define RH_REQ_ERR             -1
-#define RH_NACK                        0x00
-
-#endif
-
diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
deleted file mode 100644 (file)
index 421a81f..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#if 1
-       printk("smpboot_clear_io_apic_irqs\n");
-#else
-       io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-#if 1
-       printk("smpboot_setup_warm_reset_vector\n");
-#else
-       CMOS_WRITE(0xa, 0xf);
-       local_flush_tlb();
-       Dprintk("1.\n");
-       *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
-       Dprintk("2.\n");
-       *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
-       Dprintk("3.\n");
-#endif
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
-       /*
-        * Install writable page 0 entry to set BIOS data area.
-        */
-       local_flush_tlb();
-
-       /*
-        * Paranoid:  Set warm reset code and vector here back
-        * to default values.
-        */
-       CMOS_WRITE(0, 0xf);
-
-       *((volatile long *) phys_to_virt(0x467)) = 0;
-}
-
-static inline void smpboot_setup_io_apic(void)
-{
-#if 1
-       printk("smpboot_setup_io_apic\n");
-#else
-       /*
-        * Here we can be sure that there is an IO-APIC in the system. Let's
-        * go and set it up:
-        */
-       if (!skip_ioapic_setup && nr_ioapics)
-               setup_IO_APIC();
-#endif
-}
-
-
-#define        smp_found_config        (HYPERVISOR_shared_info->n_vcpu > 1)
diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
deleted file mode 100644 (file)
index fb8bd00..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <linux/config.h>
-#include <linux/compiler.h>
-
-asmlinkage int printk(const char * fmt, ...)
-       __attribute__ ((format (printf, 1, 2)));
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-typedef struct {
-       volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-       unsigned magic;
-#endif
-} spinlock_t;
-
-#define SPINLOCK_MAGIC 0xdead4ead
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
-#else
-#define SPINLOCK_MAGIC_INIT    /* */
-#endif
-
-#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
-
-#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
-
-/*
- * Simple spin lock operations.  There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- */
-
-#define spin_is_locked(x)      (*(volatile signed char *)(&(x)->lock) <= 0)
-#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
-
-#define spin_lock_string \
-       "\n1:\t" \
-       "lock ; decb %0\n\t" \
-       "jns 3f\n" \
-       "2:\t" \
-       "rep;nop\n\t" \
-       "cmpb $0,%0\n\t" \
-       "jle 2b\n\t" \
-       "jmp 1b\n" \
-       "3:\n\t"
-
-#define spin_lock_string_flags \
-       "\n1:\t" \
-       "lock ; decb %0\n\t" \
-       "jns 4f\n\t" \
-       "2:\t" \
-       "testl $0x200, %1\n\t" \
-       "jz 3f\n\t" \
-       "#sti\n\t" \
-       "3:\t" \
-       "rep;nop\n\t" \
-       "cmpb $0, %0\n\t" \
-       "jle 3b\n\t" \
-       "#cli\n\t" \
-       "jmp 1b\n" \
-       "4:\n\t"
-
-/*
- * This works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE)
- * (PPro errata 66, 92)
- */
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-#define spin_unlock_string \
-       "movb $1,%0" \
-               :"=m" (lock->lock) : : "memory"
-
-
-static inline void _raw_spin_unlock(spinlock_t *lock)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(lock->magic != SPINLOCK_MAGIC);
-       BUG_ON(!spin_is_locked(lock));
-#endif
-       __asm__ __volatile__(
-               spin_unlock_string
-       );
-}
-
-#else
-
-#define spin_unlock_string \
-       "xchgb %b0, %1" \
-               :"=q" (oldval), "=m" (lock->lock) \
-               :"0" (oldval) : "memory"
-
-static inline void _raw_spin_unlock(spinlock_t *lock)
-{
-       char oldval = 1;
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(lock->magic != SPINLOCK_MAGIC);
-       BUG_ON(!spin_is_locked(lock));
-#endif
-       __asm__ __volatile__(
-               spin_unlock_string
-       );
-}
-
-#endif
-
-static inline int _raw_spin_trylock(spinlock_t *lock)
-{
-       char oldval;
-       __asm__ __volatile__(
-               "xchgb %b0,%1"
-               :"=q" (oldval), "=m" (lock->lock)
-               :"0" (0) : "memory");
-       return oldval > 0;
-}
-
-static inline void _raw_spin_lock(spinlock_t *lock)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
-               printk("eip: %p\n", __builtin_return_address(0));
-               BUG();
-       }
-#endif
-       __asm__ __volatile__(
-               spin_lock_string
-               :"=m" (lock->lock) : : "memory");
-}
-
-static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
-               printk("eip: %p\n", __builtin_return_address(0));
-               BUG();
-       }
-#endif
-       __asm__ __volatile__(
-               spin_lock_string_flags
-               :"=m" (lock->lock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
-       volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-       unsigned magic;
-#endif
-} rwlock_t;
-
-#define RWLOCK_MAGIC   0xdeaf1eed
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
-#else
-#define RWLOCK_MAGIC_INIT      /* */
-#endif
-
-#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
-
-#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
-
-#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
-
-/*
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores.  See
- * semaphore.h for details.  -ben
- */
-/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
-
-static inline void _raw_read_lock(rwlock_t *rw)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(rw->magic != RWLOCK_MAGIC);
-#endif
-       __build_read_lock(rw, "__read_lock_failed");
-}
-
-static inline void _raw_write_lock(rwlock_t *rw)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
-       BUG_ON(rw->magic != RWLOCK_MAGIC);
-#endif
-       __build_write_lock(rw, "__write_lock_failed");
-}
-
-#define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define _raw_write_unlock(rw)  asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
-
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
-       atomic_t *count = (atomic_t *)lock;
-       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-               return 1;
-       atomic_add(RW_LOCK_BIAS, count);
-       return 0;
-}
-
-#endif /* __ASM_SPINLOCK_H */
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
new file mode 100644 (file)
index 0000000..6cd16cc
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ *     linux/arch/i386/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ */
+union irq_ctx {
+       struct thread_info      tinfo;
+       u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS];
+static union irq_ctx *softirq_ctx[NR_CPUS];
+#endif
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{      
+       /* high bits used in ret_from_ code */
+       int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
+#ifdef CONFIG_4KSTACKS
+       union irq_ctx *curctx, *irqctx;
+       u32 *isp;
+#endif
+
+       irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+       {
+               long esp;
+
+               __asm__ __volatile__("andl %%esp,%0" :
+                                       "=r" (esp) : "0" (THREAD_SIZE - 1));
+               if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+                       printk("do_IRQ: stack overflow: %ld\n",
+                               esp - sizeof(struct thread_info));
+                       dump_stack();
+               }
+       }
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+       curctx = (union irq_ctx *) current_thread_info();
+       irqctx = hardirq_ctx[smp_processor_id()];
+
+       /*
+        * this is where we switch to the IRQ stack. However, if we are
+        * already using the IRQ stack (because we interrupted a hardirq
+        * handler) we can't do that and just have to keep using the
+        * current stack (which is the irq stack already after all)
+        */
+       if (curctx != irqctx) {
+               int arg1, arg2, ebx;
+
+               /* build the stack frame on the IRQ stack */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+               irqctx->tinfo.task = curctx->tinfo.task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp      \n"
+                       "       call    __do_IRQ         \n"
+                       "       movl   %%ebx,%%esp      \n"
+                       : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+                       :  "0" (irq),   "1" (regs),  "2" (isp)
+                       : "memory", "cc", "ecx"
+               );
+       } else
+#endif
+               __do_IRQ(irq, regs);
+
+       irq_exit();
+
+       return 1;
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc's 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+               __attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+       union irq_ctx *irqctx;
+
+       if (hardirq_ctx[cpu])
+               return;
+
+       irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       hardirq_ctx[cpu] = irqctx;
+
+       irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = SOFTIRQ_OFFSET;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       softirq_ctx[cpu] = irqctx;
+
+       printk("CPU %u irqstacks, hard=%p soft=%p\n",
+               cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+       unsigned long flags;
+       struct thread_info *curctx;
+       union irq_ctx *irqctx;
+       u32 *isp;
+
+       if (in_interrupt())
+               return;
+
+       local_irq_save(flags);
+
+       if (local_softirq_pending()) {
+               curctx = current_thread_info();
+               irqctx = softirq_ctx[smp_processor_id()];
+               irqctx->tinfo.task = curctx->task;
+               irqctx->tinfo.previous_esp = current_stack_pointer;
+
+               /* build the stack frame on the softirq stack */
+               isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+               asm volatile(
+                       "       xchgl   %%ebx,%%esp     \n"
+                       "       call    __do_softirq    \n"
+                       "       movl    %%ebx,%%esp     \n"
+                       : "=b"(isp)
+                       : "0"(isp)
+                       : "memory", "cc", "edx", "ecx", "eax"
+               );
+       }
+
+       local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+       int i = *(loff_t *) v, j;
+       struct irqaction * action;
+       unsigned long flags;
+
+       if (i == 0) {
+               seq_printf(p, "           ");
+               for (j=0; j<NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "CPU%d       ",j);
+               seq_putc(p, '\n');
+       }
+
+       if (i < NR_IRQS) {
+               spin_lock_irqsave(&irq_desc[i].lock, flags);
+               action = irq_desc[i].action;
+               if (!action)
+                       goto skip;
+               seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+               seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+               seq_printf(p, " %14s", irq_desc[i].handler->typename);
+               seq_printf(p, "  %s", action->name);
+
+               for (action=action->next; action; action = action->next)
+                       seq_printf(p, ", %s", action->name);
+
+               seq_putc(p, '\n');
+skip:
+               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+       } else if (i == NR_IRQS) {
+               seq_printf(p, "NMI: ");
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ", nmi_count(j));
+               seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+               seq_printf(p, "LOC: ");
+               for (j = 0; j < NR_CPUS; j++)
+                       if (cpu_online(j))
+                               seq_printf(p, "%10u ",
+                                       irq_stat[j].apic_timer_irqs);
+               seq_putc(p, '\n');
+#endif
+               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+               seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+       }
+       return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
new file mode 100644 (file)
index 0000000..7b77638
--- /dev/null
@@ -0,0 +1,599 @@
+/*
+ *     Intel SMP support routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#if 0
+#include <mach_apic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+/*
+ *     Some notes on x86 processor bugs affecting SMP operation:
+ *
+ *     Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ *     The Linux implications for SMP are handled as follows:
+ *
+ *     Pentium III / [Xeon]
+ *             None of the E1AP-E3AP errata are visible to the user.
+ *
+ *     E1AP.   see PII A1AP
+ *     E2AP.   see PII A2AP
+ *     E3AP.   see PII A3AP
+ *
+ *     Pentium II / [Xeon]
+ *             None of the A1AP-A3AP errata are visible to the user.
+ *
+ *     A1AP.   see PPro 1AP
+ *     A2AP.   see PPro 2AP
+ *     A3AP.   see PPro 7AP
+ *
+ *     Pentium Pro
+ *             None of 1AP-9AP errata are visible to the normal user,
+ *     except occasional delivery of 'spurious interrupt' as trap #15.
+ *     This is very rare and a non-problem.
+ *
+ *     1AP.    Linux maps APIC as non-cacheable
+ *     2AP.    worked around in hardware
+ *     3AP.    fixed in C0 and above steppings microcode update.
+ *             Linux does not use excessive STARTUP_IPIs.
+ *     4AP.    worked around in hardware
+ *     5AP.    symmetric IO mode (normal Linux operation) not affected.
+ *             'noapic' mode has vector 0xf filled out properly.
+ *     6AP.    'noapic' mode might be affected - fixed in later steppings
+ *     7AP.    We do not assume writes to the LVT deasserting IRQs
+ *     8AP.    We do not enable low power mode (deep sleep) during MP bootup
+ *     9AP.    We do not use mixed mode
+ *
+ *     Pentium
+ *             There is a marginal case where REP MOVS on 100MHz SMP
+ *     machines with B stepping processors can fail. XXX should provide
+ *     an L1cache=Writethrough or L1cache=off option.
+ *
+ *             B stepping CPUs may hang. There are hardware work arounds
+ *     for this. We warn about it in case your board doesn't have the work
+ *     arounds. Basically that's so I can tell anyone with a B stepping
+ *     CPU and SMP problems "tough".
+ *
+ *     Specific items [From Pentium Processor Specification Update]
+ *
+ *     1AP.    Linux doesn't use remote read
+ *     2AP.    Linux doesn't trust APIC errors
+ *     3AP.    We work around this
+ *     4AP.    Linux never generated 3 interrupts of the same priority
+ *             to cause a lost local interrupt.
+ *     5AP.    Remote read is never used
+ *     6AP.    not affected - worked around in hardware
+ *     7AP.    not affected - worked around in hardware
+ *     8AP.    worked around in hardware - we get explicit CS errors if not
+ *     9AP.    only 'noapic' mode affected. Might generate spurious
+ *             interrupts, we log only the first one and count the
+ *             rest silently.
+ *     10AP.   not affected - worked around in hardware
+ *     11AP.   Linux reads the APIC between writes to avoid this, as per
+ *             the documentation. Make sure you preserve this as it affects
+ *             the C stepping chips too.
+ *     12AP.   not affected - worked around in hardware
+ *     13AP.   not affected - worked around in hardware
+ *     14AP.   we always deassert INIT during bootup
+ *     15AP.   not affected - worked around in hardware
+ *     16AP.   not affected - worked around in hardware
+ *     17AP.   not affected - worked around in hardware
+ *     18AP.   not affected - worked around in hardware
+ *     19AP.   not affected - worked around in BIOS
+ *
+ *     If this sounds worrying believe me these bugs are either ___RARE___,
+ *     or are signal timing bugs worked around in hardware and there's
+ *     about nothing of note with C stepping upwards.
+ */
+
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+       return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+       return SET_APIC_DEST_FIELD(mask);
+}
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+       unsigned int evtchn;
+
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
+       if (evtchn) {
+#if 0
+               shared_info_t *s = HYPERVISOR_shared_info;
+               while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
+                      synch_test_bit(evtchn, &s->evtchn_mask[0]))
+                       ;
+#endif
+               notify_via_evtchn(evtchn);
+       } else
+               printk("send_IPI to unbound port %d/%d",
+                      cpu, vector);
+}
+
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+       int cpu;
+
+       switch (shortcut) {
+       case APIC_DEST_SELF:
+               __send_IPI_one(smp_processor_id(), vector);
+               break;
+       case APIC_DEST_ALLBUT:
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu == smp_processor_id())
+                               continue;
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       default:
+               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+                      vector);
+               break;
+       }
+}
+
+void fastcall send_IPI_self(int vector)
+{
+       __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void send_IPI_mask_bitmask(cpumask_t mask, int vector)
+{
+       unsigned long flags;
+       unsigned int cpu;
+
+       local_irq_save(flags);
+
+       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               if (cpu_isset(cpu, mask)) {
+                       __send_IPI_one(cpu, vector);
+               }
+       }
+
+       local_irq_restore(flags);
+}
+
+inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+{
+
+       send_IPI_mask_bitmask(mask, vector);
+}
+
+#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
+
+/*
+ *     Smarter SMP flushing macros. 
+ *             c/o Linus Torvalds.
+ *
+ *     These mean you can really definitely utterly forget about
+ *     writing to user space from interrupts. (It's not allowed anyway).
+ *
+ *     Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static cpumask_t flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+#define FLUSH_ALL      0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context, 
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+               BUG();
+       cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+       load_cr3(swapper_pg_dir);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ *     Stop ipi delivery for the old mm. This is not synchronized with
+ *     the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ *     for the wrong mm, and in the worst case we perform a superfluous
+ *     tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ *     Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *     was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ *     Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ *     Now the other cpus will send tlb flush ipis.
+ * 1a4) change cr3.
+ * 1b) thread switch without mm change
+ *     cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ *     flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ *     Atomically set the bit [other cpus will start sending flush ipis],
+ *     and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ *   runs in kernel space, the cpu could load tlb entries for user space
+ *   pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
+{
+       unsigned long cpu;
+
+       cpu = get_cpu();
+
+       if (!cpu_isset(cpu, flush_cpumask))
+               goto out;
+               /* 
+                * This was a BUG() but until someone can quote me the
+                * line from the intel manual that guarantees an IPI to
+                * multiple CPUs is retried _only_ on the erroring CPUs
+                * its staying as a return
+                *
+                * BUG();
+                */
+                
+       if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+               if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+                       if (flush_va == FLUSH_ALL)
+                               local_flush_tlb();
+                       else
+                               __flush_tlb_one(flush_va);
+               } else
+                       leave_mm(cpu);
+       }
+       smp_mb__before_clear_bit();
+       cpu_clear(cpu, flush_cpumask);
+       smp_mb__after_clear_bit();
+out:
+       put_cpu_no_resched();
+
+       return IRQ_HANDLED;
+}
+
+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+                                               unsigned long va)
+{
+       cpumask_t tmp;
+       /*
+        * A couple of (to be removed) sanity checks:
+        *
+        * - we do not send IPIs to not-yet booted CPUs.
+        * - current CPU must not be in mask
+        * - mask must exist :)
+        */
+       BUG_ON(cpus_empty(cpumask));
+
+       cpus_and(tmp, cpumask, cpu_online_map);
+       BUG_ON(!cpus_equal(cpumask, tmp));
+       BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+       BUG_ON(!mm);
+
+       /*
+        * i'm not happy about this global shared spinlock in the
+        * MM hot path, but we'll see how contended it is.
+        * Temporarily this turns IRQs off, so that lockups are
+        * detected by the NMI watchdog.
+        */
+       spin_lock(&tlbstate_lock);
+       
+       flush_mm = mm;
+       flush_va = va;
+#if NR_CPUS <= BITS_PER_LONG
+       atomic_set_mask(cpumask, &flush_cpumask);
+#else
+       {
+               int k;
+               unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
+               unsigned long *cpu_mask = (unsigned long *)&cpumask;
+               for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
+                       atomic_set_mask(cpu_mask[k], &flush_mask[k]);
+       }
+#endif
+       /*
+        * We have to send the IPI only to
+        * CPUs affected.
+        */
+       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+       while (!cpus_empty(flush_cpumask))
+               /* nothing. lockup detection does not belong here */
+               mb();
+
+       flush_mm = NULL;
+       flush_va = 0;
+       spin_unlock(&tlbstate_lock);
+}
+       
+void flush_tlb_current_task(void)
+{
+       struct mm_struct *mm = current->mm;
+       cpumask_t cpu_mask;
+
+       preempt_disable();
+       cpu_mask = mm->cpu_vm_mask;
+       cpu_clear(smp_processor_id(), cpu_mask);
+
+       local_flush_tlb();
+       if (!cpus_empty(cpu_mask))
+               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+       preempt_enable();
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+       cpumask_t cpu_mask;
+
+       preempt_disable();
+       cpu_mask = mm->cpu_vm_mask;
+       cpu_clear(smp_processor_id(), cpu_mask);
+
+       if (current->active_mm == mm) {
+               if (current->mm)
+                       local_flush_tlb();
+               else
+                       leave_mm(smp_processor_id());
+       }
+       if (!cpus_empty(cpu_mask))
+               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+       preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       cpumask_t cpu_mask;
+
+       preempt_disable();
+       cpu_mask = mm->cpu_vm_mask;
+       cpu_clear(smp_processor_id(), cpu_mask);
+
+       if (current->active_mm == mm) {
+               if(current->mm)
+                       __flush_tlb_one(va);
+               else
+                       leave_mm(smp_processor_id());
+       }
+
+       if (!cpus_empty(cpu_mask))
+               flush_tlb_others(cpu_mask, mm, va);
+
+       preempt_enable();
+}
+
+static void do_flush_tlb_all(void* info)
+{
+       unsigned long cpu = smp_processor_id();
+
+       __flush_tlb_all();
+       if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+               leave_mm(cpu);
+}
+
+void flush_tlb_all(void)
+{
+       on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+       send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+       void (*func) (void *info);
+       void *info;
+       atomic_t started;
+       atomic_t finished;
+       int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+                       int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or are or have executed.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+{
+       struct call_data_struct data;
+       int cpus = num_online_cpus()-1;
+
+       if (!cpus)
+               return 0;
+
+       /* Can deadlock when called with interrupts disabled */
+       WARN_ON(irqs_disabled());
+
+       data.func = func;
+       data.info = info;
+       atomic_set(&data.started, 0);
+       data.wait = wait;
+       if (wait)
+               atomic_set(&data.finished, 0);
+
+       spin_lock(&call_lock);
+       call_data = &data;
+       mb();
+       
+       /* Send a message to all other CPUs and wait for them to respond */
+       send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+       /* Wait for response */
+       while (atomic_read(&data.started) != cpus)
+               barrier();
+
+       if (wait)
+               while (atomic_read(&data.finished) != cpus)
+                       barrier();
+       spin_unlock(&call_lock);
+
+       return 0;
+}
+
+static void stop_this_cpu (void * dummy)
+{
+       /*
+        * Remove this CPU:
+        */
+       cpu_clear(smp_processor_id(), cpu_online_map);
+       local_irq_disable();
+#if 1
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
+       disable_local_APIC();
+#endif
+       if (cpu_data[smp_processor_id()].hlt_works_ok)
+               for(;;) __asm__("hlt");
+       for (;;);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+       smp_call_function(stop_this_cpu, NULL, 1, 0);
+
+       local_irq_disable();
+#if 1
+       xxprint("smp_send_stop disable_local_APIC\n");
+#else
+       disable_local_APIC();
+#endif
+       local_irq_enable();
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
+{
+
+       return IRQ_HANDLED;
+}
+
+#include <linux/kallsyms.h>
+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
+                                       struct pt_regs *regs)
+{
+       void (*func) (void *info) = call_data->func;
+       void *info = call_data->info;
+       int wait = call_data->wait;
+
+       /*
+        * Notify initiating CPU that I've grabbed the data and am
+        * about to execute the function
+        */
+       mb();
+       atomic_inc(&call_data->started);
+       /*
+        * At this point the info structure may be out of scope unless wait==1
+        */
+       irq_enter();
+       (*func)(info);
+       irq_exit();
+
+       if (wait) {
+               mb();
+               atomic_inc(&call_data->finished);
+       }
+
+       return IRQ_HANDLED;
+}
+
diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
new file mode 100644 (file)
index 0000000..9f39dde
--- /dev/null
@@ -0,0 +1,1365 @@
+/*
+ *     x86 SMP booting functions
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Much of the core SMP work is based on previous work by Thomas Radke, to
+ *     whom a great many thanks are extended.
+ *
+ *     Thanks to Intel for making available several different Pentium,
+ *     Pentium Pro and Pentium-II/Xeon MP machines.
+ *     Original development of Linux SMP code supported by Caldera.
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ *
+ *     Fixes
+ *             Felix Koop      :       NR_CPUS used properly
+ *             Jose Renau      :       Handle single CPU case.
+ *             Alan Cox        :       By repeated request 8) - Total BogoMIPS report.
+ *             Greg Wright     :       Fix for kernel stacks panic.
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *     Matthias Sattler        :       Changes for 2.1 kernel map.
+ *     Michel Lespinasse       :       Changes for 2.1 kernel map.
+ *     Michael Chastain        :       Change trampoline.S to gnu as.
+ *             Alan Cox        :       Dumb bug: 'B' step PPro's are fine
+ *             Ingo Molnar     :       Added APIC timers, based on code
+ *                                     from Jose Renau
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *             Tigran Aivazian :       fixed "0.00 in /proc/uptime on SMP" bug.
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs
+ *             Martin J. Bligh :       Added support for multi-quad systems
+ *             Dave Jones      :       Report invalid combinations of Athlon CPUs.
+*              Rusty Russell   :       Hacked into shape for new "hotplug" boot process. */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+
+#if 1
+#define Dprintk(args...)
+#else
+#include <mach_apic.h>
+#endif
+#include <mach_wakecpu.h>
+#include <smpboot_hooks.h>
+
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
+
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
+
+/* bitmap of online cpus */
+cpumask_t cpu_online_map;
+
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+u8 x86_cpu_to_apicid[NR_CPUS] =
+                       { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+#if 0
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end  [];
+static unsigned char *trampoline_base;
+static int trampoline_exec;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+       memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+       return virt_to_phys(trampoline_base);
+}
+#endif
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+#if 1
+       int cpu;
+
+       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+               cpu_gdt_descr[cpu].address = (unsigned long)
+                       alloc_bootmem_low_pages(PAGE_SIZE);
+               /* XXX free unused pages later */
+       }
+#else
+       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+       /*
+        * Has to be in very low memory so we can execute
+        * real-mode AP code.
+        */
+       if (__pa(trampoline_base) >= 0x9F000)
+               BUG();
+       /*
+        * Make the SMP trampoline executable:
+        */
+       trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
+#endif
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+static void __init smp_store_cpu_info(int id)
+{
+       struct cpuinfo_x86 *c = cpu_data + id;
+
+       *c = boot_cpu_data;
+       if (id!=0)
+               identify_cpu(c);
+       /*
+        * Mask B, Pentium, but not Pentium MMX
+        */
+       if (c->x86_vendor == X86_VENDOR_INTEL &&
+           c->x86 == 5 &&
+           c->x86_mask >= 1 && c->x86_mask <= 4 &&
+           c->x86_model <= 3)
+               /*
+                * Remember we have B step Pentia with bugs
+                */
+               smp_b_stepping = 1;
+
+       /*
+        * Certain Athlons might work (for various values of 'work') in SMP
+        * but they are not certified as MP capable.
+        */
+       if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+
+               /* Athlon 660/661 is valid. */  
+               if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
+                       goto valid_k7;
+
+               /* Duron 670 is valid */
+               if ((c->x86_model==7) && (c->x86_mask==0))
+                       goto valid_k7;
+
+               /*
+                * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
+                * It's worth noting that the A5 stepping (662) of some Athlon XP's
+                * have the MP bit set.
+                * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+                */
+               if (((c->x86_model==6) && (c->x86_mask>=2)) ||
+                   ((c->x86_model==7) && (c->x86_mask>=1)) ||
+                    (c->x86_model> 7))
+                       if (cpu_has_mp)
+                               goto valid_k7;
+
+               /* If we get here, it's not a certified SMP capable AMD system. */
+               tainted |= TAINT_UNSAFE_SMP;
+       }
+
+valid_k7:
+       ;
+}
+
+#if 0
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSC's synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+static void __init synchronize_tsc_bp (void)
+{
+       int i;
+       unsigned long long t0;
+       unsigned long long sum, avg;
+       long long delta;
+       unsigned long one_usec;
+       int buggy = 0;
+
+       printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+       /* convert from kcyc/sec to cyc/usec */
+       one_usec = cpu_khz / 1000;
+
+       atomic_set(&tsc_start_flag, 1);
+       wmb();
+
+       /*
+        * We loop a few times to get a primed instruction cache,
+        * then the last pass is more or less synchronized and
+        * the BP and APs set their cycle counters to zero all at
+        * once. This reduces the chance of having random offsets
+        * between the processors, and guarantees that the maximum
+        * delay between the cycle counters is never bigger than
+        * the latency of information-passing (cachelines) between
+        * two CPUs.
+        */
+       for (i = 0; i < NR_LOOPS; i++) {
+               /*
+                * all APs synchronize but they loop on '== num_cpus'
+                */
+               while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+                       mb();
+               atomic_set(&tsc_count_stop, 0);
+               wmb();
+               /*
+                * this lets the APs save their current TSC:
+                */
+               atomic_inc(&tsc_count_start);
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               /*
+                * We clear the TSC in the last loop:
+                */
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               /*
+                * Wait for all APs to leave the synchronization point:
+                */
+               while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+                       mb();
+               atomic_set(&tsc_count_start, 0);
+               wmb();
+               atomic_inc(&tsc_count_stop);
+       }
+
+       sum = 0;
+       for (i = 0; i < NR_CPUS; i++) {
+               if (cpu_isset(i, cpu_callout_map)) {
+                       t0 = tsc_values[i];
+                       sum += t0;
+               }
+       }
+       avg = sum;
+       do_div(avg, num_booting_cpus());
+
+       sum = 0;
+       for (i = 0; i < NR_CPUS; i++) {
+               if (!cpu_isset(i, cpu_callout_map))
+                       continue;
+               delta = tsc_values[i] - avg;
+               if (delta < 0)
+                       delta = -delta;
+               /*
+                * We report bigger than 2 microseconds clock differences.
+                */
+               if (delta > 2*one_usec) {
+                       long realdelta;
+                       if (!buggy) {
+                               buggy = 1;
+                               printk("\n");
+                       }
+                       realdelta = delta;
+                       do_div(realdelta, one_usec);
+                       if (tsc_values[i] < avg)
+                               realdelta = -realdelta;
+
+                       printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+               }
+
+               sum += delta;
+       }
+       if (!buggy)
+               printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+       int i;
+
+       /*
+        * Not every cpu is online at the time
+        * this gets called, so we first wait for the BP to
+        * finish SMP initialization:
+        */
+       while (!atomic_read(&tsc_start_flag)) mb();
+
+       for (i = 0; i < NR_LOOPS; i++) {
+               atomic_inc(&tsc_count_start);
+               while (atomic_read(&tsc_count_start) != num_booting_cpus())
+                       mb();
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               atomic_inc(&tsc_count_stop);
+               while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+       }
+}
+#undef NR_LOOPS
+#endif
+
+extern void calibrate_delay(void);
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+       int cpuid, phys_id;
+       unsigned long timeout;
+
+#if 0
+       /*
+        * If waken up by an INIT in an 82489DX configuration
+        * we may get here before an INIT-deassert IPI reaches
+        * our local APIC.  We have to wait for the IPI or we'll
+        * lock up on an APIC access.
+        */
+       wait_for_init_deassert(&init_deasserted);
+#endif
+
+       /*
+        * (This works even if the APIC is not enabled.)
+        */
+       phys_id = smp_processor_id();
+       cpuid = smp_processor_id();
+       if (cpu_isset(cpuid, cpu_callin_map)) {
+               printk("huh, phys CPU#%d, CPU#%d already present??\n",
+                                       phys_id, cpuid);
+               BUG();
+       }
+       Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+       /*
+        * STARTUP IPIs are fragile beasts as they might sometimes
+        * trigger some glue motherboard logic. Complete APIC bus
+        * silence for 1 second, this overestimates the time the
+        * boot CPU is spending to send the up to 2 STARTUP IPIs
+        * by a factor of two. This should be enough.
+        */
+
+       /*
+        * Waiting 2s total for startup (udelay is not yet working)
+        */
+       timeout = jiffies + 2*HZ;
+       while (time_before(jiffies, timeout)) {
+               /*
+                * Has the boot CPU finished it's STARTUP sequence?
+                */
+               if (cpu_isset(cpuid, cpu_callout_map))
+                       break;
+               rep_nop();
+       }
+
+       if (!time_before(jiffies, timeout)) {
+               printk("BUG: CPU%d started up but did not get a callout!\n",
+                       cpuid);
+               BUG();
+       }
+
+#if 0
+       /*
+        * the boot CPU has finished the init stage and is spinning
+        * on callin_map until we finish. We are free to set up this
+        * CPU, first the APIC. (this is probably redundant on most
+        * boards)
+        */
+
+       Dprintk("CALLIN, before setup_local_APIC().\n");
+       smp_callin_clear_local_apic();
+       setup_local_APIC();
+#endif
+       map_cpu_to_logical_apicid();
+
+       /*
+        * Get our bogomips.
+        */
+       calibrate_delay();
+       Dprintk("Stack at about %p\n",&cpuid);
+
+       /*
+        * Save our processor parameters
+        */
+       smp_store_cpu_info(cpuid);
+
+#if 0
+       disable_APIC_timer();
+#endif
+
+       /*
+        * Allow the master to continue.
+        */
+       cpu_set(cpuid, cpu_callin_map);
+
+#if 0
+       /*
+        *      Synchronize the TSC with the BP
+        */
+       if (cpu_has_tsc && cpu_khz)
+               synchronize_tsc_ap();
+#endif
+}
+
+int cpucount;
+
+
+static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
+                                        struct pt_regs *regs)
+{
+
+       return IRQ_HANDLED;
+}
+
+static struct irqaction local_irq_debug = {
+       local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
+       NULL, NULL
+};
+
+void local_setup_debug(void)
+{
+       (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
+}
+
+
+extern void local_setup_timer(void);
+
+/*
+ * Activate a secondary processor.
+ */
+static int __init start_secondary(void *unused)
+{
+       /*
+        * Dont put anything before smp_callin(), SMP
+        * booting is too fragile that we want to limit the
+        * things done here to the most necessary things.
+        */
+       cpu_init();
+       smp_callin();
+       while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+               rep_nop();
+       local_setup_timer();
+       local_setup_debug();    /* XXX */
+       smp_intr_init();
+       local_irq_enable();
+       /*
+        * low-memory mappings have been cleared, flush them from
+        * the local TLBs too.
+        */
+       local_flush_tlb();
+       cpu_set(smp_processor_id(), cpu_online_map);
+
+       /* We can take interrupts now: we're officially "up". */
+       local_irq_enable();
+
+       wmb();
+       if (0) {
+               char *msg2 = "delay2\n";
+               int timeout;
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       udelay(1000);
+                       if (timeout == 2000) {
+                               (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
+                               timeout = 0;
+                       }
+               }
+       }
+       cpu_idle();
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+       /*
+        * We don't actually need to load the full TSS,
+        * basically just the stack pointer and the eip.
+        */
+
+       asm volatile(
+               "movl %0,%%esp\n\t"
+               "jmp *%1"
+               :
+               :"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+extern struct {
+       void * esp;
+       unsigned short ss;
+} stack_start;
+
+#ifdef CONFIG_NUMA
+
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+                               { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
+
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
+{
+       printk("Mapping cpu %d to node %d\n", cpu, node);
+       cpu_set(cpu, node_2_cpu_mask[node]);
+       cpu_2_node[cpu] = node;
+}
+
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
+{
+       int node;
+
+       printk("Unmapping cpu %d from all nodes\n", cpu);
+       for (node = 0; node < MAX_NUMNODES; node ++)
+               cpu_clear(cpu, node_2_cpu_mask[node]);
+       cpu_2_node[cpu] = 0;
+}
+#else /* !CONFIG_NUMA */
+
+#define map_cpu_to_node(cpu, node)     ({})
+#define unmap_cpu_to_node(cpu) ({})
+
+#endif /* CONFIG_NUMA */
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+void map_cpu_to_logical_apicid(void)
+{
+       int cpu = smp_processor_id();
+       int apicid = smp_processor_id();
+
+       cpu_2_logical_apicid[cpu] = apicid;
+       map_cpu_to_node(cpu, apicid_to_node(apicid));
+}
+
+void unmap_cpu_to_logical_apicid(int cpu)
+{
+       cpu_2_logical_apicid[cpu] = BAD_APICID;
+       unmap_cpu_to_node(cpu);
+}
+
+#if APIC_DEBUG
+static inline void __inquire_remote_apic(int apicid)
+{
+       int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+       char *names[] = { "ID", "VERSION", "SPIV" };
+       int timeout, status;
+
+       printk("Inquiring remote APIC #%d...\n", apicid);
+
+       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+               printk("... APIC #%d %s: ", apicid, names[i]);
+
+               /*
+                * Wait for idle.
+                */
+               apic_wait_icr_idle();
+
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+               apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+               timeout = 0;
+               do {
+                       udelay(100);
+                       status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+               } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+               switch (status) {
+               case APIC_ICR_RR_VALID:
+                       status = apic_read(APIC_RRR);
+                       printk("%08x\n", status);
+                       break;
+               default:
+                       printk("failed\n");
+               }
+       }
+}
+#endif
+
+#if 0
+#ifdef WAKE_SECONDARY_VIA_NMI
+/* 
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+       unsigned long send_status = 0, accept_status = 0;
+       int timeout, maxlvt;
+
+       /* Target chip */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+       /* Boot on the stack */
+       /* Kick the second */
+       apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       /*
+        * Give the other CPU some time to accept the IPI.
+        */
+       udelay(200);
+       /*
+        * Due to the Pentium erratum 3AP.
+        */
+       maxlvt = get_maxlvt();
+       if (maxlvt > 3) {
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+       }
+       accept_status = (apic_read(APIC_ESR) & 0xEF);
+       Dprintk("NMI sent.\n");
+
+       if (send_status)
+               printk("APIC never delivered???\n");
+       if (accept_status)
+               printk("APIC delivery error (%lx).\n", accept_status);
+
+       return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_NMI */
+
+#ifdef WAKE_SECONDARY_VIA_INIT
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+{
+       unsigned long send_status = 0, accept_status = 0;
+       int maxlvt, timeout, num_starts, j;
+
+       /*
+        * Be paranoid about clearing APIC errors.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+
+       Dprintk("Asserting INIT.\n");
+
+       /*
+        * Turn INIT on target chip
+        */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+       /*
+        * Send IPI
+        */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+                               | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       mdelay(10);
+
+       Dprintk("Deasserting INIT.\n");
+
+       /* Target chip */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+       /* Send IPI */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       atomic_set(&init_deasserted, 1);
+
+       /*
+        * Should we send STARTUP IPIs ?
+        *
+        * Determine this based on the APIC version.
+        * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+        */
+       if (APIC_INTEGRATED(apic_version[phys_apicid]))
+               num_starts = 2;
+       else
+               num_starts = 0;
+
+       /*
+        * Run STARTUP IPI loop.
+        */
+       Dprintk("#startup loops: %d.\n", num_starts);
+
+       maxlvt = get_maxlvt();
+
+       for (j = 1; j <= num_starts; j++) {
+               Dprintk("Sending STARTUP #%d.\n",j);
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+               Dprintk("After apic_write.\n");
+
+               /*
+                * STARTUP IPI
+                */
+
+               /* Target chip */
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+               /* Boot on the stack */
+               /* Kick the second */
+               apic_write_around(APIC_ICR, APIC_DM_STARTUP
+                                       | (start_eip >> 12));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(300);
+
+               Dprintk("Startup point 1.\n");
+
+               Dprintk("Waiting for send to finish...\n");
+               timeout = 0;
+               do {
+                       Dprintk("+");
+                       udelay(100);
+                       send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+               } while (send_status && (timeout++ < 1000));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(200);
+               /*
+                * Due to the Pentium erratum 3AP.
+                */
+               if (maxlvt > 3) {
+                       apic_read_around(APIC_SPIV);
+                       apic_write(APIC_ESR, 0);
+               }
+               accept_status = (apic_read(APIC_ESR) & 0xEF);
+               if (send_status || accept_status)
+                       break;
+       }
+       Dprintk("After Startup.\n");
+
+       if (send_status)
+               printk("APIC never delivered???\n");
+       if (accept_status)
+               printk("APIC delivery error (%lx).\n", accept_status);
+
+       return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_INIT */
+#endif
+
+extern cpumask_t cpu_initialized;
+
+static int __init do_boot_cpu(int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
+ */
+{
+       struct task_struct *idle;
+       unsigned long boot_error;
+       int timeout, cpu;
+       unsigned long start_eip;
+#if 0
+       unsigned short nmi_high = 0, nmi_low = 0;
+#endif
+       full_execution_context_t ctxt;
+       extern void startup_32_smp(void);
+       extern void hypervisor_callback(void);
+       extern void failsafe_callback(void);
+       extern int smp_trap_init(trap_info_t *);
+       int i;
+
+       cpu = ++cpucount;
+       /*
+        * We can't use kernel_thread since we must avoid to
+        * reschedule the child.
+        */
+       idle = fork_idle(cpu);
+       if (IS_ERR(idle))
+               panic("failed fork for CPU %d", cpu);
+       idle->thread.eip = (unsigned long) start_secondary;
+       /* start_eip had better be page-aligned! */
+       start_eip = (unsigned long)startup_32_smp;
+
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+       /* Stack for startup_32 can be just as for start_secondary onwards */
+       stack_start.esp = (void *) idle->thread.esp;
+
+       irq_ctx_init(cpu);
+
+       /*
+        * This grunge runs the startup process for
+        * the targeted processor.
+        */
+
+       atomic_set(&init_deasserted, 0);
+
+#if 1
+       if (cpu_gdt_descr[0].size > PAGE_SIZE)
+               BUG();
+       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+       memcpy((void *)cpu_gdt_descr[cpu].address,
+              (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+               memset((char *)cpu_gdt_descr[cpu].address +
+                      FIRST_RESERVED_GDT_ENTRY * 8, 0,
+                      NR_RESERVED_GDT_ENTRIES * 8);
+
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.cpu_ctxt.ds = __USER_DS;
+       ctxt.cpu_ctxt.es = __USER_DS;
+       ctxt.cpu_ctxt.fs = 0;
+       ctxt.cpu_ctxt.gs = 0;
+       ctxt.cpu_ctxt.ss = __KERNEL_DS;
+       ctxt.cpu_ctxt.cs = __KERNEL_CS;
+       ctxt.cpu_ctxt.eip = start_eip;
+       ctxt.cpu_ctxt.esp = idle->thread.esp;
+       ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+       /* FPU is set up to default initial state. */
+       memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       {
+               unsigned long va;
+               int f;
+
+               for (va = cpu_gdt_descr[cpu].address, f = 0;
+                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+                    va += PAGE_SIZE, f++) {
+                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       make_page_readonly((void *)va);
+               }
+               ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
+               flush_page_update_queue();
+       }
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss  = __KERNEL_DS;
+       ctxt.kernel_esp = idle->thread.esp;
+
+       /* Callback handlers. */
+       ctxt.event_callback_cs     = __KERNEL_CS;
+       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt.failsafe_callback_cs  = __KERNEL_CS;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+       ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
+
+       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+       if (boot_error) {
+               /* Try to put things back the way they were before ... */
+               unmap_cpu_to_logical_apicid(cpu);
+               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+               cpucount--;
+       }
+
+#else
+       Dprintk("Setting warm reset code and vector.\n");
+
+       store_NMI_vector(&nmi_high, &nmi_low);
+
+       smpboot_setup_warm_reset_vector(start_eip);
+
+       /*
+        * Starting actual IPI sequence...
+        */
+       boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+                       if (*((volatile unsigned char *)trampoline_base)
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+                       else
+                               /* trampoline code not run */
+                               printk("Not responding.\n");
+                       inquire_remote_apic(apicid);
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+       if (boot_error) {
+               /* Try to put things back the way they were before ... */
+               unmap_cpu_to_logical_apicid(cpu);
+               cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+               cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+               cpucount--;
+       }
+
+       /* mark "stuck" area as not stuck */
+       *((volatile unsigned long *)trampoline_base) = 0;
+#endif
+
+       return boot_error;
+}
+
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+       unsigned long cachesize;       /* kB   */
+       unsigned long bandwidth = 350; /* MB/s */
+       /*
+        * Rough estimation for SMP scheduling, this is the number of
+        * cycles it takes for a fully memory-limited process to flush
+        * the SMP-local cache.
+        *
+        * (For a P5 this pretty much means we will choose another idle
+        *  CPU almost always at wakeup time (this is due to the small
+        *  L1 cache), on PIIs it's around 50-100 usecs, depending on
+        *  the cache size)
+        */
+
+       if (!cpu_khz) {
+               /*
+                * this basically disables processor-affinity
+                * scheduling on SMP without a TSC.
+                */
+               cacheflush_time = 0;
+               return;
+       } else {
+               cachesize = boot_cpu_data.x86_cache_size;
+               if (cachesize == -1) {
+                       cachesize = 16; /* Pentiums, 2x8kB cache */
+                       bandwidth = 100;
+               }
+
+               cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+       }
+
+       cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
+
+       printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+               (long)cacheflush_time/(cpu_khz/1000),
+               ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+       printk("task migration cache decay timeout: %ld msecs.\n",
+               cache_decay_ticks);
+}
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+#if 0
+static int boot_cpu_logical_apicid;
+#endif
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+
+static void __init smp_boot_cpus(unsigned int max_cpus)
+{
+       int cpu, kicked;
+       unsigned long bogosum = 0;
+#if 0
+       int apicid, bit;
+#endif
+
+       /*
+        * Setup boot CPU information
+        */
+       smp_store_cpu_info(0); /* Final full version of the data */
+       printk("CPU%d: ", 0);
+       print_cpu_info(&cpu_data[0]);
+
+#if 0
+       boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+       boot_cpu_logical_apicid = logical_smp_processor_id();
+       x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+#else
+       // boot_cpu_physical_apicid = 0;
+       // boot_cpu_logical_apicid = 0;
+       x86_cpu_to_apicid[0] = 0;
+#endif
+
+       current_thread_info()->cpu = 0;
+       smp_tune_scheduling();
+       cpus_clear(cpu_sibling_map[0]);
+       cpu_set(0, cpu_sibling_map[0]);
+
+       /*
+        * If we couldn't find an SMP configuration at boot time,
+        * get out of here now!
+        */
+       if (!smp_found_config /* && !acpi_lapic) */) {
+               printk(KERN_NOTICE "SMP motherboard not detected.\n");
+               smpboot_clear_io_apic_irqs();
+#if 0
+               phys_cpu_present_map = physid_mask_of_physid(0);
+               if (APIC_init_uniprocessor())
+                       printk(KERN_NOTICE "Local APIC not detected."
+                                          " Using dummy APIC emulation.\n");
+#endif
+               map_cpu_to_logical_apicid();
+               return;
+       }
+
+#if 0
+       /*
+        * Should not be necessary because the MP table should list the boot
+        * CPU too, but we do it for the sake of robustness anyway.
+        * Makes no sense to do this check in clustered apic mode, so skip it
+        */
+       if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+               printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+                               boot_cpu_physical_apicid);
+               physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+       }
+
+       /*
+        * If we couldn't find a local APIC, then get out of here now!
+        */
+       if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+                       boot_cpu_physical_apicid);
+               printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+               smpboot_clear_io_apic_irqs();
+               phys_cpu_present_map = physid_mask_of_physid(0);
+               return;
+       }
+
+       verify_local_APIC();
+#endif
+
+       /*
+        * If SMP should be disabled, then really disable it!
+        */
+       if (!max_cpus) {
+               HYPERVISOR_shared_info->n_vcpu = 1;
+               printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+               smpboot_clear_io_apic_irqs();
+#if 0
+               phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
+               return;
+       }
+
+       smp_intr_init();
+
+#if 0
+       connect_bsp_APIC();
+       setup_local_APIC();
+#endif
+       map_cpu_to_logical_apicid();
+#if 0
+
+
+       setup_portio_remap();
+
+       /*
+        * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+        *
+        * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+        * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
+        * clustered apic ID.
+        */
+       Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+#endif
+       Dprintk("CPU present map: %lx\n",
+               (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
+
+       kicked = 1;
+       for (cpu = 1; kicked < NR_CPUS &&
+                    cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
+               if (max_cpus <= cpucount+1)
+                       continue;
+
+               if (do_boot_cpu(cpu))
+                       printk("CPU #%d not responding - cannot use it.\n",
+                                                               cpu);
+               else
+                       ++kicked;
+       }
+
+#if 0
+       /*
+        * Cleanup possible dangling ends...
+        */
+       smpboot_restore_warm_reset_vector();
+#endif
+
+       /*
+        * Allow the user to impress friends.
+        */
+       Dprintk("Before bogomips.\n");
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               if (cpu_isset(cpu, cpu_callout_map))
+                       bogosum += cpu_data[cpu].loops_per_jiffy;
+       printk(KERN_INFO
+               "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+               cpucount+1,
+               bogosum/(500000/HZ),
+               (bogosum/(5000/HZ))%100);
+       
+       Dprintk("Before bogocount - setting activated=1.\n");
+
+       if (smp_b_stepping)
+               printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+       /*
+        * Don't taint if we are running SMP kernel on a single non-MP
+        * approved Athlon
+        */
+       if (tainted & TAINT_UNSAFE_SMP) {
+               if (cpucount)
+                       printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+               else
+                       tainted &= ~TAINT_UNSAFE_SMP;
+       }
+
+       Dprintk("Boot done.\n");
+
+       /*
+        * construct cpu_sibling_map[], so that we can tell sibling CPUs
+        * efficiently.
+        */
+       for (cpu = 0; cpu < NR_CPUS; cpu++)
+               cpus_clear(cpu_sibling_map[cpu]);
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               int siblings = 0;
+               int i;
+               if (!cpu_isset(cpu, cpu_callout_map))
+                       continue;
+
+               if (smp_num_siblings > 1) {
+                       for (i = 0; i < NR_CPUS; i++) {
+                               if (!cpu_isset(i, cpu_callout_map))
+                                       continue;
+                               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                                       siblings++;
+                                       cpu_set(i, cpu_sibling_map[cpu]);
+                               }
+                       }
+               } else {
+                       siblings++;
+                       cpu_set(cpu, cpu_sibling_map[cpu]);
+               }
+
+               if (siblings != smp_num_siblings)
+                       printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+       }
+
+#if 0
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               check_nmi_watchdog();
+
+       smpboot_setup_io_apic();
+
+       setup_boot_APIC_clock();
+
+       /*
+        * Synchronize the TSC with the AP
+        */
+       if (cpu_has_tsc && cpucount && cpu_khz)
+               synchronize_tsc_bp();
+#endif
+}
+
+/* These are wrappers to interface to the new boot process.  Someone
+   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+       smp_boot_cpus(max_cpus);
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+       cpu_set(smp_processor_id(), cpu_online_map);
+       cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+       /* This only works at boot for x86.  See "rewrite" above. */
+       if (cpu_isset(cpu, smp_commenced_mask)) {
+               local_irq_enable();
+               return -ENOSYS;
+       }
+
+       /* In case one didn't come up */
+       if (!cpu_isset(cpu, cpu_callin_map)) {
+               local_irq_enable();
+               return -EIO;
+       }
+
+       local_irq_enable();
+       /* Unleash the CPU! */
+       cpu_set(cpu, smp_commenced_mask);
+       while (!cpu_isset(cpu, cpu_online_map))
+               mb();
+       return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#if 1
+#else
+#ifdef CONFIG_X86_IO_APIC
+       setup_ioapic_dest();
+#endif
+       zap_low_mappings();
+       /*
+        * Disable executability of the SMP trampoline:
+        */
+       set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
+}
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+
+static struct irqaction reschedule_irq = {
+       smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
+       NULL, NULL
+};
+
+extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
+
+static struct irqaction invalidate_irq = {
+       smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
+       NULL, NULL
+};
+
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static struct irqaction call_function_irq = {
+       smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
+       "call_function", NULL, NULL
+};
+
+void __init smp_intr_init(void)
+{
+
+       (void)setup_irq(
+           bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
+           &reschedule_irq);
+       (void)setup_irq(
+           bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
+           &invalidate_irq);
+       (void)setup_irq(
+           bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
+           &call_function_irq);
+}
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c
new file mode 100644 (file)
index 0000000..51addc6
--- /dev/null
@@ -0,0 +1,19 @@
+/* Copyright (C) 2004, Christian Limpach */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/threads.h>
+
+unsigned int __initdata maxcpus = NR_CPUS;
+
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+       printk("setup_profiling_timer\n");
+
+       return 0;
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
new file mode 100644 (file)
index 0000000..80b7ca0
--- /dev/null
@@ -0,0 +1,3 @@
+
+obj-y  := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
+
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c
new file mode 100644 (file)
index 0000000..a9a0067
--- /dev/null
@@ -0,0 +1,87 @@
+/******************************************************************************
+ * blktap.c
+ * 
+ * XenLinux virtual block-device tap.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ * Based on the original split block driver:
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ * 
+ * Note that unlike the split block driver code, this driver has been developed
+ * strictly for Linux 2.6
+ */
+
+#include "blktap.h"
+
+int __init xlblktap_init(void)
+{
+    ctrl_msg_t               cmsg;
+    blkif_fe_driver_status_t fe_st;
+    blkif_be_driver_status_t be_st;
+
+    printk(KERN_INFO "Initialising Xen block tap device\n");
+
+    DPRINTK("   tap - Backend connection init:\n");
+
+
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
+
+    /* Send a driver-UP notification to the domain controller. */
+    cmsg.type      = CMSG_BLKIF_FE;
+    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
+    cmsg.length    = sizeof(blkif_fe_driver_status_t);
+    fe_st.status   = BLKIF_DRIVER_STATUS_UP;
+    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+    DPRINTK("   tap - Frontend connection init:\n");
+    
+    active_reqs_init();
+    blkif_interface_init();
+    blkdev_schedule_init();
+    
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
+
+    /* Send a driver-UP notification to the domain controller. */
+    cmsg.type      = CMSG_BLKIF_BE;
+    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
+    cmsg.length    = sizeof(blkif_be_driver_status_t);
+    be_st.status   = BLKIF_DRIVER_STATUS_UP;
+    memcpy(cmsg.msg, &be_st, sizeof(be_st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+    DPRINTK("   tap - Userland channel init:\n");
+
+    blktap_init();
+
+    DPRINTK("Blkif tap device initialized.\n");
+
+    return 0;
+}
+
+#if 0 /* tap doesn't handle suspend/resume */
+void blkdev_suspend(void)
+{
+}
+
+void blkdev_resume(void)
+{
+    ctrl_msg_t               cmsg;
+    blkif_fe_driver_status_t st;    
+
+    /* Send a driver-UP notification to the domain controller. */
+    cmsg.type      = CMSG_BLKIF_FE;
+    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
+    cmsg.length    = sizeof(blkif_fe_driver_status_t);
+    st.status      = BLKIF_DRIVER_STATUS_UP;
+    memcpy(cmsg.msg, &st, sizeof(st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+#endif
+
+__initcall(xlblktap_init);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h
new file mode 100644 (file)
index 0000000..eb084e8
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * blktap.h
+ * 
+ * Interfaces for the Xen block tap driver.
+ * 
+ * (c) 2004, Andrew Warfield, University of Cambridge
+ * 
+ */
+
+#ifndef __BLKTAP_H__
+#define __BLKTAP_H__
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <asm-xen/ctrl_if.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/io/blkif.h>
+#include <asm-xen/xen-public/io/ring.h>
+
+/* Used to signal to the backend that this is a tap domain. */
+#define BLKTAP_COOKIE 0xbeadfeed
+
+/* -------[ debug / pretty printing ]--------------------------------- */
+
+#if 0
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+                           __FILE__ , __LINE__ , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 1
+#define ASSERT(_p) \
+    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+    __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
+
+
+/* -------[ state descriptors ]--------------------------------------- */
+
+#define BLKIF_STATE_CLOSED       0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED    2
+
+/* -------[ connection tracking ]------------------------------------- */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#endif
+
+extern spinlock_t blkif_io_lock;
+
+typedef struct blkif_st {
+    /* Unique identifier for this interface. */
+    domid_t             domid;
+    unsigned int        handle;
+    /* Physical parameters of the comms window. */
+    unsigned long       shmem_frame;
+    unsigned int        evtchn;
+    int                 irq;
+    /* Comms information. */
+    blkif_back_ring_t   blk_ring;
+    
+    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+    /*
+     * DISCONNECT response is deferred until pending requests are ack'ed.
+     * We therefore need to store the id from the original request.
+     */    
+    u8                  disconnect_rspid;
+    struct blkif_st    *hash_next;
+    struct list_head    blkdev_list;
+    spinlock_t          blk_ring_lock;
+    atomic_t            refcnt;
+    struct work_struct work;
+} blkif_t;
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+void blkif_disconnect_complete(blkif_t *blkif);
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b)                             \
+    do {                                          \
+        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+            blkif_disconnect_complete(_b);        \
+    } while (0)
+
+
+/* -------[ active request tracking ]--------------------------------- */
+
+typedef struct {
+    blkif_t       *blkif;
+    unsigned long  id;
+    int            nr_pages;
+    unsigned long  mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    unsigned long  virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    int            next_free;
+} active_req_t;
+
+typedef unsigned int ACTIVE_RING_IDX;
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
+
+extern inline unsigned int ID_TO_IDX(unsigned long id) 
+{ 
+    return ( id & 0x0000ffff );
+}
+
+extern inline domid_t ID_TO_DOM(unsigned long id) 
+{ 
+    return (id >> 16); 
+}
+
+void active_reqs_init(void);
+
+/* -------[ interposition -> character device interface ]------------- */
+
+/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
+#define BLKTAP_MINOR 202
+
+/* size of the extra VMA area to map in attached pages. */
+#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
+
+/* blktap IOCTLs:                                                      */
+#define BLKTAP_IOCTL_KICK_FE         1
+#define BLKTAP_IOCTL_KICK_BE         2
+#define BLKTAP_IOCTL_SETMODE         3
+#define BLKTAP_IOCTL_PRINT_IDXS      100  
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
+#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
+#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
+#define BLKTAP_MODE_COPY_FE          0x00000004
+#define BLKTAP_MODE_COPY_BE          0x00000008
+#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
+#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
+
+#define BLKTAP_MODE_INTERPOSE \
+           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+    return (
+        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
+        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
+        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
+        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
+        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
+        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+        );
+}
+
+
+
+/* -------[ Mappings to User VMA ]------------------------------------ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+extern struct vm_area_struct *blktap_vma;
+
+/* The following are from blkback.c and should probably be put in a
+ * header and included from there.
+ * The mmap area described here is where attached data pages will be mapped.
+ */
+extern unsigned long mmap_vstart;
+#define MMAP_PAGES_PER_REQUEST \
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_PAGES             \
+    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg)                        \
+    (mmap_vstart +                                   \
+     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+     ((_seg) * PAGE_SIZE))
+
+/* immediately before the mmap area, we have a bunch of pages reserved
+ * for shared memory rings.
+ */
+
+#define RING_PAGES 3 /* Ctrl, Front, and Back */ 
+extern unsigned long rings_vstart;
+
+
+/* -------[ Here be globals ]----------------------------------------- */
+extern unsigned long blktap_mode;
+
+/* Connection to a single backend domain. */
+extern blkif_front_ring_t blktap_be_ring;
+extern unsigned int blktap_be_evtchn;
+extern unsigned int blktap_be_state;
+
+/* User ring status. */
+extern unsigned long blktap_ring_ok;
+
+/* -------[ ...and function prototypes. ]----------------------------- */
+
+/* init function for character device interface.                       */
+int blktap_init(void);
+
+/* init function for the blkif cache. */
+void __init blkif_interface_init(void);
+void __init blkdev_schedule_init(void);
+void blkif_deschedule(blkif_t *blkif);
+
+/* interfaces to the char driver, passing messages to and from apps.   */
+void blktap_kick_user(void);
+
+/* user ring access functions: */
+int blktap_write_fe_ring(blkif_request_t *req);
+int blktap_write_be_ring(blkif_response_t *rsp);
+int blktap_write_ctrl_ring(ctrl_msg_t *msg);
+
+/* fe/be ring access functions: */
+int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
+int write_req_to_be_ring(blkif_request_t *req);
+
+/* event notification functions */
+void kick_fe_domain(blkif_t *blkif);
+void kick_be_domain(void);
+
+/* Interrupt handlers. */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
+                                  struct pt_regs *ptregs);
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Control message receiver. */
+extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+
+/* debug */
+void print_vm_ring_idxs(void);
+        
+#define __BLKINT_H__
+#endif
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
new file mode 100644 (file)
index 0000000..9ce74c7
--- /dev/null
@@ -0,0 +1,517 @@
+/******************************************************************************
+ * blktap_controlmsg.c
+ * 
+ * XenLinux virtual block-device tap.
+ * Control interfaces to the frontend and backend drivers.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+#include "blktap.h"
+
+static char *blkif_state_name[] = {
+    [BLKIF_STATE_CLOSED]       = "closed",
+    [BLKIF_STATE_DISCONNECTED] = "disconnected",
+    [BLKIF_STATE_CONNECTED]    = "connected",
+};
+
+static char * blkif_status_name[] = {
+    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
+    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
+    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
+};
+
+static unsigned blktap_be_irq;
+unsigned int    blktap_be_state = BLKIF_STATE_CLOSED;
+unsigned int    blktap_be_evtchn;
+
+/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+
+static kmem_cache_t *blkif_cachep;
+static blkif_t      *blkif_hash[BLKIF_HASHSZ];
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( (blkif != NULL) && 
+            ((blkif->domid != domid) || (blkif->handle != handle)) )
+        blkif = blkif->hash_next;
+    return blkif;
+}
+
+static void __blkif_disconnect_complete(void *arg)
+{
+    blkif_t              *blkif = (blkif_t *)arg;
+    ctrl_msg_t            cmsg;
+    blkif_be_disconnect_t disc;
+
+    /*
+     * These can't be done in blkif_disconnect() because at that point there
+     * may be outstanding requests at the disc whose asynchronous responses
+     * must still be notified to the remote driver.
+     */
+    unbind_evtchn_from_irq(blkif->evtchn);
+    vfree(blkif->blk_ring.sring);
+
+    /* Construct the deferred response message. */
+    cmsg.type         = CMSG_BLKIF_BE;
+    cmsg.subtype      = CMSG_BLKIF_BE_DISCONNECT;
+    cmsg.id           = blkif->disconnect_rspid;
+    cmsg.length       = sizeof(blkif_be_disconnect_t);
+    disc.domid        = blkif->domid;
+    disc.blkif_handle = blkif->handle;
+    disc.status       = BLKIF_BE_STATUS_OKAY;
+    memcpy(cmsg.msg, &disc, sizeof(disc));
+
+    /*
+     * Make sure message is constructed /before/ status change, because
+     * after the status change the 'blkif' structure could be deallocated at
+     * any time. Also make sure we send the response /after/ status change,
+     * as otherwise a subsequent CONNECT request could spuriously fail if
+     * another CPU doesn't see the status change yet.
+     */
+    mb();
+    if ( blkif->status != DISCONNECTING )
+        BUG();
+    blkif->status = DISCONNECTED;
+    mb();
+
+    /* Send the successful response. */
+    ctrl_if_send_response(&cmsg);
+}
+
+void blkif_disconnect_complete(blkif_t *blkif)
+{
+    INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
+    schedule_work(&blkif->work);
+}
+
+void blkif_ptfe_create(blkif_be_create_t *create)
+{
+    blkif_t      *blkif, **pblkif;
+    domid_t       domid  = create->domid;
+    unsigned int  handle = create->blkif_handle;
+
+
+    /* May want to store info on the connecting domain here. */
+
+    DPRINTK("PT got BE_CREATE\n");
+
+    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
+    {
+        DPRINTK("Could not create blkif: out of memory\n");
+        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    /* blkif struct init code from blkback.c */
+    memset(blkif, 0, sizeof(*blkif));
+    blkif->domid  = domid;
+    blkif->handle = handle;
+    blkif->status = DISCONNECTED;  
+    spin_lock_init(&blkif->blk_ring_lock);
+    atomic_set(&blkif->refcnt, 0);
+
+    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( *pblkif != NULL )
+    {
+        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+        {
+            DPRINTK("Could not create blkif: already exists\n");
+            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+            kmem_cache_free(blkif_cachep, blkif);
+            return;
+        }
+        pblkif = &(*pblkif)->hash_next;
+    }
+
+    blkif->hash_next = *pblkif;
+    *pblkif = blkif;
+
+    create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+
+void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
+{
+    /* Clear anything that we initialized above. */
+
+    domid_t       domid  = destroy->domid;
+    unsigned int  handle = destroy->blkif_handle;
+    blkif_t     **pblkif, *blkif;
+
+    DPRINTK("PT got BE_DESTROY\n");
+    
+    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+    while ( (blkif = *pblkif) != NULL )
+    {
+        if ( (blkif->domid == domid) && (blkif->handle == handle) )
+        {
+            if ( blkif->status != DISCONNECTED )
+                goto still_connected;
+            goto destroy;
+        }
+        pblkif = &blkif->hash_next;
+    }
+
+    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+    return;
+
+ still_connected:
+    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+    return;
+
+ destroy:
+    *pblkif = blkif->hash_next;
+    kmem_cache_free(blkif_cachep, blkif);
+    destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+void blkif_ptfe_connect(blkif_be_connect_t *connect)
+{
+    domid_t        domid  = connect->domid;
+    unsigned int   handle = connect->blkif_handle;
+    unsigned int   evtchn = connect->evtchn;
+    unsigned long  shmem_frame = connect->shmem_frame;
+    struct vm_struct *vma;
+    pgprot_t       prot;
+    int            error;
+    blkif_t       *blkif;
+    blkif_sring_t *sring;
+
+    DPRINTK("PT got BE_CONNECT\n");
+
+    blkif = blkif_find_by_handle(domid, handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 
+                connect->domid, connect->blkif_handle); 
+        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+    {
+        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+                                    prot, domid);
+    if ( error != 0 )
+    {
+        WPRINTK("BE_CONNECT: error! (%d)\n", error);
+        if ( error == -ENOMEM ) 
+            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        else if ( error == -EFAULT ) {
+            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+            WPRINTK("BE_CONNECT: MAPPING error!\n");
+        }
+        else
+            connect->status = BLKIF_BE_STATUS_ERROR;
+        vfree(vma->addr);
+        return;
+    }
+
+    if ( blkif->status != DISCONNECTED )
+    {
+        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+        vfree(vma->addr);
+        return;
+    }
+
+    sring = (blkif_sring_t *)vma->addr;
+    SHARED_RING_INIT(sring);
+    BACK_RING_INIT(&blkif->blk_ring, sring);
+    
+    blkif->evtchn        = evtchn;
+    blkif->irq           = bind_evtchn_to_irq(evtchn);
+    blkif->shmem_frame   = shmem_frame;
+    blkif->status        = CONNECTED;
+    blkif_get(blkif);
+
+    request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
+
+    connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/* Handle a BE_DISCONNECT control message for a passthrough frontend.
+ * Returns nonzero if the caller should send the response message now;
+ * returns 0 when the response is deferred until disconnect completes. */
+int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+    domid_t       domid  = disconnect->domid;
+    unsigned int  handle = disconnect->blkif_handle;
+    blkif_t      *blkif;
+
+    DPRINTK("PT got BE_DISCONNECT\n");
+    
+    blkif = blkif_find_by_handle(domid, handle);
+    if ( unlikely(blkif == NULL) )
+    {
+        DPRINTK("blkif_disconnect attempted for non-existent blkif"
+                " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 
+        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return 1; /* Caller will send response error message. */
+    }
+
+    if ( blkif->status == CONNECTED )
+    {
+        blkif->status = DISCONNECTING;
+        blkif->disconnect_rspid = rsp_id; /* saved for the deferred response */
+        wmb(); /* Let other CPUs see the status change. */
+        free_irq(blkif->irq, blkif);
+        blkif_deschedule(blkif); /* remove from the I/O scheduler list */
+        blkif_put(blkif);        /* drop the reference taken at connect */
+        return 0; /* Caller should not send response message. */
+    }
+
+    disconnect->status = BLKIF_BE_STATUS_OKAY;
+    return 1;
+}
+
+/*-----[ Control Messages to/from Backend VM ]----------------------------*/
+
+/* Tell the controller to bring up the interface. */
+static void blkif_ptbe_send_interface_connect(void)
+{
+    ctrl_msg_t cmsg = {
+        .type    = CMSG_BLKIF_FE,
+        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
+        .length  = sizeof(blkif_fe_interface_connect_t),
+    };
+    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+    msg->handle      = 0;
+    /* Machine frame of our shared ring page, for the backend to map. */
+    msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
+    
+    /* Sleeps (uninterruptibly) until the message has been queued. */
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+static void blkif_ptbe_close(void)
+{
+    /* Intentionally empty: the tap driver keeps no per-connection state
+     * to tear down on a CLOSED notification (see blkif_ptbe_status). */
+}
+
+/* Move from CLOSED to DISCONNECTED state. */
+static void blkif_ptbe_disconnect(void)
+{
+    blkif_sring_t *sring;
+    
+    /* Allocate and initialise the shared ring we export to the backend. */
+    sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
+    if ( sring == NULL )
+    {
+        /* BUGFIX: the allocation was previously unchecked and a failure
+         * would have dereferenced NULL inside SHARED_RING_INIT. */
+        WPRINTK("Out of memory allocating shared ring page!\n");
+        return; /* stay CLOSED; cannot connect without a ring */
+    }
+    SHARED_RING_INIT(sring);
+    FRONT_RING_INIT(&blktap_be_ring, sring);
+    blktap_be_state  = BLKIF_STATE_DISCONNECTED;
+    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
+    blkif_ptbe_send_interface_connect();
+}
+
+/* Bind the backend's event channel to an IRQ and move the tap's
+ * backend-facing state machine to CONNECTED. */
+static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
+{
+    int err = 0;
+    
+    blktap_be_evtchn = status->evtchn;
+    blktap_be_irq    = bind_evtchn_to_irq(blktap_be_evtchn);
+
+    err = request_irq(blktap_be_irq, blkif_ptbe_int, 
+                      SA_SAMPLE_RANDOM, "blkif", NULL);
+    if ( err ) {
+       WPRINTK("blkfront request_irq failed (%d)\n", err);
+        return;
+    } else {
+       /* transition to connected in case we need to do a 
+           partition probe on a whole disk */
+        blktap_be_state = BLKIF_STATE_CONNECTED;
+    }
+}
+
+/* Log a status message that is not valid in the current tap state. */
+static void unexpected(blkif_fe_interface_status_t *status)
+{
+    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", 
+           blkif_status_name[status->status],
+           blkif_state_name[blktap_be_state]);
+}
+
+/* Dispatch an INTERFACE_STATUS message from the real backend according
+ * to the tap driver's current connection state machine. */
+static void blkif_ptbe_status(
+    blkif_fe_interface_status_t *status)
+{
+    /* Only a single blkif (handle 0) is supported by the tap driver. */
+    if ( status->handle != 0 )
+    {
+        DPRINTK("Status change on unsupported blkif %d\n",
+               status->handle);
+        return;
+    }
+
+    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
+    
+    switch ( status->status )
+    {
+    case BLKIF_INTERFACE_STATUS_CLOSED:
+        switch ( blktap_be_state )
+        {
+        case BLKIF_STATE_CLOSED:
+            unexpected(status);
+            break;
+        case BLKIF_STATE_DISCONNECTED:
+        case BLKIF_STATE_CONNECTED:
+            unexpected(status);
+            blkif_ptbe_close();
+            break;
+        }
+        break;
+        
+    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+        switch ( blktap_be_state )
+        {
+        case BLKIF_STATE_CLOSED:
+            blkif_ptbe_disconnect();
+            break;
+        case BLKIF_STATE_DISCONNECTED:
+        case BLKIF_STATE_CONNECTED:
+            /* A disconnect after we connected would require recovery,
+             * which the tap driver does not implement yet. */
+            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
+            unexpected(status);
+            break;
+        }
+        break;
+        
+    case BLKIF_INTERFACE_STATUS_CONNECTED:
+        switch ( blktap_be_state )
+        {
+        case BLKIF_STATE_CLOSED:
+            unexpected(status);
+            blkif_ptbe_disconnect();
+            blkif_ptbe_connect(status);
+            break;
+        case BLKIF_STATE_DISCONNECTED:
+            blkif_ptbe_connect(status);
+            break;
+        case BLKIF_STATE_CONNECTED:
+            unexpected(status);
+            blkif_ptbe_connect(status);
+            break;
+        }
+        break;
+
+   case BLKIF_INTERFACE_STATUS_CHANGED:
+        switch ( blktap_be_state )
+        {
+        case BLKIF_STATE_CLOSED:
+        case BLKIF_STATE_DISCONNECTED:
+            unexpected(status);
+            break;
+        case BLKIF_STATE_CONNECTED:
+            /* vbd_update(); */
+            /* tap doesn't really get state changes... */
+            unexpected(status);
+            break;
+        }
+       break;
+       
+    default:
+        DPRINTK("Status change to unknown value %d\n", status->status);
+        break;
+    }
+}
+
+/*-----[ All control messages enter here: ]-------------------------------*/
+
+/* Single entry point for control-interface messages delivered to the
+ * tap driver.  FE-class messages drive the backend-facing state machine;
+ * BE-class messages are handled (and optionally mirrored to user space)
+ * on behalf of the real frontend domains.  A response is sent for every
+ * message except a deferred BE_DISCONNECT; a parse error is signalled by
+ * responding with msg->length == 0. */
+void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    switch ( msg->type )
+    {
+    case CMSG_BLKIF_FE:
+
+        switch ( msg->subtype )
+        {
+        case CMSG_BLKIF_FE_INTERFACE_STATUS:
+            if ( msg->length != sizeof(blkif_fe_interface_status_t) )
+                goto parse_error;
+            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
+            break;
+
+        default:
+            goto parse_error;
+        }
+        /* BUGFIX: this break was missing, so FE messages fell through
+         * into the CMSG_BLKIF_BE case below, got mirrored to user space
+         * and then hit the BE parse_error path (clobbering msg->length). */
+        break;
+
+    case CMSG_BLKIF_BE:
+        
+        /* send a copy of the message to user if wanted */
+        
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+            
+            blktap_write_ctrl_ring(msg);
+        }
+        
+        switch ( msg->subtype )
+        {
+        case CMSG_BLKIF_BE_CREATE:
+            if ( msg->length != sizeof(blkif_be_create_t) )
+                goto parse_error;
+            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
+            break; 
+        case CMSG_BLKIF_BE_DESTROY:
+            if ( msg->length != sizeof(blkif_be_destroy_t) )
+                goto parse_error;
+            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
+            break;        
+        case CMSG_BLKIF_BE_CONNECT:
+            if ( msg->length != sizeof(blkif_be_connect_t) )
+                goto parse_error;
+            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
+            break;        
+        case CMSG_BLKIF_BE_DISCONNECT:
+            if ( msg->length != sizeof(blkif_be_disconnect_t) )
+                goto parse_error;
+            /* A zero return means the response is deferred until the
+             * disconnect completes -- do not respond here. */
+            if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0],
+                    msg->id) )
+                return;
+            break;        
+
+        /* We just ignore anything to do with vbds for now. */
+        
+        case CMSG_BLKIF_BE_VBD_CREATE:
+            DPRINTK("PT got VBD_CREATE\n");
+            ((blkif_be_vbd_create_t *)&msg->msg[0])->status 
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_DESTROY:
+            DPRINTK("PT got VBD_DESTROY\n");
+            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_GROW:
+            DPRINTK("PT got VBD_GROW\n");
+            ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_SHRINK:
+            DPRINTK("PT got VBD_SHRINK\n");
+            ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        default:
+            goto parse_error;
+        }
+    }
+
+    ctrl_if_send_response(msg);
+    return;
+
+ parse_error:
+    msg->length = 0;  /* zero length signals a malformed message */
+    ctrl_if_send_response(msg);
+}
+
+/*-----[ Initialization ]-------------------------------------------------*/
+
+/* One-time setup: allocate the blkif slab cache and clear the handle
+ * hash table used by blkif_find_by_handle(). */
+void __init blkif_interface_init(void)
+{
+    /* NOTE(review): kmem_cache_create() can return NULL; the result is
+     * not checked here -- TODO confirm failure is tolerable at init. */
+    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
+                                     0, 0, NULL, NULL);
+    memset(blkif_hash, 0, sizeof(blkif_hash));
+    
+    blktap_be_ring.sring = NULL;
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
new file mode 100644 (file)
index 0000000..e88c562
--- /dev/null
@@ -0,0 +1,472 @@
+/******************************************************************************
+ * blktap_datapath.c
+ * 
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ * -- see full header in blktap.c
+ */
+#include "blktap.h"
+#include <asm-xen/evtchn.h>
+
+/*-----[ The data paths ]-------------------------------------------------*/
+
+/* Connection to a single backend domain. */
+blkif_front_ring_t blktap_be_ring;
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* this must be the same as MAX_PENDING_REQS in blkback.c */
+#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
+
+/* Pool of active-request records plus a ring of free indices into it.
+ * Indices between active_cons and active_prod are free; get_active_req
+ * consumes at active_cons, free_active_req produces at active_prod, and
+ * active_req_lock guards both cursors. */
+active_req_t     active_reqs[MAX_ACTIVE_REQS];
+ACTIVE_RING_IDX  active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t       active_req_lock = SPIN_LOCK_UNLOCKED;
+ACTIVE_RING_IDX  active_prod, active_cons;
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+/* Number of records currently in use (in flight). */
+#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
+
+/* Take a free active-request record from the pool.  Callers must ensure
+ * the pool is non-empty first (NR_ACTIVE_REQS checks at call sites);
+ * the ASSERT only catches misuse. */
+inline active_req_t *get_active_req(void) 
+{
+    ACTIVE_RING_IDX idx;
+    active_req_t *ar;
+    unsigned long flags;
+        
+    ASSERT(active_cons != active_prod);   
+    
+    spin_lock_irqsave(&active_req_lock, flags);
+    idx =  active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+    ar = &active_reqs[idx];
+    spin_unlock_irqrestore(&active_req_lock, flags);
+    
+    return ar;
+}
+
+/* Return a record's index to the free ring. */
+inline void free_active_req(active_req_t *ar) 
+{
+    unsigned long flags;
+        
+    spin_lock_irqsave(&active_req_lock, flags);
+    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
+    spin_unlock_irqrestore(&active_req_lock, flags);
+}
+
+/* Map an active-request index back to its record.  NOTE(review): idx is
+ * not range-checked; a corrupt id from a ring would index out of bounds
+ * -- TODO confirm all callers mint ids via MAKE_ID(). */
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+{
+    return &active_reqs[idx];   
+}
+
+/* Mark every record free: the free ring initially holds indices
+ * 0..MAX_ACTIVE_REQS-1 (prod - cons == MAX means all free). */
+void active_reqs_init(void)
+{
+    ACTIVE_RING_IDX i;
+    
+    active_cons = 0;
+    active_prod = MAX_ACTIVE_REQS;
+    memset(active_reqs, 0, sizeof(active_reqs));
+    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
+        active_req_ring[i] = i;
+}
+
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message.  In it we put the AR index _AND_ the fe domid.
+ * the domid is used by the backend to map the pages properly.
+ */
+
+/* Pack the frontend domid (bits 16+) and the active-request index (low
+ * 16 bits) into the hijacked request id; ID_TO_IDX/ID_TO_DOM invert it. */
+static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+    return ( (fe_dom << 16) | idx );
+}
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+/* Copy a backend response into the originating frontend's ring,
+ * restoring the frontend's original request id and releasing the
+ * active-request record.  Caller holds blkif_io_lock. */
+inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+{
+    blkif_response_t *resp_d;
+    active_req_t *ar;
+    
+    ar = &active_reqs[ID_TO_IDX(rsp->id)];
+    rsp->id = ar->id; /* un-hijack: give the FE back its own id */
+            
+    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
+            blkif->blk_ring.rsp_prod_pvt);
+    memcpy(resp_d, rsp, sizeof(blkif_response_t));
+    wmb();
+    blkif->blk_ring.rsp_prod_pvt++;
+            
+    blkif_put(ar->blkif); /* drop ref taken when the request was queued */
+    free_active_req(ar);
+    
+    return 0;
+}
+
+/* Copy a frontend request into the shared ring to the real backend.
+ * Always returns 0; if the backend is not connected the request is
+ * dropped with a warning.  Caller holds blkif_io_lock. */
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+    blkif_request_t *req_d;
+
+    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
+        WPRINTK("Tap trying to access an unconnected backend!\n");
+        return 0;
+    }
+    
+    req_d = RING_GET_REQUEST(&blktap_be_ring,
+            blktap_be_ring.req_prod_pvt);
+    memcpy(req_d, req, sizeof(blkif_request_t));
+    wmb();
+    blktap_be_ring.req_prod_pvt++;
+            
+    return 0;
+}
+
+/* Publish queued responses and notify the frontend's event channel. */
+void kick_fe_domain(blkif_t *blkif) 
+{
+    RING_PUSH_RESPONSES(&blkif->blk_ring);
+    notify_via_evtchn(blkif->evtchn);
+    DPRINTK("notified FE(dom %u)\n", blkif->domid);
+    
+}
+
+/* Publish queued requests and notify the backend's event channel.
+ * No-op if the backend is not yet connected. */
+void kick_be_domain(void)
+{
+    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) 
+        return;
+    
+    wmb(); /* Ensure that the frontend can see the requests. */
+    RING_PUSH_REQUESTS(&blktap_be_ring);
+    notify_via_evtchn(blktap_be_evtchn);
+    DPRINTK("notified BE\n");
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*-----[ Scheduler list maint -from blkback ]--- */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+/* A blkif is on the scheduler list iff its list node is linked. */
+static int __on_blkdev_list(blkif_t *blkif)
+{
+    return blkif->blkdev_list.next != NULL;
+}
+
+/* Unlink a blkif from the scheduler list, dropping the list's reference.
+ * The unlocked pre-check is an optimisation; the test is repeated under
+ * the lock to avoid racing with add_to_blkdev_list_tail(). */
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( !__on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( __on_blkdev_list(blkif) )
+    {
+        list_del(&blkif->blkdev_list);
+        blkif->blkdev_list.next = NULL;
+        blkif_put(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+/* Queue a blkif for servicing; only CONNECTED interfaces are added, and
+ * the list holds a reference while the blkif is queued. */
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+    unsigned long flags;
+    if ( __on_blkdev_list(blkif) ) return;
+    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+    {
+        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+        blkif_get(blkif);
+    }
+    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/*-----[ Scheduler functions - from blkback ]--- */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+
+/* Kernel thread ("xentapd"): repeatedly pull blkifs off the scheduler
+ * list and pump a batch of each one's pending requests through
+ * do_block_io_op().  Sleeps while there is no work or no free
+ * active-request records; never returns. */
+static int blkio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    blkif_t          *blkif;
+    struct list_head *ent;
+
+    daemonize(
+        "xentapd"
+        );
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&blkio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || 
+             list_empty(&blkio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&blkio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
+        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+                !list_empty(&blkio_schedule_list) )
+        {
+            ent = blkio_schedule_list.next;
+            blkif = list_entry(ent, blkif_t, blkdev_list);
+            blkif_get(blkif); /* hold across servicing */
+            remove_from_blkdev_list(blkif);
+            /* Re-queue if the interface still has pending requests. */
+            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+                add_to_blkdev_list_tail(blkif);
+            blkif_put(blkif);
+        }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+        /* Push the batch through to disc. */
+        run_task_queue(&tq_disk);
+#endif
+    }
+}
+
+/* Wake the scheduler thread if there is both capacity and queued work. */
+static void maybe_trigger_blkio_schedule(void)
+{
+    /*
+     * Needed so that two processes, who together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/
+         !list_empty(&blkio_schedule_list) ) 
+        wake_up(&blkio_schedule_wait);
+}
+
+/* Called on disconnect: stop scheduling I/O for this interface. */
+void blkif_deschedule(blkif_t *blkif)
+{
+    remove_from_blkdev_list(blkif);
+}
+
+/* Set up the scheduler list/lock and start the "xentapd" worker thread. */
+void __init blkdev_schedule_init(void)
+{
+    spin_lock_init(&blkio_schedule_list_lock);
+    INIT_LIST_HEAD(&blkio_schedule_list);
+
+    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+}
+    
+/*-----[ Interrupt entry from a frontend ]------ */
+
+/* Interrupt from a frontend domain: queue the blkif for servicing and
+ * poke the scheduler thread; the real work happens in do_block_io_op(). */
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    blkif_t *blkif = dev_id;
+
+    add_to_blkdev_list_tail(blkif);
+    maybe_trigger_blkio_schedule();
+    return IRQ_HANDLED;
+}
+
+/*-----[ Other Frontend Ring functions ]-------- */
+
+/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
+/* Consume up to max_to_do requests from one frontend's ring: assign an
+ * active-request record, hijack the id to carry (domid, index), then
+ * route the request to user space and/or the real backend according to
+ * blktap_mode.  Returns nonzero if the blkif still has work pending so
+ * the caller re-queues it. */
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+    /* we have pending messages from the real frontend. */
+
+    blkif_request_t *req_s;
+    RING_IDX i, rp;
+    unsigned long flags;
+    active_req_t *ar;
+    int more_to_do = 0;
+    int notify_be = 0, notify_user = 0;
+    
+    DPRINTK("PT got FE interrupt.\n");
+
+    if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
+    
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    rp = blkif->blk_ring.sring->req_prod;
+    rmb();
+    
+    for ( i = blkif->blk_ring.req_cons; 
+         (i != rp) && 
+            !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
+          i++ )
+    {
+        
+        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) 
+        {
+            more_to_do = 1;
+            break;
+        }
+        
+        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
+        /* This is a new request:  
+         * Assign an active request record, and remap the id. 
+         */
+        ar = get_active_req();
+        ar->id = req_s->id;
+        ar->nr_pages = req_s->nr_segments; 
+        blkif_get(blkif); /* dropped in write_resp_to_fe_ring() */
+        ar->blkif = blkif;
+        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+
+        /* FE -> BE interposition point is here. */
+        
+        /* ------------------------------------------------------------- */
+        /* BLKIF_OP_PROBE_HACK:                                          */
+        /* Signal to the backend that we are a tap domain.               */
+
+        if (req_s->operation == BLKIF_OP_PROBE) {
+            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
+        }
+
+        /* ------------------------------------------------------------- */
+
+        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+            
+            /* Copy the request message to UFERing */
+            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+            DPRINTK("req->UFERing\n"); 
+            blktap_write_fe_ring(req_s);
+            notify_user = 1;
+        }
+
+        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+            
+            /* be included to prevent noise from the fe when its off */
+            /* copy the request message to the BERing */
+
+            /* BUGFIX: was RING_SIZE((&blktap_be_ring)-1), which applied
+             * the macro to an out-of-bounds pointer. */
+            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
+                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
+                    (unsigned)blktap_be_ring.req_prod_pvt & 
+                    (RING_SIZE(&blktap_be_ring)-1));
+            
+            write_req_to_be_ring(req_s);
+            notify_be = 1;
+        }
+    }
+
+    blkif->blk_ring.req_cons = i;
+    
+    /* unlock rings */
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+    
+    if (notify_user)
+        blktap_kick_user();
+    if (notify_be)
+        kick_be_domain();
+    
+    return more_to_do;
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+/* Interrupt from the real backend domain: drain its response ring,
+ * routing each response to user space and/or back to the originating
+ * frontend according to blktap_mode. */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
+                                  struct pt_regs *ptregs)
+{
+    blkif_response_t  *resp_s;
+    blkif_t *blkif;
+    RING_IDX rp, i;
+    unsigned long flags;
+
+    DPRINTK("PT got BE interrupt.\n");
+
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+    
+    rp = blktap_be_ring.sring->rsp_prod;
+    rmb();
+      
+    for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
+    {
+        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
+        
+        /* BE -> FE interposition point is here. */
+    
+        /* Recover the originating frontend from the hijacked id. */
+        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+        
+        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+            /* Copy the response message to UBERing */
+            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+            DPRINTK("rsp->UBERing\n"); 
+            blktap_write_be_ring(resp_s);
+            blktap_kick_user();
+
+        }
+       
+        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+            
+            /* (fe included to prevent random interference from the BE) */
+            /* Copy the response message to FERing */
+         
+            /* BUGFIX: was RING_SIZE((&blkif->blk_ring)-1), which applied
+             * the macro to an out-of-bounds pointer. */
+            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
+                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
+                    (unsigned)blkif->blk_ring.rsp_prod_pvt & 
+                    (RING_SIZE(&blkif->blk_ring)-1));
+
+            write_resp_to_fe_ring(blkif, resp_s);
+            kick_fe_domain(blkif);
+
+        }
+    }
+    
+    blktap_be_ring.rsp_cons = i;
+    
+
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+    
+    return IRQ_HANDLED;
+}
+
+/* Debug : print the current ring indices. */
+
+/* Debug helper: dump producer/consumer indices for every frontend ring
+ * and for the backend ring.  NOTE(review): only scans domids 0..49 --
+ * presumably a debugging shortcut; verify against expected domid range. */
+void print_vm_ring_idxs(void)
+{
+    int i;
+    blkif_t *blkif;
+            
+    WPRINTK("FE Rings: \n---------\n");
+    for ( i = 0; i < 50; i++) { 
+        blkif = blkif_find_by_handle((domid_t)i, 0);
+        if (blkif != NULL) {
+            if (blkif->blk_ring.sring != NULL) {
+                WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d "
+                    "| req_prod: %2d, rsp_prod: %2d\n", i, 
+                    blkif->blk_ring.req_cons,
+                    blkif->blk_ring.rsp_prod_pvt,
+                    blkif->blk_ring.sring->req_prod,
+                    blkif->blk_ring.sring->rsp_prod);
+            } else {
+                WPRINTK("%2d: [no device channel yet]\n", i);
+            }
+        }
+    }
+    if (blktap_be_ring.sring != NULL) {
+        WPRINTK("BE Ring: \n--------\n");
+        WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
+            "| req_prod: %2d, rsp_prod: %2d\n",
+            blktap_be_ring.rsp_cons,
+            blktap_be_ring.req_prod_pvt,
+            blktap_be_ring.sring->req_prod,
+            blktap_be_ring.sring->rsp_prod);
+    }
+}        
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
new file mode 100644 (file)
index 0000000..3cc307f
--- /dev/null
@@ -0,0 +1,489 @@
+/******************************************************************************
+ * blktap_userdev.c
+ * 
+ * XenLinux virtual block-device tap.
+ * Control interface between the driver and a character device.
+ * 
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+
+#include "blktap.h"
+
+
+unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
+
+/* Only one process may open /dev/xen/blktap at any time. */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* make this ring->state */
+
+/* for poll: */
+static wait_queue_head_t blktap_wait;
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma;
+unsigned long mmap_vstart;
+unsigned long rings_vstart;
+
+/* Rings up to user space. */
+static blkif_front_ring_t blktap_ufe_ring;
+static blkif_back_ring_t  blktap_ube_ring;
+static ctrl_front_ring_t  blktap_uctrl_ring;
+
+/* local prototypes */
+static int blktap_read_fe_ring(void);
+static int blktap_read_be_ring(void);
+
+/* -------[ blktap vm ops ]------------------------------------------- */
+
+/* Fault handler for the tap mapping: pages not explicitly mapped in by
+ * the driver have no backing, so the faulting process gets SIGBUS. */
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+                                             unsigned long address,
+                                             int *type)
+{
+    /*
+     * if the page has not been mapped in by the driver then generate
+     * a SIGBUS to the domain.
+     */
+
+    force_sig(SIGBUS, current);
+
+    return 0;
+}
+
+/* VM ops for the user-space tap mapping (see blktap_mmap). */
+struct vm_operations_struct blktap_vm_ops = {
+    nopage:   blktap_nopage,
+};
+
+/* -------[ blktap file ops ]----------------------------------------- */
+
+/* Open /dev/xen/blktap (exclusive): allocate and initialise the three
+ * shared ring pages (control, frontend, backend) exported to user space
+ * via blktap_mmap.  Returns -EBUSY if already open, -ENOMEM on
+ * allocation failure. */
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+    blkif_sring_t *sring;
+    ctrl_sring_t *csring;
+    
+    if ( test_and_set_bit(0, &blktap_dev_inuse) )
+        return -EBUSY;
+
+    printk(KERN_ALERT "blktap open.\n");
+    
+    /* Allocate the ctrl ring. */
+    csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
+    if (csring == NULL)
+        goto fail_nomem;
+
+    SetPageReserved(virt_to_page(csring));
+    
+    SHARED_RING_INIT(csring);
+    FRONT_RING_INIT(&blktap_uctrl_ring, csring);
+
+    /* Allocate the fe ring. */
+    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+    if (sring == NULL)
+        goto fail_free_ctrl;
+
+    SetPageReserved(virt_to_page(sring));
+    
+    SHARED_RING_INIT(sring);
+    FRONT_RING_INIT(&blktap_ufe_ring, sring);
+
+    /* Allocate the be ring. */
+    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+    if (sring == NULL)
+        goto fail_free_fe;
+
+    SetPageReserved(virt_to_page(sring));
+    
+    SHARED_RING_INIT(sring);
+    BACK_RING_INIT(&blktap_ube_ring, sring);
+
+    DPRINTK(KERN_ALERT "blktap open.\n");
+
+    return 0;
+    
+    /* BUGFIX: the cleanup labels were in the wrong order -- a be-ring
+     * allocation failure freed only the fe ring and leaked the ctrl
+     * page, a fe-ring failure fell through to free an uninitialised fe
+     * sring, pages stayed Reserved, and blktap_dev_inuse was never
+     * cleared so every subsequent open returned -EBUSY. */
+ fail_free_fe:
+    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+    free_page( (unsigned long) blktap_ufe_ring.sring);
+
+ fail_free_ctrl:
+    ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+    free_page( (unsigned long) blktap_uctrl_ring.sring);
+
+ fail_nomem:
+    blktap_dev_inuse = 0; /* allow another open attempt */
+    return -ENOMEM;
+}
+
+/* Close /dev/xen/blktap: free the three shared ring pages and allow
+ * another process to open the device. */
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+    blktap_dev_inuse = 0;
+    blktap_ring_ok = 0;
+
+    printk(KERN_ALERT "blktap closed.\n");
+
+    /* Free the ring page. */
+    ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+    free_page((unsigned long) blktap_uctrl_ring.sring);
+
+    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+    free_page((unsigned long) blktap_ufe_ring.sring);
+
+    ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
+    free_page((unsigned long) blktap_ube_ring.sring);
+    
+    return 0;
+}
+
+/* Note on mmap:
+ * remap_pfn_range sets VM_IO on vma->vm_flags.  In trying to make libaio
+ * work to do direct page access from userspace, this ended up being a
+ * problem.  The bigger issue seems to be that there is no way to map
+ * a foreign page in to user space and have the virtual address of that 
+ * page map sanely down to a mfn.
+ * Removing the VM_IO flag results in a loop in get_user_pages, as 
+ * pfn_valid() always fails on a foreign page.
+ */
+/* Map the tap device into user space: the first RING_PAGES pages are the
+ * ctrl/be/fe shared rings (in that order); the remainder is the region
+ * where request data pages get mapped in by blktap_write_fe_ring(). */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+    int size;
+
+    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+           vma->vm_start, vma->vm_end);
+
+    vma->vm_ops = &blktap_vm_ops;
+
+    size = vma->vm_end - vma->vm_start;
+    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+        printk(KERN_INFO 
+               "blktap: you _must_ map exactly %d pages!\n",
+               MMAP_PAGES + RING_PAGES);
+        return -EAGAIN;
+    }
+
+    size >>= PAGE_SHIFT;
+    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+    
+    rings_vstart = vma->vm_start;
+    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+    
+    /* Map the ring pages to the start of the region and reserve it. */
+
+    /* not sure if I really need to do this... */
+    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+    DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
+    if (remap_pfn_range(vma, vma->vm_start, 
+                         __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, 
+                         PAGE_SIZE, vma->vm_page_prot)) {
+        WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
+    }
+
+
+    DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
+    if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, 
+                         __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, 
+                         PAGE_SIZE, vma->vm_page_prot)) {
+        WPRINTK("be_ring: remap_pfn_range failure!\n");
+    }
+
+    DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
+    if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), 
+                         __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
+                         PAGE_SIZE, vma->vm_page_prot)) {
+        WPRINTK("fe_ring: remap_pfn_range failure!\n");
+    }
+            
+    blktap_vma = vma;
+    blktap_ring_ok = 1;  /* rings are now visible to user space */
+
+    return 0;
+}
+
+/* ioctl dispatch for /dev/xen/blktap. */
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+    switch(cmd) {
+    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+        return blktap_read_fe_ring();
+
+    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
+        return blktap_read_be_ring();
+
+    case BLKTAP_IOCTL_SETMODE:
+        if (BLKTAP_MODE_VALID(arg)) {
+            blktap_mode = arg;
+            /* XXX: may need to flush rings here. */
+            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+            return 0;
+        }
+        /* BUGFIX: an invalid mode used to fall through into
+         * PRINT_IDXS; reject it with -ENOIOCTLCMD instead. */
+        break;
+    case BLKTAP_IOCTL_PRINT_IDXS:
+        {
+            print_vm_ring_idxs();
+            WPRINTK("User Rings: \n-----------\n");
+            WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+                            "| req_prod: %2d, rsp_prod: %2d\n",
+                            blktap_ufe_ring.rsp_cons,
+                            blktap_ufe_ring.req_prod_pvt,
+                            blktap_ufe_ring.sring->req_prod,
+                            blktap_ufe_ring.sring->rsp_prod);
+            WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
+                            "| req_prod: %2d, rsp_prod: %2d\n",
+                            blktap_ube_ring.req_cons,
+                            blktap_ube_ring.rsp_prod_pvt,
+                            blktap_ube_ring.sring->req_prod,
+                            blktap_ube_ring.sring->rsp_prod);
+            
+        }
+        /* BUGFIX: a successful debug dump used to fall out of the
+         * switch and wrongly report -ENOIOCTLCMD. */
+        return 0;
+    }
+    return -ENOIOCTLCMD;
+}
+
+/* poll(): readable when any of the three user-facing rings has unpushed
+ * entries; pushing them here makes the entries visible to user space. */
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+        poll_wait(file, &blktap_wait, wait);
+
+        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
+             RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
+             RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+
+            RING_PUSH_REQUESTS(&blktap_uctrl_ring);
+            RING_PUSH_REQUESTS(&blktap_ufe_ring);
+            RING_PUSH_RESPONSES(&blktap_ube_ring);
+            return POLLIN | POLLRDNORM;
+        }
+
+        return 0;
+}
+
+/* Wake any process sleeping in poll() on the tap device. */
+void blktap_kick_user(void)
+{
+    /* blktap_ring->req_prod = blktap_req_prod; */
+    wake_up_interruptible(&blktap_wait);
+}
+
+/* File operations for the /dev/xen/blktap character device. */
+static struct file_operations blktap_fops = {
+    owner:    THIS_MODULE,
+    poll:     blktap_poll,
+    ioctl:    blktap_ioctl,
+    open:     blktap_open,
+    release:  blktap_release,
+    mmap:     blktap_mmap,
+};
+    
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+/* Copy a frontend request onto the user-space FE ring and map its data
+ * pages directly into the tap application's vma. */
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+    blkif_request_t *target;
+    int error, i;
+
+    /*
+     * This is called to pass a request from the real frontend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: ufe_ring not ready for a request!\n");
+        return 0;
+    }
+
+    if ( RING_FULL(&blktap_ufe_ring) ) {
+        DPRINTK("blktap: fe_ring is full, can't add.\n");
+        return 0;
+    }
+
+    target = RING_GET_REQUEST(&blktap_ufe_ring,
+            blktap_ufe_ring.req_prod_pvt);
+    memcpy(target, req, sizeof(*req));
+
+    /* Attempt to map the foreign pages directly in to the application */
+    for (i=0; i<target->nr_segments; i++) {
+
+        error = direct_remap_area_pages(blktap_vma->vm_mm, 
+                                        MMAP_VADDR(ID_TO_IDX(req->id), i), 
+                                        target->frame_and_sects[i] & PAGE_MASK,
+                                        PAGE_SIZE,
+                                        blktap_vma->vm_page_prot,
+                                        ID_TO_DOM(req->id));
+        if ( error != 0 ) {
+            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+            /* the request is now dropped on the floor. */
+            /* NOTE(review): the active_req record and blkif reference
+             * taken in do_block_io_op() appear to leak here -- TODO
+             * confirm whether an error response should be generated. */
+            return 0;
+        }
+    }
+    
+    blktap_ufe_ring.req_prod_pvt++;
+    
+    return 0;
+}
+
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+    blkif_response_t *target;
+
+    /*
+     * This is called to pass a request from the real backend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: be_ring not ready for a request!\n");
+        return 0;
+    }
+
+    /* No test for fullness in the response direction. */
+
+    target = RING_GET_RESPONSE(&blktap_ube_ring,
+            blktap_ube_ring.rsp_prod_pvt);
+    memcpy(target, rsp, sizeof(*rsp));
+
+    /* no mapping -- pages were mapped in blktap_write_fe_ring() */
+
+    blktap_ube_ring.rsp_prod_pvt++;
+    
+    return 0;
+}
+
+static void blktap_fast_flush_area(int idx, int nr_pages)
+{
+    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+    int               i;
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        mcl[i].op = __HYPERVISOR_update_va_mapping;
+        mcl[i].args[0] = MMAP_VADDR(idx, i);
+        mcl[i].args[1] = 0;
+        mcl[i].args[2] = 0;
+    }
+
+    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+        BUG();
+}
+
+static int blktap_read_fe_ring(void)
+{
+    /* This is called to read responses from the UFE ring. */
+
+    RING_IDX i, rp;
+    blkif_response_t *resp_s;
+    blkif_t *blkif;
+    active_req_t *ar;
+
+    DPRINTK("blktap_read_fe_ring()\n");
+
+    /* if we are forwarding from UFERring to FERing */
+    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+        /* for each outstanding message on the UFEring  */
+        rp = blktap_ufe_ring.sring->rsp_prod;
+        rmb();
+        
+        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+        {
+            resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+            
+            DPRINTK("resp->fe_ring\n");
+            ar = lookup_active_req(ID_TO_IDX(resp_s->id));
+            blkif = ar->blkif;
+            blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
+            write_resp_to_fe_ring(blkif, resp_s);
+            kick_fe_domain(blkif);
+        }
+        
+        blktap_ufe_ring.rsp_cons = i;
+    }
+    return 0;
+}
+
+static int blktap_read_be_ring(void)
+{
+    /* This is called to read requests from the UBE ring. */
+
+    RING_IDX i, rp;
+    blkif_request_t *req_s;
+
+    DPRINTK("blktap_read_be_ring()\n");
+
+    /* if we are forwarding from UFERring to FERing */
+    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+
+        /* for each outstanding message on the UFEring  */
+        rp = blktap_ube_ring.sring->req_prod;
+        rmb();
+        for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
+        {
+            req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
+
+            DPRINTK("req->be_ring\n");
+            write_req_to_be_ring(req_s);
+            kick_be_domain();
+        }
+        
+        blktap_ube_ring.req_cons = i;
+    }
+
+    return 0;
+}
+
+int blktap_write_ctrl_ring(ctrl_msg_t *msg)
+{
+    ctrl_msg_t *target;
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: be_ring not ready for a request!\n");
+        return 0;
+    }
+
+    /* No test for fullness in the response direction. */
+
+    target = RING_GET_REQUEST(&blktap_uctrl_ring,
+            blktap_uctrl_ring.req_prod_pvt);
+    memcpy(target, msg, sizeof(*msg));
+
+    blktap_uctrl_ring.req_prod_pvt++;
+    
+    /* currently treat the ring as unidirectional. */
+    blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
+    
+    return 0;
+       
+}
+
+/* -------[ blktap module setup ]------------------------------------- */
+
+static struct miscdevice blktap_miscdev = {
+    .minor        = BLKTAP_MINOR,
+    .name         = "blktap",
+    .fops         = &blktap_fops,
+    .devfs_name   = "misc/blktap",
+};
+
+int blktap_init(void)
+{
+    int err;
+
+    err = misc_register(&blktap_miscdev);
+    if ( err != 0 )
+    {
+        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+        return err;
+    }
+
+    init_waitqueue_head(&blktap_wait);
+
+
+    return 0;
+}
index 9c57aaf3a1aaba357dfa9799757badd862b83511..5967f52842928a3332b0562f5e613439b323a6c5 100644 (file)
@@ -176,8 +176,9 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
     {
         unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
         pgd_t *pgd = pgd_offset_k(m2pv);
-        pmd_t *pmd = pmd_offset(pgd, m2pv);
-        unsigned long m2p_start_mfn = pmd_val_ma(*pmd) >> PAGE_SHIFT;
+        pud_t *pud = pud_offset(pgd, m2pv);
+        pmd_t *pmd = pmd_offset(pud, m2pv);
+        unsigned long m2p_start_mfn = pfn_to_mfn(pmd_val(*pmd) >> PAGE_SHIFT);
         ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0;
     }
     break;
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h
new file mode 100644 (file)
index 0000000..bcab204
--- /dev/null
@@ -0,0 +1,85 @@
+
+#ifndef __USBIF__BACKEND__COMMON_H__
+#define __USBIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/ctrl_if.h>
+#include <asm-xen/hypervisor.h>
+
+#include <asm-xen/xen-public/io/usbif.h>
+
+#if 0
+#define ASSERT(_p) \
+    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+    __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+                           __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+typedef struct usbif_priv_st usbif_priv_t;
+
+struct usbif_priv_st {
+    /* Unique identifier for this interface. */
+    domid_t          domid;
+    unsigned int     handle;
+    /* Physical parameters of the comms window. */
+    unsigned long    shmem_frame;
+    unsigned int     evtchn;
+    int              irq;
+    /* Comms Information */
+    usbif_back_ring_t usb_ring;
+    /* Private fields. */
+    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+    /*
+     * DISCONNECT response is deferred until pending requests are ack'ed.
+     * We therefore need to store the id from the original request.
+     */
+    u8                   disconnect_rspid;
+    usbif_priv_t        *hash_next;
+    struct list_head     usbif_list;
+    spinlock_t           usb_ring_lock;
+    atomic_t             refcnt;
+
+    struct work_struct work;
+};
+
+void usbif_create(usbif_be_create_t *create);
+void usbif_destroy(usbif_be_destroy_t *destroy);
+void usbif_connect(usbif_be_connect_t *connect);
+int  usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id);
+void usbif_disconnect_complete(usbif_priv_t *up);
+
+void usbif_release_port(usbif_be_release_port_t *msg);
+int usbif_claim_port(usbif_be_claim_port_t *msg);
+void usbif_release_ports(usbif_priv_t *up);
+
+usbif_priv_t *usbif_find(domid_t domid);
+#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define usbif_put(_b)                             \
+    do {                                          \
+        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+            usbif_disconnect_complete(_b);        \
+    } while (0)
+
+
+void usbif_interface_init(void);
+void usbif_ctrlif_init(void);
+
+void usbif_deschedule(usbif_priv_t *up);
+void remove_from_usbif_list(usbif_priv_t *up);
+
+irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+#endif /* __USBIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c
new file mode 100644 (file)
index 0000000..899394a
--- /dev/null
@@ -0,0 +1,77 @@
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/control.c
+ * 
+ * Routines for interfacing with the control plane.
+ * 
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype);
+    
+    switch ( msg->subtype )
+    {
+    case CMSG_USBIF_BE_CREATE:
+        if ( msg->length != sizeof(usbif_be_create_t) )
+            goto parse_error;
+        usbif_create((usbif_be_create_t *)&msg->msg[0]);
+        break;        
+    case CMSG_USBIF_BE_DESTROY:
+        if ( msg->length != sizeof(usbif_be_destroy_t) )
+            goto parse_error;
+        usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]);
+        break;        
+    case CMSG_USBIF_BE_CONNECT:
+        if ( msg->length != sizeof(usbif_be_connect_t) )
+            goto parse_error;
+        usbif_connect((usbif_be_connect_t *)&msg->msg[0]);
+        break;        
+    case CMSG_USBIF_BE_DISCONNECT:
+        if ( msg->length != sizeof(usbif_be_disconnect_t) )
+            goto parse_error;
+        if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) )
+            return; /* Sending the response is deferred until later. */
+        break;        
+    case CMSG_USBIF_BE_CLAIM_PORT:
+        if ( msg->length != sizeof(usbif_be_claim_port_t) )
+            goto parse_error;
+       usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]);
+        break;
+    case CMSG_USBIF_BE_RELEASE_PORT:
+        if ( msg->length != sizeof(usbif_be_release_port_t) )
+            goto parse_error;
+        usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]);
+        break;
+    default:
+        goto parse_error;
+    }
+
+    ctrl_if_send_response(msg);
+    return;
+
+ parse_error:
+    DPRINTK("Parse error while reading message subtype %d, len %d\n",
+            msg->subtype, msg->length);
+    msg->length = 0;
+    ctrl_if_send_response(msg);
+}
+
+void usbif_ctrlif_init(void)
+{
+    ctrl_msg_t                       cmsg;
+    usbif_be_driver_status_changed_t st;
+
+    (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx, 
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
+
+    /* Send a driver-UP notification to the domain controller. */
+    cmsg.type      = CMSG_USBIF_BE;
+    cmsg.subtype   = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED;
+    cmsg.length    = sizeof(usbif_be_driver_status_changed_t);
+    st.status      = USBIF_DRIVER_STATUS_UP;
+    memcpy(cmsg.msg, &st, sizeof(st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c
new file mode 100644 (file)
index 0000000..4630da8
--- /dev/null
@@ -0,0 +1,252 @@
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/interface.c
+ * 
+ * USB device interface management.
+ * 
+ * by Mark Williamson, Copyright (c) 2004
+ */
+
+
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/interface.c
+ * 
+ * Block-device interface management.
+ * 
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+#define USBIF_HASHSZ 1024
+#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1))
+
+static kmem_cache_t      *usbif_priv_cachep;
+static usbif_priv_t      *usbif_priv_hash[USBIF_HASHSZ];
+
+usbif_priv_t *usbif_find(domid_t domid)
+{
+    usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)];
+    while ( (up != NULL ) && ( up->domid != domid ) )
+        up = up->hash_next;
+    return up;
+}
+
+static void __usbif_disconnect_complete(void *arg)
+{
+    usbif_priv_t         *usbif = (usbif_priv_t *)arg;
+    ctrl_msg_t            cmsg;
+    usbif_be_disconnect_t disc;
+
+    /*
+     * These can't be done in usbif_disconnect() because at that point there
+     * may be outstanding requests at the device whose asynchronous responses
+     * must still be notified to the remote driver.
+     */
+    unbind_evtchn_from_irq(usbif->evtchn);
+    vfree(usbif->usb_ring.sring);
+
+    /* Construct the deferred response message. */
+    cmsg.type         = CMSG_USBIF_BE;
+    cmsg.subtype      = CMSG_USBIF_BE_DISCONNECT;
+    cmsg.id           = usbif->disconnect_rspid;
+    cmsg.length       = sizeof(usbif_be_disconnect_t);
+    disc.domid        = usbif->domid;
+    disc.status       = USBIF_BE_STATUS_OKAY;
+    memcpy(cmsg.msg, &disc, sizeof(disc));
+
+    /*
+     * Make sure message is constructed /before/ status change, because
+     * after the status change the 'usbif' structure could be deallocated at
+     * any time. Also make sure we send the response /after/ status change,
+     * as otherwise a subsequent CONNECT request could spuriously fail if
+     * another CPU doesn't see the status change yet.
+     */
+    mb();
+    if ( usbif->status != DISCONNECTING )
+        BUG();
+    usbif->status = DISCONNECTED;
+    mb();
+
+    /* Send the successful response. */
+    ctrl_if_send_response(&cmsg);
+}
+
+void usbif_disconnect_complete(usbif_priv_t *up)
+{
+    INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
+    schedule_work(&up->work);
+}
+
+void usbif_create(usbif_be_create_t *create)
+{
+    domid_t       domid  = create->domid;
+    usbif_priv_t **pup, *up;
+
+    if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
+    {
+        DPRINTK("Could not create usbif: out of memory\n");
+        create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    memset(up, 0, sizeof(*up));
+    up->domid  = domid;
+    up->status = DISCONNECTED;
+    spin_lock_init(&up->usb_ring_lock);
+    atomic_set(&up->refcnt, 0);
+
+    pup = &usbif_priv_hash[USBIF_HASH(domid)];
+    while ( *pup != NULL )
+    {
+        if ( (*pup)->domid == domid )
+        {
+            create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
+            kmem_cache_free(usbif_priv_cachep, up);
+            return;
+        }
+        pup = &(*pup)->hash_next;
+    }
+
+    up->hash_next = *pup;
+    *pup = up;
+
+    create->status = USBIF_BE_STATUS_OKAY;
+}
+
+void usbif_destroy(usbif_be_destroy_t *destroy)
+{
+    domid_t       domid  = destroy->domid;
+    usbif_priv_t  **pup, *up;
+
+    pup = &usbif_priv_hash[USBIF_HASH(domid)];
+    while ( (up = *pup) != NULL )
+    {
+        if ( up->domid == domid )
+        {
+            if ( up->status != DISCONNECTED )
+                goto still_connected;
+            goto destroy;
+        }
+        pup = &up->hash_next;
+    }
+
+    destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+    return;
+
+ still_connected:
+    destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+    return;
+
+ destroy:
+    *pup = up->hash_next;
+    usbif_release_ports(up);
+    kmem_cache_free(usbif_priv_cachep, up);
+    destroy->status = USBIF_BE_STATUS_OKAY;
+}
+
+void usbif_connect(usbif_be_connect_t *connect)
+{
+    domid_t       domid  = connect->domid;
+    unsigned int  evtchn = connect->evtchn;
+    unsigned long shmem_frame = connect->shmem_frame;
+    struct vm_struct *vma;
+    pgprot_t      prot;
+    int           error;
+    usbif_priv_t *up;
+    usbif_sring_t *sring;
+
+    up = usbif_find(domid);
+    if ( unlikely(up == NULL) )
+    {
+        DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n", 
+                connect->domid); 
+        connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+    {
+        connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+                                    prot, domid);
+    if ( error != 0 )
+    {
+        if ( error == -ENOMEM )
+            connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        else if ( error == -EFAULT )
+            connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
+        else
+            connect->status = USBIF_BE_STATUS_ERROR;
+        vfree(vma->addr);
+        return;
+    }
+
+    if ( up->status != DISCONNECTED )
+    {
+        connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+        vfree(vma->addr);
+        return;
+    }
+
+    sring = (usbif_sring_t *)vma->addr;
+    SHARED_RING_INIT(sring);
+    BACK_RING_INIT(&up->usb_ring, sring);
+
+    up->evtchn        = evtchn;
+    up->irq           = bind_evtchn_to_irq(evtchn);
+    up->shmem_frame   = shmem_frame;
+    up->status        = CONNECTED;
+    usbif_get(up);
+
+    request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
+
+    connect->status = USBIF_BE_STATUS_OKAY;
+}
+
+/* Remove URBs for this interface before destroying it. */
+void usbif_deschedule(usbif_priv_t *up)
+{
+    remove_from_usbif_list(up);
+}
+
+int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+    domid_t       domid  = disconnect->domid;
+    usbif_priv_t *up;
+
+    up = usbif_find(domid);
+    if ( unlikely(up == NULL) )
+    {
+        DPRINTK("usbif_disconnect attempted for non-existent usbif"
+                " (%u)\n", disconnect->domid); 
+        disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return 1; /* Caller will send response error message. */
+    }
+
+    if ( up->status == CONNECTED )
+    {
+        up->status = DISCONNECTING;
+        up->disconnect_rspid = rsp_id;
+        wmb(); /* Let other CPUs see the status change. */
+        free_irq(up->irq, up);
+       usbif_deschedule(up);
+        usbif_put(up);
+        return 0; /* Caller should not send response message. */
+    }
+
+    disconnect->status = USBIF_BE_STATUS_OKAY;
+    return 1;
+}
+
+void __init usbif_interface_init(void)
+{
+    usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
+                                         sizeof(usbif_priv_t), 
+                                         0, 0, NULL, NULL);
+    memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
+}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
new file mode 100644 (file)
index 0000000..b039b45
--- /dev/null
@@ -0,0 +1,1070 @@
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/main.c
+ * 
+ * Backend for the Xen virtual USB driver - provides an abstraction of a
+ * USB host controller to the corresponding frontend driver.
+ *
+ * by Mark Williamson
+ * Copyright (c) 2004 Intel Research Cambridge
+ * Copyright (c) 2004, 2005 Mark Williamson
+ *
+ * Based on arch/xen/drivers/blkif/backend/main.c
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+
+#include <linux/list.h>
+#include <linux/usb.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/tqueue.h>
+
+/*
+ * This is rather arbitrary.
+ */
+#define MAX_PENDING_REQS 4
+#define BATCH_PER_DOMAIN 1
+
+static unsigned long mmap_vstart;
+
+/* Needs to be sufficiently large that we can map the (large) buffers
+ * the USB mass storage driver wants. */
+#define MMAP_PAGES_PER_REQUEST \
+    (128)
+#define MMAP_PAGES             \
+    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+
+#define MMAP_VADDR(_req,_seg)                        \
+    (mmap_vstart +                                   \
+     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+     ((_seg) * PAGE_SIZE))
+
+
+static spinlock_t owned_ports_lock;
+LIST_HEAD(owned_ports);
+
+/* A list of these structures is used to track ownership of physical USB
+ * ports. */
+typedef struct 
+{
+    usbif_priv_t     *usbif_priv;
+    char             path[16];
+    int               guest_port;
+    int enabled;
+    struct list_head  list;
+    unsigned long guest_address; /* The USB device address that has been
+                                  * assigned by the guest. */
+    int               dev_present; /* Is there a device present? */
+    struct usb_device * dev;
+    unsigned long ifaces;  /* What interfaces are present on this device? */
+} owned_port_t;
+
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it.  The request is complete, the specified
+ * domain has a response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+    usbif_priv_t       *usbif_priv;
+    unsigned long      id;
+    int                nr_pages;
+    unsigned short     operation;
+    int                status;
+} pending_req_t;
+
+/*
+ * We can't allocate pending_req's in order, since they may complete out of 
+ * order. We therefore maintain an allocation ring. This ring also indicates 
+ * when enough work has been passed down -- at that point the allocation ring 
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock;
+
+/* NB. We use a different index type to differentiate from shared usb rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do);
+static void make_response(usbif_priv_t *usbif, unsigned long id, 
+                          unsigned short op, int st, int inband,
+                         unsigned long actual_length);
+static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port);
+static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req);    
+static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid);
+static owned_port_t *usbif_find_port(char *);
+
+/******************************************************************
+ * PRIVATE DEBUG FUNCTIONS
+ */
+
+#undef DEBUG
+#ifdef DEBUG
+
+static void dump_port(owned_port_t *p)
+{
+    printk(KERN_DEBUG "owned_port_t @ %p\n"
+          "  usbif_priv @ %p\n"
+          "  path: %s\n"
+          "  guest_port: %d\n"
+          "  guest_address: %ld\n"
+          "  dev_present: %d\n"
+          "  dev @ %p\n"
+          "  ifaces: 0x%lx\n",
+          p, p->usbif_priv, p->path, p->guest_port, p->guest_address,
+          p->dev_present, p->dev, p->ifaces);
+}
+
+
+static void dump_request(usbif_request_t *req)
+{    
+    printk(KERN_DEBUG "id = 0x%lx\n"
+          "devnum %d\n"
+          "endpoint 0x%x\n"
+          "direction %d\n"
+          "speed %d\n"
+          "pipe_type 0x%x\n"
+          "transfer_buffer 0x%lx\n"
+          "length 0x%lx\n"
+          "transfer_flags 0x%lx\n"
+          "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n"
+          "iso_schedule = 0x%lx\n"
+          "num_iso %ld\n",
+          req->id, req->devnum, req->endpoint, req->direction, req->speed,
+          req->pipe_type, req->transfer_buffer, req->length,
+          req->transfer_flags, req->setup[0], req->setup[1], req->setup[2],
+          req->setup[3], req->setup[4], req->setup[5], req->setup[6],
+          req->setup[7], req->iso_schedule, req->num_iso);
+}
+
+static void dump_urb(struct urb *urb)
+{
+    printk(KERN_DEBUG "dumping urb @ %p\n", urb);
+
+#define DUMP_URB_FIELD(name, format) \
+    printk(KERN_DEBUG "  " # name " " format "\n", urb-> name)
+    
+    DUMP_URB_FIELD(pipe, "0x%x");
+    DUMP_URB_FIELD(status, "%d");
+    DUMP_URB_FIELD(transfer_flags, "0x%x");    
+    DUMP_URB_FIELD(transfer_buffer, "%p");
+    DUMP_URB_FIELD(transfer_buffer_length, "%d");
+    DUMP_URB_FIELD(actual_length, "%d");
+}
+
+static void dump_response(usbif_response_t *resp)
+{
+    printk(KERN_DEBUG "usbback: Sending response:\n"
+          "         id = 0x%x\n"
+          "         op = %d\n"
+          "         status = %d\n"
+          "         data = %d\n"
+          "         length = %d\n",
+          resp->id, resp->op, resp->status, resp->data, resp->length);
+}
+
+#else /* DEBUG */
+
+#define dump_port(blah)     ((void)0)
+#define dump_request(blah)   ((void)0)
+#define dump_urb(blah)      ((void)0)
+#define dump_response(blah) ((void)0)
+
+#endif /* DEBUG */
+
+/******************************************************************
+ * MEMORY MANAGEMENT
+ */
+
+static void fast_flush_area(int idx, int nr_pages)
+{
+    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+    int               i;
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        mcl[i].op = __HYPERVISOR_update_va_mapping;
+        mcl[i].args[0] = MMAP_VADDR(idx, i);
+        mcl[i].args[1] = 0;
+        mcl[i].args[2] = 0;
+    }
+
+    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+        BUG();
+}
+
+
+/******************************************************************
+ * USB INTERFACE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head usbio_schedule_list;
+static spinlock_t usbio_schedule_list_lock;
+
+static int __on_usbif_list(usbif_priv_t *up)
+{
+    return up->usbif_list.next != NULL;
+}
+
+void remove_from_usbif_list(usbif_priv_t *up)
+{
+    unsigned long flags;
+    if ( !__on_usbif_list(up) ) return;
+    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
+    if ( __on_usbif_list(up) )
+    {
+        list_del(&up->usbif_list);
+        up->usbif_list.next = NULL;
+        usbif_put(up);
+    }
+    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
+}
+
+static void add_to_usbif_list_tail(usbif_priv_t *up)
+{
+    unsigned long flags;
+    if ( __on_usbif_list(up) ) return;
+    spin_lock_irqsave(&usbio_schedule_list_lock, flags);
+    if ( !__on_usbif_list(up) && (up->status == CONNECTED) )
+    {
+        list_add_tail(&up->usbif_list, &usbio_schedule_list);
+        usbif_get(up);
+    }
+    spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
+}
+
+void free_pending(int pending_idx)
+{
+    unsigned long flags;
+
+    /* Free the pending request. */
+    spin_lock_irqsave(&pend_prod_lock, flags);
+    pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+    spin_unlock_irqrestore(&pend_prod_lock, flags);
+}
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as urb->complete()
+ */
+
+static void maybe_trigger_usbio_schedule(void);
+
+static void __end_usb_io_op(struct urb *purb)
+{
+    pending_req_t *pending_req;
+    int pending_idx;
+
+    pending_req = purb->context;
+
+    pending_idx = pending_req - pending_reqs;
+
+    ASSERT(purb->actual_length <= purb->transfer_buffer_length);
+    ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE);
+    
+    /* An error fails the entire request. */
+    if ( purb->status )
+    {
+        printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status);
+    }
+
+    if ( usb_pipetype(purb->pipe) == 0 )
+    {
+        int i;
+        usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1);
+
+        /* If we're dealing with an iso pipe, we need to copy back the schedule. */
+        for ( i = 0; i < purb->number_of_packets; i++ )
+        {
+            sched[i].length = purb->iso_frame_desc[i].actual_length;
+            ASSERT(sched[i].buffer_offset ==
+                   purb->iso_frame_desc[i].offset);
+            sched[i].status = purb->iso_frame_desc[i].status;
+        }
+    }
+    
+    fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages);
+
+    kfree(purb->setup_packet);
+
+    make_response(pending_req->usbif_priv, pending_req->id,
+                 pending_req->operation, pending_req->status, 0, purb->actual_length);
+    usbif_put(pending_req->usbif_priv);
+
+    usb_free_urb(purb);
+
+    free_pending(pending_idx);
+    
+    rmb();
+
+    /* Check for anything still waiting in the rings, having freed a request... */
+    maybe_trigger_usbio_schedule();
+}
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait);
+
+static int usbio_schedule(void *arg)
+{
+    DECLARE_WAITQUEUE(wq, current);
+
+    usbif_priv_t          *up;
+    struct list_head *ent;
+
+    daemonize();
+
+    for ( ; ; )
+    {
+        /* Wait for work to do. */
+        add_wait_queue(&usbio_schedule_wait, &wq);
+        set_current_state(TASK_INTERRUPTIBLE);
+        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
+             list_empty(&usbio_schedule_list) )
+            schedule();
+        __set_current_state(TASK_RUNNING);
+        remove_wait_queue(&usbio_schedule_wait, &wq);
+
+        /* Queue up a batch of requests. */
+        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+                !list_empty(&usbio_schedule_list) )
+        {
+            ent = usbio_schedule_list.next;
+            up = list_entry(ent, usbif_priv_t, usbif_list);
+            usbif_get(up);
+            remove_from_usbif_list(up);
+            if ( do_usb_io_op(up, BATCH_PER_DOMAIN) )
+                add_to_usbif_list_tail(up);
+            usbif_put(up);
+        }
+    }
+}
+
+static void maybe_trigger_usbio_schedule(void)
+{
+    /*
+     * Needed so that two processes, who together make the following predicate
+     * true, don't both read stale values and evaluate the predicate
+     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+     */
+    smp_mb();
+
+    if ( !list_empty(&usbio_schedule_list) )
+        wake_up(&usbio_schedule_wait);
+}
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    usbif_priv_t *up = dev_id;
+
+    smp_mb();
+
+    add_to_usbif_list_tail(up); 
+
+    /* Will in fact /always/ trigger an io schedule in this case. */
+    maybe_trigger_usbio_schedule();
+
+    return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the usb-device layer proper.
+ */
+
+static int do_usb_io_op(usbif_priv_t *up, int max_to_do)
+{
+    usbif_back_ring_t *usb_ring = &up->usb_ring;
+    usbif_request_t *req;
+    RING_IDX i, rp;
+    int more_to_do = 0;
+
+    rp = usb_ring->sring->req_prod;
+    rmb(); /* Ensure we see queued requests up to 'rp'. */
+    
+    /* Take items off the comms ring, taking care not to overflow. */
+    for ( i = usb_ring->req_cons; 
+          (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
+          i++ )
+    {
+        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+        {
+            more_to_do = 1;
+            break;
+        }
+
+        req = RING_GET_REQUEST(usb_ring, i);
+        
+        switch ( req->operation )
+        {
+        case USBIF_OP_PROBE:
+            dispatch_usb_probe(up, req->id, req->port);
+            break;
+
+        case USBIF_OP_IO:
+         /* Assemble an appropriate URB. */
+         dispatch_usb_io(up, req);
+          break;
+
+       case USBIF_OP_RESET:
+         dispatch_usb_reset(up, req->port);
+          break;
+
+        default:
+            DPRINTK("error: unknown USB io operation [%d]\n",
+                    req->operation);
+            make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+            break;
+        }
+    }
+
+    usb_ring->req_cons = i;
+
+    return more_to_do;
+}
+
+static owned_port_t *find_guest_port(usbif_priv_t *up, int port)
+{
+    unsigned long flags;
+    struct list_head *l;
+
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    list_for_each(l, &owned_ports)
+    {
+        owned_port_t *p = list_entry(l, owned_port_t, list);
+        if(p->usbif_priv == up && p->guest_port == port)
+        {
+            spin_unlock_irqrestore(&owned_ports_lock, flags);
+            return p;
+        }
+    }
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+    return NULL;
+}
+
+static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid)
+{
+    owned_port_t *port = find_guest_port(up, portid);
+    int ret = 0;
+    if ( port == NULL ) /* Guard NULL from find_guest_port() - see dispatch_usb_probe(). */
+    {   make_response(up, 0, USBIF_OP_RESET, -EINVAL, 0, 0); return;  }
+    /* Allowing the guest to actually reset the device causes more problems
+     * than it's worth.  We just fake it out in software but we will do a real
+     * reset when the interface is destroyed. */
+
+    dump_port(port);
+
+    port->guest_address = 0;
+    /* If there's an attached device then the port is now enabled. */
+    if ( port->dev_present )
+        port->enabled = 1;
+    else
+        port->enabled = 0;
+
+    make_response(up, 0, USBIF_OP_RESET, ret, 0, 0);
+}
+
+static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid)
+{
+    owned_port_t *port = find_guest_port(up, portid);
+    int ret;
+    if ( port != NULL )
+        ret = port->dev_present;
+    else
+    {
+        ret = -EINVAL;
+        printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request "
+              "(port %ld)\n", portid);
+    }
+
+    /* Probe result is sent back in-band.  Probes don't have an associated id
+     * right now... */
+    make_response(up, id, USBIF_OP_PROBE, ret, portid, 0);
+}
+
+/**
+ * check_iso_schedule - safety check the isochronous schedule for an URB
+ * @purb : the URB in question
+ */
+static int check_iso_schedule(struct urb *purb)
+{
+    int i;
+    unsigned long total_length = 0;
+    
+    for ( i = 0; i < purb->number_of_packets; i++ )
+    {
+        struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i];
+        
+        if ( desc->offset >= purb->transfer_buffer_length
+            || ( desc->offset + desc->length) > purb->transfer_buffer_length )
+            return -EINVAL;
+
+        total_length += desc->length;
+
+        if ( total_length > purb->transfer_buffer_length )
+            return -EINVAL;
+    }
+    
+    return 0;
+}
+
+owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req);
+
+static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req)
+{
+    unsigned long buffer_mach;
+    int i = 0, offset = 0,
+        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+    pending_req_t *pending_req;
+    unsigned long  remap_prot;
+    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+    struct urb *purb = NULL;
+    owned_port_t *port;
+    unsigned char *setup;    
+
+    dump_request(req);
+
+    if ( NR_PENDING_REQS == MAX_PENDING_REQS )
+    {
+        printk(KERN_WARNING "usbback: Max requests already queued. "
+              "Giving up!\n");
+        
+        return;
+    }
+
+    port = find_port_for_request(up, req);
+
+    if ( port == NULL )
+    {
+       printk(KERN_WARNING "No such device! (%d)\n", req->devnum);
+       dump_request(req);
+
+        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
+       return;
+    }
+    else if ( !port->dev_present )
+    {
+        /* In normal operation, we'll only get here if a device is unplugged
+         * and the frontend hasn't noticed yet. */
+        make_response(up, req->id, req->operation, -ENODEV, 0, 0);
+       return;
+    }
+        
+
+    setup = kmalloc(8, GFP_KERNEL);
+
+    if ( setup == NULL )
+        goto no_mem;
+   
+    /* Copy request out for safety. */
+    memcpy(setup, req->setup, 8);
+
+    if( setup[0] == 0x0 && setup[1] == 0x5)
+    {
+        /* To virtualise the USB address space, we need to intercept
+         * set_address messages and emulate.  From the USB specification:
+         * bmRequestType = 0x0;
+         * Brequest = SET_ADDRESS (i.e. 0x5)
+         * wValue = device address
+         * wIndex = 0
+         * wLength = 0
+         * data = None
+         */
+        /* Store into the guest transfer buffer using cpu_to_le16 */
+        port->guest_address = le16_to_cpu(*(u16 *)(setup + 2));
+        /* Make a successful response.  That was easy! */
+
+        make_response(up, req->id, req->operation, 0, 0, 0);
+
+       kfree(setup);
+        return;
+    }
+    else if ( setup[0] == 0x0 && setup[1] == 0x9 )
+    {
+        /* The host kernel needs to know what device configuration is in use
+         * because various error checks get confused otherwise.  We just do
+         * configuration settings here, under controlled conditions.
+         */
+
+      /* Ignore configuration setting and hope that the host kernel
+        did it right. */
+        /* usb_set_configuration(port->dev, setup[2]); */
+
+        make_response(up, req->id, req->operation, 0, 0, 0);
+
+        kfree(setup);
+        return;
+    }
+    else if ( setup[0] == 0x1 && setup[1] == 0xB )
+    {
+        /* The host kernel needs to know what device interface is in use
+         * because various error checks get confused otherwise.  We just do
+         * configuration settings here, under controlled conditions.
+         */
+        usb_set_interface(port->dev, (setup[4] | setup[5] << 8),
+                          (setup[2] | setup[3] << 8) );
+
+        make_response(up, req->id, req->operation, 0, 0, 0);
+
+        kfree(setup);
+        return;
+    }
+
+    if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK)
+          + req->length )
+        > MMAP_PAGES_PER_REQUEST * PAGE_SIZE )
+    {
+        printk(KERN_WARNING "usbback: request of %lu bytes too large\n",
+              req->length);
+        make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+        kfree(setup);
+        return;
+    }
+    
+    buffer_mach = req->transfer_buffer;
+
+    if( buffer_mach == 0 )
+       goto no_remap;
+
+    ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST);
+    ASSERT(buffer_mach);
+
+    /* Always map writeable for now. */
+    remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+
+    for ( i = 0, offset = 0; offset < req->length;
+          i++, offset += PAGE_SIZE )
+    {
+       mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
+       mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
+        mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot;
+        mcl[i].args[2] = 0;
+        mcl[i].args[3] = up->domid;
+        
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
+
+        ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
+               == buffer_mach + (i << PAGE_SHIFT));
+    }
+
+    if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
+    {
+        /* Map in ISO schedule, if necessary. */
+        mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
+        mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
+        mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot;
+        mcl[i].args[2] = 0;
+        mcl[i].args[3] = up->domid;
+
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
+    
+        i++;
+    }
+
+    if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) )
+        BUG();
+    
+    {
+        int j;
+        for ( j = 0; j < i; j++ )
+        {
+            if ( unlikely(mcl[j].args[5] != 0) )
+            {
+                printk(KERN_WARNING
+                      "invalid buffer %d -- could not remap it\n", j);
+                fast_flush_area(pending_idx, i);
+                goto bad_descriptor;
+            }
+       }
+    }
+    
+ no_remap:
+
+    ASSERT(i <= MMAP_PAGES_PER_REQUEST);
+    ASSERT(i * PAGE_SIZE >= req->length);
+
+    /* We have to do this because some things might complete out of order. */
+    pending_req = &pending_reqs[pending_idx];
+    pending_req->usbif_priv= up;
+    pending_req->id        = req->id;
+    pending_req->operation = req->operation;
+    pending_req->nr_pages  = i;
+
+    pending_cons++;
+
+    usbif_get(up);
+    
+    /* Fill out an actual request for the USB layer. */
+    purb = usb_alloc_urb(req->num_iso);
+
+    if ( purb == NULL )
+    {
+        usbif_put(up);
+        free_pending(pending_idx);
+        goto no_mem;
+    }
+
+    purb->dev = port->dev;
+    purb->context = pending_req;
+    purb->transfer_buffer =
+        (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK));
+    if(buffer_mach == 0)
+      purb->transfer_buffer = NULL;
+    purb->complete = __end_usb_io_op;
+    purb->transfer_buffer_length = req->length;
+    purb->transfer_flags = req->transfer_flags;
+
+    purb->pipe = 0;
+    purb->pipe |= req->direction << 7;
+    purb->pipe |= port->dev->devnum << 8;
+    purb->pipe |= req->speed << 26;
+    purb->pipe |= req->pipe_type << 30;
+    purb->pipe |= req->endpoint << 15;
+
+    purb->number_of_packets = req->num_iso;
+
+    if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE )
+        goto urb_error;
+
+    /* Make sure there's always some kind of timeout. */
+    purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000
+                    :  1000;
+
+    purb->setup_packet = setup;
+
+    if ( req->pipe_type == 0 ) /* ISO */
+    {
+        int j;
+        usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1);
+
+        /* If we're dealing with an iso pipe, we need to copy in a schedule. */
+        for ( j = 0; j < purb->number_of_packets; j++ )
+        {
+            purb->iso_frame_desc[j].length = iso_sched[j].length;
+            purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset;
+            iso_sched[j].status = 0;
+        }
+    }
+
+    if ( check_iso_schedule(purb) != 0 )
+        goto urb_error;
+
+    if ( usb_submit_urb(purb) != 0 )
+        goto urb_error;
+
+    return;
+
+ urb_error:
+    dump_urb(purb);    
+    usbif_put(up);
+    free_pending(pending_idx);
+
+ bad_descriptor:
+    kfree ( setup );
+    if ( purb != NULL )
+        usb_free_urb(purb);
+    make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+    return;
+    
+ no_mem:
+    if ( setup != NULL )
+        kfree(setup);
+    make_response(up, req->id, req->operation, -ENOMEM, 0, 0);
+    return;
+} 
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+static void make_response(usbif_priv_t *up, unsigned long id,
+                          unsigned short op, int st, int inband,
+                         unsigned long length)
+{
+    usbif_response_t *resp;
+    unsigned long     flags;
+    usbif_back_ring_t *usb_ring = &up->usb_ring;
+
+    /* Place on the response ring for the relevant domain. */ 
+    spin_lock_irqsave(&up->usb_ring_lock, flags);
+    resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
+    resp->id        = id;
+    resp->operation = op;
+    resp->status    = st;
+    resp->data      = inband;
+    resp->length = length;
+    wmb(); /* Ensure other side can see the response fields. */
+
+    dump_response(resp);
+
+    usb_ring->rsp_prod_pvt++;
+    RING_PUSH_RESPONSES(usb_ring);
+    spin_unlock_irqrestore(&up->usb_ring_lock, flags);
+
+    /* Kick the relevant domain. */
+    notify_via_evtchn(up->evtchn);
+}
+
+/**
+ * usbif_claim_port - claim devices on a port on behalf of guest
+ *
+ * Once completed, this will ensure that any device attached to that
+ * port is claimed by this driver for use by the guest.
+ */
+int usbif_claim_port(usbif_be_claim_port_t *msg)
+{
+    owned_port_t *o_p;
+    
+    /* Sanity... */
+    if ( usbif_find_port(msg->path) != NULL )
+    {
+        printk(KERN_WARNING "usbback: Attempted to claim USB port "
+               "we already own!\n");
+        return -EINVAL;
+    }
+
+    /* No need for a slab cache - this should be infrequent. */
+    o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL);
+
+    if ( o_p == NULL )
+        return -ENOMEM;
+
+    o_p->enabled = 0;
+    o_p->usbif_priv = usbif_find(msg->domid);
+    o_p->guest_port = msg->usbif_port;
+    o_p->dev_present = 0;
+    o_p->guest_address = 0; /* Default address. */
+
+    strcpy(o_p->path, msg->path);
+
+    spin_lock_irq(&owned_ports_lock);
+    
+    list_add(&o_p->list, &owned_ports);
+
+    spin_unlock_irq(&owned_ports_lock);
+
+    printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path,
+          msg->domid, msg->usbif_port);
+
+    /* Force a reprobe for unclaimed devices. */
+    usb_scan_devices();
+
+    return 0;
+}
+
+owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req)
+{
+    unsigned long flags;
+    struct list_head *port;
+
+    /* I'm assuming this is not called from IRQ context - correct?  I think
+     * it's probably only called in response to control messages or plug events
+     * in the USB hub kernel thread, so should be OK. */
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    list_for_each(port, &owned_ports)
+    {
+        owned_port_t *p = list_entry(port, owned_port_t, list);
+        if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled )
+         {
+              dump_port(p);
+
+             spin_unlock_irqrestore(&owned_ports_lock, flags);
+              return p;
+         }
+    }
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+    return NULL;    
+}
+
+owned_port_t *__usbif_find_port(char *path)
+{
+    struct list_head *port;
+
+    list_for_each(port, &owned_ports)
+    {
+        owned_port_t *p = list_entry(port, owned_port_t, list);
+        if(!strcmp(path, p->path))
+        {
+            return p;
+        }
+    }
+
+    return NULL;
+}
+
+owned_port_t *usbif_find_port(char *path)
+{
+    owned_port_t *ret;
+    unsigned long flags;
+
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    ret = __usbif_find_port(path);    
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+    return ret;
+}
+
+
+static void *probe(struct usb_device *dev, unsigned iface,
+                   const struct usb_device_id *id)
+{
+    owned_port_t *p;
+
+    /* We don't care what the device is - if we own the port, we want it.  We
+     * don't deal with device-specifics in this driver, so we don't care what
+     * the device actually is ;-) */
+    if ( ( p = usbif_find_port(dev->devpath) ) != NULL )
+    {
+        printk(KERN_INFO "usbback: claimed device attached to owned port\n");
+
+        p->dev_present = 1;
+        p->dev = dev;
+        set_bit(iface, &p->ifaces);
+        
+        return p->usbif_priv;
+    }
+    else
+        printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n",
+              dev->devpath);
+   
+
+    return NULL;
+}
+
+static void disconnect(struct usb_device *dev, void *usbif)
+{
+    /* Note the device is removed so we can tell the guest when it probes. */
+    owned_port_t *port = usbif_find_port(dev->devpath);
+    if ( port == NULL ) return; /* Disconnect on a port we don't own. */
+    port->dev_present = 0;
+    port->dev = NULL; port->ifaces = 0;
+}
+
+
+struct usb_driver driver =
+{
+    .owner      = THIS_MODULE,
+    .name       = "Xen USB Backend",
+    .probe      = probe,
+    .disconnect = disconnect,
+    .id_table   = NULL,
+};
+
+/* __usbif_release_port - internal mechanics for releasing a port */
+void __usbif_release_port(owned_port_t *p)
+{
+    int i;
+
+    for ( i = 0; p->ifaces != 0; i++)
+        if ( p->ifaces & 1 << i )
+        {
+            usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i));
+            clear_bit(i, &p->ifaces);
+        }
+    list_del(&p->list);
+
+    /* Reset the real device.  We don't simulate disconnect / probe for other
+     * drivers in this kernel because we assume the device is completely under
+     * the control of ourselves (i.e. the guest!).  This should ensure that the
+     * device is in a sane state for the next customer ;-) */
+
+    /* MAW NB: we're not resetting the real device here.  This looks perfectly
+     * valid to me but it causes memory corruption.  We seem to get away with not
+     * resetting for now, although it'd be nice to have this tracked down. */
+/*     if ( p->dev != NULL) */
+/*         usb_reset_device(p->dev); */
+
+    kfree(p);
+}
+
+
+/**
+ * usbif_release_port - stop claiming devices on a port on behalf of guest
+ */
+void usbif_release_port(usbif_be_release_port_t *msg)
+{
+    owned_port_t *p;
+
+    spin_lock_irq(&owned_ports_lock);
+    p = __usbif_find_port(msg->path);
+    if ( p != NULL ) __usbif_release_port(p); /* Path may be unowned. */
+    spin_unlock_irq(&owned_ports_lock);
+}
+
+void usbif_release_ports(usbif_priv_t *up)
+{
+    struct list_head *port, *tmp;
+    unsigned long flags;
+    
+    spin_lock_irqsave(&owned_ports_lock, flags);
+    list_for_each_safe(port, tmp, &owned_ports)
+    {
+        owned_port_t *p = list_entry(port, owned_port_t, list);
+        if ( p->usbif_priv == up )
+            __usbif_release_port(p);
+    }
+    spin_unlock_irqrestore(&owned_ports_lock, flags);
+}
+
+static int __init usbif_init(void)
+{
+    int i;
+
+    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
+         !(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+        return 0;
+    
+    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
+        BUG();
+
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    memset(pending_reqs, 0, sizeof(pending_reqs));
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+
+    spin_lock_init(&pend_prod_lock);
+
+    spin_lock_init(&owned_ports_lock);
+    INIT_LIST_HEAD(&owned_ports);
+
+    spin_lock_init(&usbio_schedule_list_lock);
+    INIT_LIST_HEAD(&usbio_schedule_list);
+
+    if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+        BUG();
+    
+    usbif_interface_init();
+
+    usbif_ctrlif_init();
+
+    usb_register(&driver);
+
+    printk(KERN_INFO "Xen USB Backend Initialised");
+
+    return 0;
+}
+
+__initcall(usbif_init);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c
new file mode 100644 (file)
index 0000000..46cca30
--- /dev/null
@@ -0,0 +1,1664 @@
+/*
+ * Xen Virtual USB Frontend Driver 
+ *
+ * This file contains the first version of the Xen virtual USB hub
+ * that I've managed not to delete by mistake (3rd time lucky!).
+ *
+ * Based on Linux's uhci.c, original copyright notices are displayed
+ * below.  Portions also (c) 2004 Intel Research Cambridge
+ * and (c) 2004, 2005 Mark Williamson
+ *
+ * Contact <mark.williamson@cl.cam.ac.uk> or
+ * <xen-devel@lists.sourceforge.net> regarding this code.
+ *
+ * Still to be (maybe) implemented:
+ * - migration / backend restart support?
+ * - support for building / using as a module
+ */
+
+/*
+ * Universal Host Controller Interface driver for USB.
+ *
+ * Maintainer: Johannes Erdfelt <johannes@erdfelt.com>
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@erdfelt.com
+ * (C) Copyright 1999 Randy Dunlap
+ * (C) Copyright 1999 Georg Acher, acher@in.tum.de
+ * (C) Copyright 1999 Deti Fliegl, deti@fliegl.de
+ * (C) Copyright 1999 Thomas Sailer, sailer@ife.ee.ethz.ch
+ * (C) Copyright 1999 Roman Weissgaerber, weissg@vienna.at
+ * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
+ *               support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
+ * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
+ *
+ * Intel documents this fairly well, and as far as I know there
+ * are no royalties or anything like that, but even so there are
+ * people who decided that they want to do the same thing in a
+ * completely different way.
+ *
+ * WARNING! The USB documentation is downright evil. Most of it
+ * is just crap, written by a committee. You're better off ignoring
+ * most of it, the important stuff is:
+ *  - the low-level protocol (fairly simple but lots of small details)
+ *  - working around the horridness of the rest
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#ifdef CONFIG_USB_DEBUG
+#define DEBUG
+#else
+#undef DEBUG
+#endif
+#include <linux/usb.h>
+
+#include <asm/irq.h>
+#include <asm/system.h>
+
+#include "xhci.h"
+
+#include "../../../../../drivers/usb/hcd.h"
+
+#include <asm-xen/xen-public/io/usbif.h>
+#include <asm/ctrl_if.h>
+#include <asm/xen-public/io/domain_controller.h>
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v1.0"
+#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
+                      "Randy Dunlap, Georg Acher, Deti Fliegl, " \
+                      "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
+#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
+
+/*
+ * debug = 0, no debugging messages
+ * debug = 1, dump failed URB's except for stalls
+ * debug = 2, dump all failed URB's (including stalls)
+ */
+#ifdef DEBUG
+static int debug = 1;
+#else
+static int debug = 0;
+#endif
+MODULE_PARM(debug, "i");
+MODULE_PARM_DESC(debug, "Debug level");
+static char *errbuf;
+#define ERRBUF_LEN    (PAGE_SIZE * 8)
+
+static int rh_submit_urb(struct urb *urb);
+static int rh_unlink_urb(struct urb *urb);
+static int xhci_unlink_urb(struct urb *urb);
+static void xhci_call_completion(struct urb *urb);
+static void xhci_drain_ring(void);
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
+static void xhci_finish_completion(void);
+
+#define MAX_URB_LOOP   2048            /* Maximum number of linked URB's */
+
+static kmem_cache_t *xhci_up_cachep;   /* urb_priv cache */
+static struct xhci *xhci;               /* XHCI structure for the interface */
+
+/******************************************************************************
+ * DEBUGGING
+ */
+
+#ifdef DEBUG
+
+static void dump_urb(struct urb *urb)
+{
+    printk(KERN_DEBUG "dumping urb @ %p\n"
+           "  hcpriv = %p\n"
+           "  next = %p\n"
+           "  dev = %p\n"
+           "  pipe = 0x%lx\n"
+           "  status = %d\n"
+           "  transfer_flags = 0x%lx\n"
+           "  transfer_buffer = %p\n"
+           "  transfer_buffer_length = %d\n"
+           "  actual_length = %d\n"
+           "  bandwidth = %d\n"
+           "  setup_packet = %p\n",
+           urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
+           urb->transfer_flags, urb->transfer_buffer,
+           urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
+           urb->setup_packet);
+    if ( urb->setup_packet != NULL )
+        printk(KERN_DEBUG
+               "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
+               urb->setup_packet[0], urb->setup_packet[1],
+               urb->setup_packet[2], urb->setup_packet[3],
+               urb->setup_packet[4], urb->setup_packet[5],
+               urb->setup_packet[6], urb->setup_packet[7]);
+    printk(KERN_DEBUG "complete = %p\n"
+           "interval = %d\n", urb->complete, urb->interval);
+        
+}
+
+static void xhci_show_resp(usbif_response_t *r)
+{
+        printk(KERN_DEBUG "dumping response @ %p\n"
+               "  id=0x%lx\n"
+               "  op=0x%x\n"
+               "  data=0x%x\n"
+               "  status=0x%x\n"
+               "  length=0x%lx\n",
+               r->id, r->operation, r->data, r->status, r->length);
+}
+
+#define DPRINTK(...) printk(KERN_DEBUG __VA_ARGS__)
+
+#else /* DEBUG */
+
+#define dump_urb(blah) ((void)0)
+#define xhci_show_resp(blah) ((void)0)
+#define DPRINTK(blah,...) ((void)0)
+
+#endif /* DEBUG */
+
+/******************************************************************************
+ * RING REQUEST HANDLING
+ */
+
+/**
+ * xhci_construct_isoc - add isochronous information to a request
+ */
+static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb)
+{
+        usbif_iso_t *schedule;
+        int i;
+        struct urb_priv *urb_priv = urb->hcpriv;
+        
+        req->num_iso = urb->number_of_packets;
+        schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL);
+
+        if ( schedule == NULL )
+            return -ENOMEM;
+
+        for ( i = 0; i < req->num_iso; i++ )
+        {
+                schedule[i].buffer_offset = urb->iso_frame_desc[i].offset;
+                schedule[i].length = urb->iso_frame_desc[i].length;
+        }
+
+        urb_priv->schedule = schedule;
+       req->iso_schedule = virt_to_machine(schedule);
+
+        return 0;
+}
+
+/**
+ * xhci_queue_req - construct and queue request for an URB
+ */
+static int xhci_queue_req(struct urb *urb)
+{
+        usbif_request_t *req;
+        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+#if DEBUG
+        printk(KERN_DEBUG
+               "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
+               usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
+               usbif->resp_prod, xhci->usb_resp_cons);
+#endif
+        
+
+        if ( RING_FULL(usb_ring) )
+        {
+                printk(KERN_WARNING
+                       "xhci_queue_req(): USB ring full, not queuing request\n");
+                return -ENOBUFS;
+        }
+
+        /* Stick something in the shared communications ring. */
+       req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+        req->operation       = USBIF_OP_IO;
+        req->port            = 0; /* We don't care what the port is. */
+        req->id              = (unsigned long) urb->hcpriv;
+        req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
+       req->devnum          = usb_pipedevice(urb->pipe);
+        req->direction       = usb_pipein(urb->pipe);
+       req->speed           = usb_pipeslow(urb->pipe);
+        req->pipe_type       = usb_pipetype(urb->pipe);
+        req->length          = urb->transfer_buffer_length;
+        req->transfer_flags  = urb->transfer_flags;
+       req->endpoint        = usb_pipeendpoint(urb->pipe);
+       /* NB: req->speed already set from usb_pipeslow() above. */
+       req->timeout         = urb->timeout * (1000 / HZ);
+
+        if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
+        {
+            int ret = xhci_construct_isoc(req, urb);
+            if ( ret != 0 )
+                return ret;
+        }
+
+       if(urb->setup_packet != NULL)
+                memcpy(req->setup, urb->setup_packet, 8);
+        else
+                memset(req->setup, 0, 8);
+        
+        usb_ring->req_prod_pvt++;
+        RING_PUSH_REQUESTS(usb_ring);
+
+       notify_via_evtchn(xhci->evtchn);
+
+        DPRINTK("Queued request for an URB.\n");
+        dump_urb(urb);
+
+        return -EINPROGRESS;
+}
+
+/**
+ * xhci_queue_probe - queue a probe request for a particular port
+ */
+static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
+{
+        usbif_request_t *req;
+        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+#if DEBUG
+       printk(KERN_DEBUG
+               "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
+               "resp_cons = %d\n", usbif->req_prod,
+               virt_to_machine(&usbif->req_prod),
+              usbif->resp_prod, xhci->usb_resp_cons);
+#endif
+        
+        if ( RING_FULL(usb_ring) )
+        {
+                printk(KERN_WARNING
+                       "xhci_queue_probe(): ring full, not queuing request\n");
+                return NULL;
+        }
+
+        /* Stick something in the shared communications ring. */
+        req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+        memset(req, 0, sizeof(*req));
+
+        req->operation       = USBIF_OP_PROBE;
+        req->port            = port;
+
+        usb_ring->req_prod_pvt++;
+        RING_PUSH_REQUESTS(usb_ring);
+
+       notify_via_evtchn(xhci->evtchn);
+
+        return req;
+}
+
+/**
+ * xhci_port_reset - queue a reset request for a particular port
+ */
+static int xhci_port_reset(usbif_vdev_t port)
+{
+        usbif_request_t *req;
+        usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+        /* We only reset one port at a time, so we only need one variable per
+         * hub. */
+        xhci->awaiting_reset = 1;
+        
+        /* Stick something in the shared communications ring. */
+       req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+        memset(req, 0, sizeof(*req));
+
+        req->operation       = USBIF_OP_RESET;
+        req->port            = port;
+        
+        usb_ring->req_prod_pvt++;
+       RING_PUSH_REQUESTS(usb_ring);
+
+       notify_via_evtchn(xhci->evtchn);
+
+        while ( xhci->awaiting_reset > 0 )
+        {
+                mdelay(1);
+                xhci_drain_ring();
+        }
+
+       xhci->rh.ports[port].pe = 1;
+       xhci->rh.ports[port].pe_chg = 1;
+
+        return xhci->awaiting_reset;
+}
+
+
+/******************************************************************************
+ * RING RESPONSE HANDLING
+ */
+
+static void receive_usb_reset(usbif_response_t *resp)
+{
+    xhci->awaiting_reset = resp->status;
+    rmb();
+    
+}
+
+static void receive_usb_probe(usbif_response_t *resp)
+{
+    spin_lock(&xhci->rh.port_state_lock);
+
+    if ( resp->status > 0 )
+    {
+        if ( resp->status == 1 )
+        {
+            /* If theres a device there and there wasn't one before there must
+             * have been a connection status change. */
+            if( xhci->rh.ports[resp->data].cs == 0 )
+           {
+                xhci->rh.ports[resp->data].cs = 1;
+                xhci->rh.ports[resp->data].ccs = 1;
+                xhci->rh.ports[resp->data].cs_chg = 1;
+           }
+        }
+        else
+            printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
+                   "for port %d\n", resp->status, resp->data);
+    }
+    else if ( resp->status < 0)
+        printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
+               resp->status);
+
+    spin_unlock(&xhci->rh.port_state_lock);
+}
+
+/* Handle a USBIF_OP_IO response.  resp->id round-trips the urb_priv pointer
+ * stashed at submission time; copy the transfer length (and, for ISO pipes,
+ * the per-packet schedule results) back into the URB and clear in_progress.
+ * The final status is propagated by xhci_transfer_result() later. */
+static void receive_usb_io(usbif_response_t *resp)
+{
+        struct urb_priv *urbp = (struct urb_priv *)resp->id;
+        struct urb *urb = urbp->urb;
+
+        urb->actual_length = resp->length;
+        urbp->in_progress = 0;
+
+        if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
+        {
+                int i;
+              
+                /* Copy ISO schedule results back in. */
+                for ( i = 0; i < urb->number_of_packets; i++ )
+                {
+                        urb->iso_frame_desc[i].status
+                                = urbp->schedule[i].status;
+                        urb->iso_frame_desc[i].actual_length
+                                = urbp->schedule[i].length;
+                }
+                /* The schedule page was allocated at submission time and is
+                 * no longer needed once results are copied out. */
+                free_page((unsigned long)urbp->schedule);
+        }
+
+        /* Only set status if it's not been changed since submission.  It might
+         * have been changed if the URB has been unlinked asynchronously, for
+         * instance. */
+       if ( urb->status == -EINPROGRESS )
+                urbp->status = urb->status = resp->status;
+}
+
+/**
+ * xhci_drain_ring - drain responses from the ring, calling handlers
+ *
+ * This may be called from interrupt context when an event is received from the
+ * backend domain, or sometimes in process context whilst waiting for a port
+ * reset or URB completion.
+ *
+ * Dispatches each response to its per-operation handler, then walks the
+ * pending-URB list to finish any URBs whose transfers completed, and finally
+ * runs the deferred completion callbacks.
+ */
+static void xhci_drain_ring(void)
+{
+       struct list_head *tmp, *head;
+       usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+       usbif_response_t *resp;
+        RING_IDX i, rp;
+
+        /* Walk the ring here to get responses, updating URBs to show what
+         * completed. */
+        
+        rp = usb_ring->sring->rsp_prod;
+        rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+        /* Take items off the comms ring, taking care not to overflow. */
+        for ( i = usb_ring->rsp_cons; i != rp; i++ )
+        {
+            resp = RING_GET_RESPONSE(usb_ring, i);
+            
+            /* May need to deal with batching and with putting a ceiling on
+               the number dispatched for performance and anti-dos reasons */
+
+            xhci_show_resp(resp);
+
+            switch ( resp->operation )
+            {
+            case USBIF_OP_PROBE:
+                receive_usb_probe(resp);
+                break;
+                
+            case USBIF_OP_IO:
+                receive_usb_io(resp);
+                break;
+
+            case USBIF_OP_RESET:
+                receive_usb_reset(resp);
+                break;
+
+            default:
+                printk(KERN_WARNING
+                       "error: unknown USB io operation response [%d]\n",
+                       resp->operation);
+                break;
+            }
+        }
+
+        /* Record how far we consumed so the backend can reuse the slots. */
+        usb_ring->rsp_cons = i;
+
+       /* Walk the list of pending URB's to see which ones completed and do
+         * callbacks, etc. */
+       spin_lock(&xhci->urb_list_lock);
+       head = &xhci->urb_list;
+       tmp = head->next;
+       while (tmp != head) {
+               struct urb *urb = list_entry(tmp, struct urb, urb_list);
+
+               /* Advance first: xhci_transfer_result() may unlink urb. */
+               tmp = tmp->next;
+
+               /* Checks the status and does all of the magic necessary */
+               xhci_transfer_result(xhci, urb);
+       }
+       spin_unlock(&xhci->urb_list_lock);
+
+       /* Run completion callbacks outside urb_list_lock. */
+       xhci_finish_completion();
+}
+
+
+/* Event-channel interrupt handler: all work is done by draining the ring. */
+static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs)
+{
+        xhci_drain_ring();
+}
+
+/******************************************************************************
+ * HOST CONTROLLER FUNCTIONALITY
+ */
+
+/**
+ * no-op implementation of private device alloc / free routines
+ * (used for both .allocate and .deallocate in xhci_device_operations)
+ */
+static int xhci_do_nothing_dev(struct usb_device *dev)
+{
+       return 0;
+}
+
+/* Queue a URB for deferred completion; the callback itself runs later in
+ * xhci_finish_completion(), outside the urb_list_lock. */
+static inline void xhci_add_complete(struct urb *urb)
+{
+       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xhci->complete_list_lock, flags);
+       list_add_tail(&urbp->complete_list, &xhci->complete_list);
+       spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+}
+
+/* When this returns, the owner of the URB may free its
+ * storage.
+ *
+ * We spin and wait for the URB to complete before returning.
+ *
+ * Call with urb->lock acquired.
+ */
+static void xhci_delete_urb(struct urb *urb)
+{
+        struct urb_priv *urbp;
+
+       urbp = urb->hcpriv;
+
+        /* If there's no urb_priv structure for this URB then it can't have
+         * been submitted at all. */
+       if ( urbp == NULL )
+               return;
+
+       /* For now we just spin until the URB completes.  It shouldn't take too
+         * long and we don't expect to have to do this very often. */
+       while ( urb->status == -EINPROGRESS )
+        {
+            /* Drain the ring ourselves in case the response has already
+             * arrived but the interrupt hasn't been serviced. */
+            xhci_drain_ring();
+            mdelay(1);
+        }
+
+       /* Now we know that further transfers to the buffer won't
+        * occur, so we can safely return. */
+}
+
+/* Allocate and initialise the driver-private state for a URB, linking it in
+ * via urb->hcpriv.  Returns the new urb_priv, or NULL on allocation failure.
+ * Uses SLAB_ATOMIC since submission may happen in atomic context. */
+static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb)
+{
+       struct urb_priv *urbp;
+
+       urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC);
+       if (!urbp) {
+               err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n");
+               return NULL;
+       }
+
+       memset((void *)urbp, 0, sizeof(*urbp));
+
+       /* inserttime is used later for URB timeout detection. */
+       urbp->inserttime = jiffies;
+       urbp->urb = urb;
+       urbp->dev = urb->dev;
+       
+       INIT_LIST_HEAD(&urbp->complete_list);
+
+       urb->hcpriv = urbp;
+
+       return urbp;
+}
+
+/*
+ * Free the driver-private state of a URB and clear urb->hcpriv.
+ * Warns (but proceeds) if the URB still appears on a driver list.
+ *
+ * MUST be called with urb->lock acquired
+ */
+/* When is this called?  Do we need to stop the transfer (as we
+ * currently do)? */
+static void xhci_destroy_urb_priv(struct urb *urb)
+{
+    struct urb_priv *urbp;
+    
+    urbp = (struct urb_priv *)urb->hcpriv;
+    if (!urbp)
+        return;
+
+    if (!list_empty(&urb->urb_list))
+        warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb);
+    
+    if (!list_empty(&urbp->complete_list))
+        warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb);
+    
+    kmem_cache_free(xhci_up_cachep, urb->hcpriv);
+
+    urb->hcpriv = NULL;
+}
+
+/**
+ * Try to find URBs in progress on the same pipe to the same device.
+ *
+ * Returns the first in-progress URB matching both device and pipe, or NULL.
+ * Used by xhci_submit_urb() to reject duplicate non-bulk submissions.
+ *
+ * MUST be called with xhci->urb_list_lock acquired
+ */
+static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb)
+{
+       struct list_head *tmp, *head;
+
+       /* We don't match Isoc transfers since they are special */
+       if (usb_pipeisoc(urb->pipe))
+               return NULL;
+
+       head = &xhci->urb_list;
+       tmp = head->next;
+       while (tmp != head) {
+               struct urb *u = list_entry(tmp, struct urb, urb_list);
+
+               tmp = tmp->next;
+
+               if (u->dev == urb->dev && u->pipe == urb->pipe &&
+                   u->status == -EINPROGRESS)
+                       return u;
+       }
+
+       return NULL;
+}
+
+/*
+ * Submit a URB: validate it, allocate its urb_priv, short-circuit requests
+ * aimed at the virtual root hub, and otherwise queue a request on the shared
+ * ring (claiming bus bandwidth first for interrupt/isochronous pipes).
+ *
+ * Returns 0 once the URB is queued (status left at -EINPROGRESS) or a
+ * negative errno; on failure the URB is torn down before returning.
+ */
+static int xhci_submit_urb(struct urb *urb)
+{
+       int ret = -EINVAL;
+       unsigned long flags;
+       struct urb *eurb;
+       int bustime;
+
+        /* NOTE(review): urb is handed to dump_urb() before the NULL check
+         * below -- confirm dump_urb() tolerates (or never receives) NULL. */
+        DPRINTK("URB submitted to XHCI driver.\n");
+        dump_urb(urb);
+
+       if (!urb)
+               return -EINVAL;
+
+       if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) {
+               warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb);
+               return -ENODEV;
+       }
+
+        if ( urb->dev->devpath == NULL )
+                BUG();
+
+       /* Hold a device reference for the lifetime of the URB; dropped in
+        * xhci_call_completion() (or below on early failure). */
+       usb_inc_dev_use(urb->dev);
+
+       spin_lock_irqsave(&xhci->urb_list_lock, flags);
+       spin_lock(&urb->lock);
+
+       if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET ||
+           urb->status == -ECONNABORTED) {
+               dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status);
+               /* Since we can have problems on the out path */
+               spin_unlock(&urb->lock);
+               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+               usb_dec_dev_use(urb->dev);
+
+               return ret;
+       }
+
+       INIT_LIST_HEAD(&urb->urb_list);
+       if (!xhci_alloc_urb_priv(urb)) {
+               ret = -ENOMEM;
+
+               goto out;
+       }
+
+        ( (struct urb_priv *)urb->hcpriv )->in_progress = 1;
+
+       /* Refuse a second outstanding URB on the same pipe unless the caller
+        * explicitly asked for bulk queueing. */
+       eurb = xhci_find_urb_ep(xhci, urb);
+       if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) {
+               ret = -ENXIO;
+
+               goto out;
+       }
+
+       /* Short circuit the virtual root hub */
+       if (urb->dev == xhci->rh.dev) {
+               ret = rh_submit_urb(urb);
+
+               goto out;
+       }
+
+       switch (usb_pipetype(urb->pipe)) {
+       case PIPE_CONTROL:
+       case PIPE_BULK:
+               ret = xhci_queue_req(urb);
+               break;
+
+       case PIPE_INTERRUPT:
+               if (urb->bandwidth == 0) {      /* not yet checked/allocated */
+                       bustime = usb_check_bandwidth(urb->dev, urb);
+                       if (bustime < 0)
+                               ret = bustime;
+                       else {
+                               ret = xhci_queue_req(urb);
+                               if (ret == -EINPROGRESS)
+                                       usb_claim_bandwidth(urb->dev, urb,
+                                                            bustime, 0);
+                       }
+               } else          /* bandwidth is already set */
+                       ret = xhci_queue_req(urb);
+               break;
+
+       case PIPE_ISOCHRONOUS:
+               if (urb->bandwidth == 0) {      /* not yet checked/allocated */
+                       if (urb->number_of_packets <= 0) {
+                               ret = -EINVAL;
+                               break;
+                       }
+                       bustime = usb_check_bandwidth(urb->dev, urb);
+                       if (bustime < 0) {
+                               ret = bustime;
+                               break;
+                       }
+
+                       ret = xhci_queue_req(urb);
+                       if (ret == -EINPROGRESS)
+                               usb_claim_bandwidth(urb->dev, urb, bustime, 1);
+               } else          /* bandwidth is already set */
+                       ret = xhci_queue_req(urb);
+               break;
+       }
+out:
+       urb->status = ret;
+
+       if (ret == -EINPROGRESS) {
+               /* We use _tail to make find_urb_ep more efficient */
+               list_add_tail(&urb->urb_list, &xhci->urb_list);
+
+               spin_unlock(&urb->lock);
+               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+               return 0;
+       }
+
+       /* Failure (or synchronous root-hub completion): tear down. */
+       xhci_delete_urb(urb);
+
+       spin_unlock(&urb->lock);
+       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+       /* Only call completion if it was successful */
+       if (!ret)
+               xhci_call_completion(urb);
+
+       return ret;
+}
+
+/*
+ * Return the result of a transfer
+ *
+ * Examines the URB's progress flag and current status, releases any claimed
+ * bus bandwidth, removes finished URBs from xhci->urb_list and queues them
+ * for their completion callback via xhci_add_complete().  Interrupt URBs
+ * with a nonzero interval are completed but left linked for resubmission.
+ *
+ * MUST be called with urb_list_lock acquired
+ */
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
+{
+       int ret = 0;
+       unsigned long flags;
+       struct urb_priv *urbp;
+
+       /* The root hub is special */
+       if (urb->dev == xhci->rh.dev)
+               return;
+
+       spin_lock_irqsave(&urb->lock, flags);
+
+       urbp = (struct urb_priv *)urb->hcpriv;
+
+        /* Still waiting on the backend's response for this URB. */
+        if ( urbp->in_progress )
+                ret = -EINPROGRESS;
+
+        /* Short transfer: only an error if short packets are disabled. */
+        if (urb->actual_length < urb->transfer_buffer_length) {
+                if (urb->transfer_flags & USB_DISABLE_SPD) {
+                        ret = -EREMOTEIO;
+                }
+        }
+
+       if (urb->status == -EPIPE)
+        {
+                ret = urb->status;
+               /* endpoint has stalled - mark it halted */
+               usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
+                                  usb_pipeout(urb->pipe));
+        }
+
+       if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
+            (ret != 0 && debug > 1)) {
+               /* Some debugging code.  BUGFIX: this previously printed the
+                * undeclared identifier 'status' (harmless only because dbg()
+                * compiles to nothing without DEBUG); the local is 'ret'. */
+               dbg("xhci_result_interrupt/bulk() failed with status %x",
+                       ret);
+       }
+
+       if (ret == -EINPROGRESS)
+               goto out;
+
+       switch (usb_pipetype(urb->pipe)) {
+       case PIPE_CONTROL:
+       case PIPE_BULK:
+       case PIPE_ISOCHRONOUS:
+               /* Release bandwidth for Interrupt or Isoc. transfers */
+               /* Spinlock needed ? */
+               if (urb->bandwidth)
+                       usb_release_bandwidth(urb->dev, urb, 1);
+               xhci_delete_urb(urb);
+               break;
+       case PIPE_INTERRUPT:
+               /* Interrupts are an exception */
+               if (urb->interval)
+                       goto out_complete;
+
+               /* Release bandwidth for Interrupt or Isoc. transfers */
+               /* Spinlock needed ? */
+               if (urb->bandwidth)
+                       usb_release_bandwidth(urb->dev, urb, 0);
+               xhci_delete_urb(urb);
+               break;
+       default:
+               info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
+                     usb_pipetype(urb->pipe), urb);
+       }
+
+       /* Remove it from xhci->urb_list */
+       list_del_init(&urb->urb_list);
+
+out_complete:
+       xhci_add_complete(urb);
+
+out:
+       spin_unlock_irqrestore(&urb->lock, flags);
+}
+
+/*
+ * Unlink (cancel) a pending URB.
+ *
+ * Releases any claimed bandwidth, removes the URB from xhci->urb_list and
+ * then either hands it to the root-hub emulation, marks it for asynchronous
+ * completion (USB_ASYNC_UNLINK), or waits for the transfer to drain and
+ * completes it synchronously.  Returns 0 on success or a negative errno.
+ */
+static int xhci_unlink_urb(struct urb *urb)
+{
+       unsigned long flags;
+       struct urb_priv *urbp;
+
+       if (!urb)
+               return -EINVAL;
+
+       if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
+               return -ENODEV;
+
+       /* BUGFIX: fetch hcpriv only after the NULL checks above; the
+        * original dereferenced urb in the declaration's initializer,
+        * before 'if (!urb)' could reject a NULL pointer. */
+       urbp = urb->hcpriv;
+
+       spin_lock_irqsave(&xhci->urb_list_lock, flags);
+       spin_lock(&urb->lock);
+
+       /* Release bandwidth for Interrupt or Isoc. transfers */
+       /* Spinlock needed ? */
+       if (urb->bandwidth) {
+               switch (usb_pipetype(urb->pipe)) {
+               case PIPE_INTERRUPT:
+                       usb_release_bandwidth(urb->dev, urb, 0);
+                       break;
+               case PIPE_ISOCHRONOUS:
+                       usb_release_bandwidth(urb->dev, urb, 1);
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       /* Already finished: nothing to cancel. */
+       if (urb->status != -EINPROGRESS) {
+               spin_unlock(&urb->lock);
+               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+               return 0;
+       }
+
+       list_del_init(&urb->urb_list);
+
+       /* Short circuit the virtual root hub */
+       if (urb->dev == xhci->rh.dev) {
+               rh_unlink_urb(urb);
+
+               spin_unlock(&urb->lock);
+               spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+               xhci_call_completion(urb);
+       } else {
+               if (urb->transfer_flags & USB_ASYNC_UNLINK) {
+                        /* We currently don't currently attempt to cancel URBs
+                         * that have been queued in the ring.  We handle async
+                         * unlinked URBs when they complete. */
+                       urbp->status = urb->status = -ECONNABORTED;
+                       spin_unlock(&urb->lock);
+                       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+               } else {
+                       urb->status = -ENOENT;
+
+                       spin_unlock(&urb->lock);
+                       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+                       if (in_interrupt()) {   /* wait at least 1 frame */
+                               static int errorcount = 10;
+
+                               if (errorcount--)
+                                       dbg("xhci_unlink_urb called from interrupt for urb %p", urb);
+                               udelay(1000);
+                       } else
+                               schedule_timeout(1+1*HZ/1000);
+
+                        /* Spin until the backend has finished with it. */
+                        xhci_delete_urb(urb);
+
+                       xhci_call_completion(urb);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Invoke a URB's completion callback, handling killed/unlinked URBs, linked
+ * URB rings (urb->next chains) and automatic resubmission of interrupt URBs
+ * with a nonzero interval.  Drops the device reference taken at submission
+ * when the URB is truly finished.
+ */
+static void xhci_call_completion(struct urb *urb)
+{
+       struct urb_priv *urbp;
+       struct usb_device *dev = urb->dev;
+       int is_ring = 0, killed, resubmit_interrupt, status;
+       struct urb *nurb;
+       unsigned long flags;
+
+       spin_lock_irqsave(&urb->lock, flags);
+
+       urbp = (struct urb_priv *)urb->hcpriv;
+       if (!urbp || !urb->dev) {
+               spin_unlock_irqrestore(&urb->lock, flags);
+               return;
+       }
+
+       /* 'killed' = the URB was unlinked/aborted rather than completing. */
+       killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
+                       urb->status == -ECONNRESET);
+       resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
+                       urb->interval);
+
+       nurb = urb->next;
+       if (nurb && !killed) {
+               int count = 0;
+
+               /* Walk the chain of linked URBs: if any of them was killed,
+                * treat the whole chain as killed.  MAX_URB_LOOP bounds the
+                * walk in case the chain is corrupt. */
+               while (nurb && nurb != urb && count < MAX_URB_LOOP) {
+                       if (nurb->status == -ENOENT ||
+                           nurb->status == -ECONNABORTED ||
+                           nurb->status == -ECONNRESET) {
+                               killed = 1;
+                               break;
+                       }
+
+                       nurb = nurb->next;
+                       count++;
+               }
+
+               if (count == MAX_URB_LOOP)
+                       err("xhci_call_completion: too many linked URB's, loop? (first loop)");
+
+               /* Check to see if chain is a ring */
+               is_ring = (nurb == urb);
+       }
+
+       status = urbp->status;
+       if (!resubmit_interrupt || killed)
+               /* We don't need urb_priv anymore */
+               xhci_destroy_urb_priv(urb);
+
+       if (!killed)
+               urb->status = status;
+
+       spin_unlock_irqrestore(&urb->lock, flags);
+
+       if (urb->complete)
+               urb->complete(urb);
+
+       if (resubmit_interrupt)
+               /* Recheck the status. The completion handler may have */
+               /*  unlinked the resubmitting interrupt URB */
+               killed = (urb->status == -ENOENT ||
+                         urb->status == -ECONNABORTED ||
+                         urb->status == -ECONNRESET);
+
+       if (resubmit_interrupt && !killed) {
+                if ( urb->dev != xhci->rh.dev )
+                        xhci_queue_req(urb); /* XXX What if this fails? */
+                /* Don't need to resubmit URBs for the virtual root dev. */
+       } else {
+               if (is_ring && !killed) {
+                       urb->dev = dev;
+                       xhci_submit_urb(urb);
+               } else {
+                       /* We decrement the usage count after we're done */
+                       /*  with everything */
+                       usb_dec_dev_use(dev);
+               }
+       }
+}
+
+/* Run completion callbacks for every URB queued on xhci->complete_list.
+ * The lock is dropped around each callback (completions may resubmit or
+ * unlink URBs), so the list head/iterator are re-read after reacquiring. */
+static void xhci_finish_completion(void)
+{
+       struct list_head *tmp, *head;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xhci->complete_list_lock, flags);
+       head = &xhci->complete_list;
+       tmp = head->next;
+       while (tmp != head) {
+               struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
+                                                   complete_list);
+               struct urb *urb = urbp->urb;
+
+               list_del_init(&urbp->complete_list);
+               spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+
+               xhci_call_completion(urb);
+
+               spin_lock_irqsave(&xhci->complete_list_lock, flags);
+               head = &xhci->complete_list;
+               tmp = head->next;
+       }
+       spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+}
+
+/* usbcore entry points for this (virtual) host controller. */
+static struct usb_operations xhci_device_operations = {
+       .allocate = xhci_do_nothing_dev,
+       .deallocate = xhci_do_nothing_dev,
+        /* It doesn't look like any drivers actually care what the frame number
+        * is at the moment!  If necessary, we could approximate the current
+        * frame number by passing it from the backend in response messages. */
+       .get_frame_number = NULL,
+       .submit_urb = xhci_submit_urb,
+       .unlink_urb = xhci_unlink_urb
+};
+
+/******************************************************************************
+ * VIRTUAL ROOT HUB EMULATION
+ */
+
+/* Device descriptor for the emulated root hub. */
+static __u8 root_hub_dev_des[] =
+{
+       0x12,                   /*  __u8  bLength; */
+       0x01,                   /*  __u8  bDescriptorType; Device */
+       0x00,                   /*  __u16 bcdUSB; v1.0 */
+       0x01,
+       0x09,                   /*  __u8  bDeviceClass; HUB_CLASSCODE */
+       0x00,                   /*  __u8  bDeviceSubClass; */
+       0x00,                   /*  __u8  bDeviceProtocol; */
+       0x08,                   /*  __u8  bMaxPacketSize0; 8 Bytes */
+       0x00,                   /*  __u16 idVendor; */
+       0x00,
+       0x00,                   /*  __u16 idProduct; */
+       0x00,
+       0x00,                   /*  __u16 bcdDevice; */
+       0x00,
+       0x00,                   /*  __u8  iManufacturer; */
+       0x02,                   /*  __u8  iProduct; */
+       0x01,                   /*  __u8  iSerialNumber; */
+       0x01                    /*  __u8  bNumConfigurations; */
+};
+
+
+/* Configuration descriptor for the emulated root hub: one configuration,
+ * one interface, one interrupt-IN endpoint (status change endpoint). */
+static __u8 root_hub_config_des[] =
+{
+       0x09,                   /*  __u8  bLength; */
+       0x02,                   /*  __u8  bDescriptorType; Configuration */
+       0x19,                   /*  __u16 wTotalLength; */
+       0x00,
+       0x01,                   /*  __u8  bNumInterfaces; */
+       0x01,                   /*  __u8  bConfigurationValue; */
+       0x00,                   /*  __u8  iConfiguration; */
+       0x40,                   /*  __u8  bmAttributes;
+                                       Bit 7: Bus-powered, 6: Self-powered,
+                                       Bit 5 Remote-wakeup, 4..0: resvd */
+       0x00,                   /*  __u8  MaxPower; */
+
+       /* interface */
+       0x09,                   /*  __u8  if_bLength; */
+       0x04,                   /*  __u8  if_bDescriptorType; Interface */
+       0x00,                   /*  __u8  if_bInterfaceNumber; */
+       0x00,                   /*  __u8  if_bAlternateSetting; */
+       0x01,                   /*  __u8  if_bNumEndpoints; */
+       0x09,                   /*  __u8  if_bInterfaceClass; HUB_CLASSCODE */
+       0x00,                   /*  __u8  if_bInterfaceSubClass; */
+       0x00,                   /*  __u8  if_bInterfaceProtocol; */
+       0x00,                   /*  __u8  if_iInterface; */
+
+       /* endpoint */
+       0x07,                   /*  __u8  ep_bLength; */
+       0x05,                   /*  __u8  ep_bDescriptorType; Endpoint */
+       0x81,                   /*  __u8  ep_bEndpointAddress; IN Endpoint 1 */
+       0x03,                   /*  __u8  ep_bmAttributes; Interrupt */
+       0x08,                   /*  __u16 ep_wMaxPacketSize; 8 Bytes */
+       0x00,
+       0xff                    /*  __u8  ep_bInterval; 255 ms */
+};
+
+/* Hub class descriptor template.  Note: NOT const -- byte 2 (bNbrPorts) is
+ * patched at runtime with xhci->rh.numports in rh_submit_urb(). */
+static __u8 root_hub_hub_des[] =
+{
+       0x09,                   /*  __u8  bLength; */
+       0x29,                   /*  __u8  bDescriptorType; Hub-descriptor */
+       0x02,                   /*  __u8  bNbrPorts; */
+       0x00,                   /* __u16  wHubCharacteristics; */
+       0x00,
+       0x01,                   /*  __u8  bPwrOn2pwrGood; 2ms */
+       0x00,                   /*  __u8  bHubContrCurrent; 0 mA */
+       0x00,                   /*  __u8  DeviceRemovable; *** 7 Ports max *** */
+       0xff                    /*  __u8  PortPwrCtrlMask; *** 7 ports max *** */
+};
+
+/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT
+ *
+ * Builds the hub status-change bitmap (bit N+1 set when port N changed),
+ * stores it in the interrupt URB's buffer and, if anything changed and a
+ * listener is registered, completes the URB.  Always returns 0. */
+static int rh_send_irq(struct urb *urb)
+{
+       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
+        xhci_port_t *ports = xhci->rh.ports;
+       unsigned long flags;
+       int i, len = 1;
+       __u16 data = 0;
+
+       spin_lock_irqsave(&urb->lock, flags);
+       for (i = 0; i < xhci->rh.numports; i++) {
+                /* Set a bit if anything at all has changed on the port, as per
+                * USB spec 11.12 */
+               data |= (ports[i].cs_chg || ports[i].pe_chg )
+                        ? (1 << (i + 1))
+                        : 0;
+
+               /* Report length in bytes: one bit per port plus bit 0. */
+               len = (i + 1) / 8 + 1;
+       }
+
+       *(__u16 *) urb->transfer_buffer = cpu_to_le16(data);
+       urb->actual_length = len;
+       urbp->status = 0;
+
+       spin_unlock_irqrestore(&urb->lock, flags);
+
+       if ((data > 0) && (xhci->rh.send != 0)) {
+               dbg("root-hub INT complete: data: %x", data);
+               xhci_call_completion(urb);
+       }
+
+       return 0;
+}
+
+/* Virtual Root Hub INTs are polled by this timer every "interval" ms */
+static int rh_init_int_timer(struct urb *urb);
+
+/* Timer callback: probe every port for newly attached devices, deliver any
+ * pending root-hub interrupt data, time out overdue URBs, and re-arm the
+ * timer for the next poll. */
+static void rh_int_timer_do(unsigned long ptr)
+{
+       struct urb *urb = (struct urb *)ptr;
+       struct list_head list, *tmp, *head;
+       unsigned long flags;
+       int i;
+
+       for ( i = 0; i < xhci->rh.numports; i++)
+                xhci_queue_probe(i);
+
+       if (xhci->rh.send)
+               rh_send_irq(urb);
+
+       INIT_LIST_HEAD(&list);
+
+       /* Collect timed-out URBs onto a private list first; they are
+        * unlinked below, after urb_list_lock has been dropped. */
+       spin_lock_irqsave(&xhci->urb_list_lock, flags);
+       head = &xhci->urb_list;
+       tmp = head->next;
+       while (tmp != head) {
+               struct urb *u = list_entry(tmp, struct urb, urb_list);
+               struct urb_priv *up = (struct urb_priv *)u->hcpriv;
+
+               tmp = tmp->next;
+
+               spin_lock(&u->lock);
+
+               /* Check if the URB timed out */
+               if (u->timeout && time_after_eq(jiffies,
+                                                up->inserttime + u->timeout)) {
+                       list_del(&u->urb_list);
+                       list_add_tail(&u->urb_list, &list);
+               }
+
+               spin_unlock(&u->lock);
+       }
+       spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+       head = &list;
+       tmp = head->next;
+       while (tmp != head) {
+               struct urb *u = list_entry(tmp, struct urb, urb_list);
+
+               tmp = tmp->next;
+
+               u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED;
+               xhci_unlink_urb(u);
+       }
+
+       rh_init_int_timer(urb);
+}
+
+/* Root Hub INTs are polled by this timer.
+ * (Re)arms the one-shot poll timer; the interval is clamped to a 30ms
+ * minimum.  Always returns 0. */
+static int rh_init_int_timer(struct urb *urb)
+{
+       xhci->rh.interval = urb->interval;
+       init_timer(&xhci->rh.rh_int_timer);
+       xhci->rh.rh_int_timer.function = rh_int_timer_do;
+       xhci->rh.rh_int_timer.data = (unsigned long)urb;
+       xhci->rh.rh_int_timer.expires = jiffies
+                + (HZ * (urb->interval < 30 ? 30 : urb->interval)) / 1000;
+       add_timer(&xhci->rh.rh_int_timer);
+
+       return 0;
+}
+
+/* Set the reply length and leave the request switch with stat == 0. */
+#define OK(x)                  len = (x); break
+
+/* Root Hub Control Pipe
+ *
+ * Emulates the hub-class and standard control requests for the virtual root
+ * hub.  Interrupt-pipe submissions just register the URB and start the poll
+ * timer.  Returns 0/-EPIPE for control requests, or -EINPROGRESS for the
+ * interrupt pipe. */
+static int rh_submit_urb(struct urb *urb)
+{
+       unsigned int pipe = urb->pipe;
+       struct usb_ctrlrequest *cmd =
+                (struct usb_ctrlrequest *)urb->setup_packet;
+       void *data = urb->transfer_buffer;
+       int leni = urb->transfer_buffer_length;
+       int len = 0;
+       xhci_port_t *status;
+       int stat = 0;
+       int i;
+       int retstatus;
+        unsigned long flags;
+        
+       __u16 cstatus;
+       __u16 bmRType_bReq;
+       __u16 wValue;
+       __u16 wIndex;
+       __u16 wLength;
+
+       if (usb_pipetype(pipe) == PIPE_INTERRUPT) {
+               xhci->rh.urb = urb;
+               xhci->rh.send = 1;
+               xhci->rh.interval = urb->interval;
+               rh_init_int_timer(urb);
+
+               return -EINPROGRESS;
+       }
+
+       bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8;
+       wValue = le16_to_cpu(cmd->wValue);
+       wIndex = le16_to_cpu(cmd->wIndex);
+       wLength = le16_to_cpu(cmd->wLength);
+
+       for (i = 0; i < 8; i++)
+               xhci->rh.c_p_r[i] = 0;
+
+        /* NOTE(review): this port pointer is computed before the request is
+         * decoded; device-level requests carry wIndex == 0, which makes this
+         * &ports[-1].  It appears only to be dereferenced in the per-port
+         * cases, but confirm no request path uses it with wIndex == 0. */
+        status = &xhci->rh.ports[wIndex - 1];
+
+        spin_lock_irqsave(&xhci->rh.port_state_lock, flags);
+
+       switch (bmRType_bReq) {
+               /* Request Destination:
+                  without flags: Device,
+                  RH_INTERFACE: interface,
+                  RH_ENDPOINT: endpoint,
+                  RH_CLASS means HUB here,
+                  RH_OTHER | RH_CLASS  almost ever means HUB_PORT here
+               */
+
+       case RH_GET_STATUS:
+               *(__u16 *)data = cpu_to_le16(1);
+               OK(2);
+       case RH_GET_STATUS | RH_INTERFACE:
+               *(__u16 *)data = cpu_to_le16(0);
+               OK(2);
+       case RH_GET_STATUS | RH_ENDPOINT:
+               *(__u16 *)data = cpu_to_le16(0);
+               OK(2);
+       case RH_GET_STATUS | RH_CLASS:
+               *(__u32 *)data = cpu_to_le32(0);
+               OK(4);          /* hub power */
+       case RH_GET_STATUS | RH_OTHER | RH_CLASS:
+               /* Build wPortStatus / wPortChange from the cached port state. */
+               cstatus = (status->cs_chg) |
+                       (status->pe_chg << 1) |
+                       (xhci->rh.c_p_r[wIndex - 1] << 4);
+               retstatus = (status->ccs) |
+                       (status->pe << 1) |
+                       (status->susp << 2) |
+                       (status->pr << 8) |
+                       (1 << 8) |      /* power on */
+                       (status->lsda << 9);
+               *(__u16 *)data = cpu_to_le16(retstatus);
+               *(__u16 *)(data + 2) = cpu_to_le16(cstatus);
+               OK(4);
+       case RH_CLEAR_FEATURE | RH_ENDPOINT:
+               switch (wValue) {
+               case RH_ENDPOINT_STALL:
+                       OK(0);
+               }
+               break;
+       case RH_CLEAR_FEATURE | RH_CLASS:
+               switch (wValue) {
+               case RH_C_HUB_OVER_CURRENT:
+                       OK(0);  /* hub power over current */
+               }
+               break;
+       case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS:
+               switch (wValue) {
+               case RH_PORT_ENABLE:
+                        status->pe     = 0;
+                       OK(0);
+               case RH_PORT_SUSPEND:
+                        status->susp   = 0;
+                       OK(0);
+               case RH_PORT_POWER:
+                       OK(0);  /* port power */
+               case RH_C_PORT_CONNECTION:
+                        status->cs_chg = 0;
+                       OK(0);
+               case RH_C_PORT_ENABLE:
+                        status->pe_chg = 0;
+                       OK(0);
+               case RH_C_PORT_SUSPEND:
+                       /*** WR_RH_PORTSTAT(RH_PS_PSSC); */
+                       OK(0);
+               case RH_C_PORT_OVER_CURRENT:
+                       OK(0);  /* port power over current */
+               case RH_C_PORT_RESET:
+                       xhci->rh.c_p_r[wIndex - 1] = 0;
+                       OK(0);
+               }
+               break;
+       case RH_SET_FEATURE | RH_OTHER | RH_CLASS:
+               switch (wValue) {
+               case RH_PORT_SUSPEND:
+                        status->susp = 1;      
+                       OK(0);
+               case RH_PORT_RESET:
+                {
+                        int ret;
+                        xhci->rh.c_p_r[wIndex - 1] = 1;
+                        status->pr = 0;
+                        status->pe = 1;
+                        /* Forward the reset to the backend for the real port. */
+                        ret = xhci_port_reset(wIndex - 1);
+                        /* XXX MAW: should probably cancel queued transfers during reset... *\/ */
+                        if ( ret == 0 ) { OK(0); }
+                        else { return ret; }
+                }
+                break;
+               case RH_PORT_POWER:
+                       OK(0); /* port power ** */
+               case RH_PORT_ENABLE:
+                        status->pe = 1;
+                       OK(0);
+               }
+               break;
+       case RH_SET_ADDRESS:
+               xhci->rh.devnum = wValue;
+               OK(0);
+       case RH_GET_DESCRIPTOR:
+               switch ((wValue & 0xff00) >> 8) {
+               case 0x01:      /* device descriptor */
+                       len = min_t(unsigned int, leni,
+                                 min_t(unsigned int,
+                                     sizeof(root_hub_dev_des), wLength));
+                       memcpy(data, root_hub_dev_des, len);
+                       OK(len);
+               case 0x02:      /* configuration descriptor */
+                       len = min_t(unsigned int, leni,
+                                 min_t(unsigned int,
+                                     sizeof(root_hub_config_des), wLength));
+                       memcpy (data, root_hub_config_des, len);
+                       OK(len);
+               case 0x03:      /* string descriptors */
+                       len = usb_root_hub_string (wValue & 0xff,
+                               0, "XHCI-alt",
+                               data, wLength);
+                       if (len > 0) {
+                               OK(min_t(int, leni, len));
+                       } else 
+                               stat = -EPIPE;
+               }
+               break;
+       case RH_GET_DESCRIPTOR | RH_CLASS:
+               /* Patch the real port count into the hub descriptor template. */
+               root_hub_hub_des[2] = xhci->rh.numports;
+               len = min_t(unsigned int, leni,
+                         min_t(unsigned int, sizeof(root_hub_hub_des), wLength));
+               memcpy(data, root_hub_hub_des, len);
+               OK(len);
+       case RH_GET_CONFIGURATION:
+               *(__u8 *)data = 0x01;
+               OK(1);
+       case RH_SET_CONFIGURATION:
+               OK(0);
+       case RH_GET_INTERFACE | RH_INTERFACE:
+               *(__u8 *)data = 0x00;
+               OK(1);
+       case RH_SET_INTERFACE | RH_INTERFACE:
+               OK(0);
+       default:
+               stat = -EPIPE;
+       }
+
+        spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags);
+
+       urb->actual_length = len;
+
+       return stat;
+}
+
+/*
+ * rh_unlink_urb - unlink the root hub's pending interrupt URB
+ *
+ * If @urb is the URB currently queued on the virtual root hub, mark it
+ * as unlinked, stop status polling, and cancel the root-hub interrupt
+ * timer.  Always returns 0.
+ *
+ * MUST be called with urb->lock acquired
+ */
+static int rh_unlink_urb(struct urb *urb)
+{
+       if (xhci->rh.urb == urb) {
+               urb->status = -ENOENT;  /* standard "unlinked" status */
+               xhci->rh.send = 0;      /* stop rh_int_timer rescheduling sends */
+               xhci->rh.urb = NULL;
+               del_timer(&xhci->rh.rh_int_timer);
+       }
+       return 0;
+}
+
+/******************************************************************************
+ * CONTROL PLANE FUNCTIONALITY
+ */
+
+/**
+ * alloc_xhci - initialise a new virtual root hub for a new USB device channel
+ *
+ * Allocates the global @xhci state, creates and registers its USB bus,
+ * and allocates the virtual root-hub device.  The interface starts in
+ * USBIF_STATE_CLOSED; rh.numports stays 0 until the backend reports the
+ * real port count (see usbif_status_change).
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int alloc_xhci(void)
+{
+       int retval;
+       struct usb_bus *bus;
+
+       /* NOTE(review): this default makes the bus / root-hub allocation
+        * failures below return -EBUSY; -ENOMEM looks like the intended
+        * code for those paths -- confirm. */
+       retval = -EBUSY;
+
+       xhci = kmalloc(sizeof(*xhci), GFP_KERNEL);
+       if (!xhci) {
+               err("couldn't allocate xhci structure");
+               retval = -ENOMEM;
+               goto err_alloc_xhci;
+       }
+
+       xhci->state = USBIF_STATE_CLOSED;
+
+       spin_lock_init(&xhci->urb_list_lock);
+       INIT_LIST_HEAD(&xhci->urb_list);
+
+       spin_lock_init(&xhci->complete_list_lock);
+       INIT_LIST_HEAD(&xhci->complete_list);
+
+       spin_lock_init(&xhci->frame_list_lock);
+
+       bus = usb_alloc_bus(&xhci_device_operations);
+
+       if (!bus) {
+               err("unable to allocate bus");
+               goto err_alloc_bus;
+       }
+
+       xhci->bus = bus;
+       bus->bus_name = "XHCI";
+       bus->hcpriv = xhci;     /* back-pointer so bus callbacks can find us */
+
+       usb_register_bus(xhci->bus);
+
+       /* Initialize the root hub */
+
+       xhci->rh.numports = 0;
+
+       xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus);
+       if (!xhci->rh.dev) {
+               err("unable to allocate root hub");
+               goto err_alloc_root_hub;
+       }
+
+       xhci->state = 0;        /* == USBIF_STATE_CLOSED */
+
+       return 0;
+
+/*
+ * error exits: unwind in reverse order of construction
+ */
+err_alloc_root_hub:
+        usb_deregister_bus(xhci->bus);
+       usb_free_bus(xhci->bus);
+       xhci->bus = NULL;
+
+err_alloc_bus:
+       kfree(xhci);
+
+err_alloc_xhci:
+       return retval;
+}
+
<test></test>
+/**
+ * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message
+ *
+ * Drives the frontend state machine:
+ *   CLOSED -> DISCONNECTED: allocate and share the ring, ask the domain
+ *                           controller to connect the interface.
+ *   DISCONNECTED -> CONNECTED: bind the event channel, learn the port
+ *                           count, and bring up the virtual root hub.
+ */
+static void usbif_status_change(usbif_fe_interface_status_changed_t *status)
+{
+    ctrl_msg_t                   cmsg;
+    usbif_fe_interface_connect_t up;
+    long rc;
+    usbif_sring_t *sring;
+
+    switch ( status->status )
+    {
+    case USBIF_INTERFACE_STATUS_DESTROYED:
+        printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n",
+               xhci->state);
+        break;
+
+    case USBIF_INTERFACE_STATUS_DISCONNECTED:
+        if ( xhci->state != USBIF_STATE_CLOSED )
+        {
+            printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message"
+                   " in state %d\n", xhci->state);
+            break;
+            /* Not bothering to do recovery here for now.  Keep things
+             * simple. */
+        }
+
+        /* Move from CLOSED to DISCONNECTED state. */
+        /* NOTE(review): __get_free_page() result is not NULL-checked
+         * before SHARED_RING_INIT -- confirm this is acceptable here. */
+        sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
+        SHARED_RING_INIT(sring);
+        FRONT_RING_INIT(&xhci->usb_ring, sring);
+        xhci->state  = USBIF_STATE_DISCONNECTED;
+
+        /* Construct an interface-CONNECT message for the domain controller. */
+        cmsg.type      = CMSG_USBIF_FE;
+        cmsg.subtype   = CMSG_USBIF_FE_INTERFACE_CONNECT;
+        cmsg.length    = sizeof(usbif_fe_interface_connect_t);
+        up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
+        memcpy(cmsg.msg, &up, sizeof(up));
+        
+        /* Tell the controller to bring up the interface. */
+        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+        break;
+
+    case USBIF_INTERFACE_STATUS_CONNECTED:
+        if ( xhci->state == USBIF_STATE_CLOSED )
+        {
+            printk(KERN_WARNING "Unexpected usbif-CONNECTED message"
+                   " in state %d\n", xhci->state);
+            break;
+        }
+
+        xhci->evtchn = status->evtchn;
+        xhci->irq = bind_evtchn_to_irq(xhci->evtchn);
+       xhci->bandwidth = status->bandwidth;
+       xhci->rh.numports = status->num_ports;
+
+        /* NOTE(review): kmalloc() result is not NULL-checked before the
+         * memset -- confirm. */
+        xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL);
+        memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports);
+
+       usb_connect(xhci->rh.dev);
+
+       if (usb_new_device(xhci->rh.dev) != 0) {
+               err("unable to start root hub");
+       }
+
+       /* Allocate the appropriate USB bandwidth here...  Need to
+         * somehow know what the total available is thought to be so we
+         * can calculate the reservation correctly. */
+       usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
+                           1000 - xhci->bandwidth, 0);
+
+        /* Failure here is only reported, not unwound. */
+        if ( (rc = request_irq(xhci->irq, xhci_interrupt, 
+                               SA_SAMPLE_RANDOM, "usbif", xhci)) )
+                printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
+
+       DPRINTK(KERN_INFO __FILE__
+                ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n",
+                xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
+                xhci->evtchn, xhci->irq);
+
+        xhci->state = USBIF_STATE_CONNECTED;
+        
+        break;
+
+    default:
+        printk(KERN_WARNING "Status change to unknown value %d\n", 
+               status->status);
+        break;
+    }
+}
+
+/**
+ * usbif_ctrlif_rx - demux control messages by subtype
+ *
+ * Only CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED is understood; anything
+ * else (or a malformed length) is answered with a zero-length response.
+ * Every message receives exactly one ctrl_if_send_response().
+ */
+static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    switch ( msg->subtype )
+    {
+    case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
+        if ( msg->length != sizeof(usbif_fe_interface_status_changed_t) )
+            goto parse_error;
+        usbif_status_change((usbif_fe_interface_status_changed_t *)
+                            &msg->msg[0]);
+        break;        
+
+        /* New interface...? */
+    default:
+        goto parse_error;
+    }
+
+    ctrl_if_send_response(msg);
+    return;
+
+ parse_error:
+    /* Signal a parse error by truncating the response payload. */
+    msg->length = 0;
+    ctrl_if_send_response(msg);
+}
+
+
+/*
+ * xhci_hcd_init - module entry point for the Xen virtual USB frontend.
+ *
+ * Skips initialisation in dom0 / USB-backend domains, registers the
+ * control-interface receiver, announces driver-UP to the domain
+ * controller, then polls up to ~10s for the backend to connect.
+ * Returns 0 on success (or when skipped), -ENOMEM on allocation failure.
+ */
+static int __init xhci_hcd_init(void)
+{
+       int retval = -ENOMEM, i;
+        usbif_fe_interface_status_changed_t st;
+        control_msg_t cmsg;
+
+       if ( (xen_start_info.flags & SIF_INITDOMAIN)
+            || (xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+                return 0;
+
+       info(DRIVER_DESC " " DRIVER_VERSION);
+
+       if (debug) {
+               errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL);
+               if (!errbuf)
+                       goto errbuf_failed;
+       }
+
+       xhci_up_cachep = kmem_cache_create("xhci_urb_priv",
+               sizeof(struct urb_priv), 0, 0, NULL, NULL);
+       if (!xhci_up_cachep)
+               goto up_failed;
+
+        /* Let the domain controller know we're here.  For now we wait until
+         * connection, as for the block and net drivers.  This is only strictly
+         * necessary if we're going to boot off a USB device. */
+        printk(KERN_INFO "Initialising Xen virtual USB hub\n");
+    
+        (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx,
+                                        CALLBACK_IN_BLOCKING_CONTEXT);
+        
+       /* NOTE(review): alloc_xhci()'s return value is ignored; if it
+        * failed, the global xhci may be NULL when dereferenced in the
+        * wait loop below -- confirm. */
+       alloc_xhci();
+
+        /* Send a driver-UP notification to the domain controller. */
+        cmsg.type      = CMSG_USBIF_FE;
+        cmsg.subtype   = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED;
+        cmsg.length    = sizeof(usbif_fe_driver_status_changed_t);
+        st.status      = USBIF_DRIVER_STATUS_UP;
+        memcpy(cmsg.msg, &st, sizeof(st));
+        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+        
+        /*
+         * We should read 'nr_interfaces' from response message and wait
+         * for notifications before proceeding. For now we assume that we
+         * will be notified of exactly one interface.
+         */
+        for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+        {
+            set_current_state(TASK_INTERRUPTIBLE);
+            schedule_timeout(1);
+        }
+        
+        if (xhci->state != USBIF_STATE_CONNECTED)
+            printk(KERN_WARNING "Timeout connecting USB frontend driver!\n");
+       
+       return 0;
+
+up_failed:
+       /* kfree(NULL) is a no-op, so the guard is redundant but harmless. */
+       if (errbuf)
+               kfree(errbuf);
+
+errbuf_failed:
+       return retval;
+}
+
+module_init(xhci_hcd_init);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
new file mode 100644 (file)
index 0000000..f503e59
--- /dev/null
@@ -0,0 +1,180 @@
+/******************************************************************************
+ * xhci.h
+ *
+ * Private definitions for the Xen Virtual USB Controller.  Based on
+ * drivers/usb/host/uhci.h from Linux.  Copyright for the imported content is
+ * retained by the original authors.
+ *
+ * Modifications are:
+ * Copyright (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2004, 2005 Mark Williamson
+ */
+
+#ifndef __LINUX_XHCI_H
+#define __LINUX_XHCI_H
+
+#include <linux/list.h>
+#include <linux/usb.h>
+#include <asm-xen/xen-public/io/usbif.h>
+#include <linux/spinlock.h>
+
+/* xhci_port_t - current known state of a single virtual hub port
+ * (one bit per USB hub port-status flag). */
+typedef struct {
+        unsigned int cs     :1; /* Connection status.  do we really need this /and/ ccs? */
+        unsigned int cs_chg :1; /* Connection status change.  */
+        unsigned int pe     :1; /* Port enable.               */
+        unsigned int pe_chg :1; /* Port enable change.        */
+        unsigned int ccs    :1; /* Current connect status.    */
+        unsigned int susp   :1; /* Suspended.                 */
+        unsigned int lsda   :1; /* Low speed device attached. */
+        unsigned int pr     :1; /* Port reset.                */
+} xhci_port_t;
+
+/* struct virt_root_hub - state related to the virtual root hub */
+struct virt_root_hub {
+       struct usb_device *dev;         /* the root hub's usb_device */
+       int devnum;             /* Address of Root Hub endpoint */
+       struct urb *urb;                /* pending root-hub interrupt URB */
+       void *int_addr;
+       int send;                       /* non-zero while status polling active */
+       int interval;                   /* polling interval (frames) */
+       int numports;                   /* ports reported by the backend */
+       int c_p_r[8];                   /* per-port reset-change flags (max 8 ports) */
+       struct timer_list rh_int_timer; /* drives periodic status polling */
+        spinlock_t port_state_lock;    /* protects ports[] */
+        xhci_port_t *ports;            /* array of numports port states */
+};
+
+/* struct xhci - contains the state associated with a single USB interface
+ * (one frontend/backend channel plus its virtual root hub). */
+struct xhci {
+
+#ifdef CONFIG_PROC_FS
+       /* procfs */
+       int num;
+       struct proc_dir_entry *proc_entry;
+#endif
+
+        int evtchn;                        /* Interdom channel to backend */
+        int irq;                           /* Bound to evtchn */
+        enum { USBIF_STATE_CONNECTED    = 2,
+               USBIF_STATE_DISCONNECTED = 1,
+               USBIF_STATE_CLOSED       = 0
+        } state; /* State of this USB interface */
+        unsigned long bandwidth;           /* bandwidth reported by backend */
+
+       struct usb_bus *bus;
+
+       /* Main list of URB's currently controlled by this HC */
+       spinlock_t urb_list_lock;
+       struct list_head urb_list;              /* P: xhci->urb_list_lock */
+
+       /* List of URB's awaiting completion callback */
+       spinlock_t complete_list_lock;
+       struct list_head complete_list;         /* P: xhci->complete_list_lock */
+
+       struct virt_root_hub rh;        /* private data of the virtual root hub */
+
+        usbif_front_ring_t usb_ring;   /* shared request/response ring */
+
+        int awaiting_reset;
+};
+
+/* per-URB private data structure for the host controller
+ * (allocated from the xhci_up_cachep slab, hung off urb->hcpriv). */
+struct urb_priv {
+       struct urb *urb;                /* back-pointer to the owning URB */
+        usbif_iso_t *schedule;         /* isochronous schedule, if any */
+       struct usb_device *dev;
+
+        int in_progress : 1;           /* QH was queued (not linked in) */
+       int short_control_packet : 1;   /* If we get a short packet during */
+                                       /*  a control transfer, retrigger */
+                                       /*  the status phase */
+
+       int status;                     /* Final status */
+
+       unsigned long inserttime;       /* In jiffies */
+
+       struct list_head complete_list; /* P: xhci->complete_list_lock */
+};
+
+/*
+ * Locking in xhci.c
+ *
+ * spinlocks are used extensively to protect the many lists and data
+ * structures we have. It's not that pretty, but it's necessary. We
+ * need to be done with all of the locks (except complete_list_lock) when
+ * we call urb->complete. I've tried to make it simple enough so I don't
+ * have to spend hours racking my brain trying to figure out if the
+ * locking is safe.
+ *
+ * Here's the safe locking order to prevent deadlocks:
+ *
+ * #1 xhci->urb_list_lock
+ * #2 urb->lock
+ * #3 xhci->urb_remove_list_lock
+ * #4 xhci->complete_list_lock
+ *
+ * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
+ * at the lowest level FIRST and NEVER grab locks at the same level at the
+ * same time.
+ * 
+ * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
+ */
+
+/* -------------------------------------------------------------------------
+   Virtual Root HUB
+   ------------------------------------------------------------------------- */
+/* destination of request */
+#define RH_DEVICE              0x00
+#define RH_INTERFACE           0x01
+#define RH_ENDPOINT            0x02
+#define RH_OTHER               0x03
+
+#define RH_CLASS               0x20
+#define RH_VENDOR              0x40
+
+/* Requests: bRequest << 8 | bmRequestType */
+#define RH_GET_STATUS          0x0080
+#define RH_CLEAR_FEATURE       0x0100
+#define RH_SET_FEATURE         0x0300
+#define RH_SET_ADDRESS         0x0500
+#define RH_GET_DESCRIPTOR      0x0680
+#define RH_SET_DESCRIPTOR      0x0700
+#define RH_GET_CONFIGURATION   0x0880
+#define RH_SET_CONFIGURATION   0x0900
+#define RH_GET_STATE           0x0280
+#define RH_GET_INTERFACE       0x0A80
+#define RH_SET_INTERFACE       0x0B00
+#define RH_SYNC_FRAME          0x0C80
+/* Our Vendor Specific Request */
+#define RH_SET_EP              0x2000
+
+/* Hub port features */
+#define RH_PORT_CONNECTION     0x00
+#define RH_PORT_ENABLE         0x01
+#define RH_PORT_SUSPEND                0x02
+#define RH_PORT_OVER_CURRENT   0x03
+#define RH_PORT_RESET          0x04
+#define RH_PORT_POWER          0x08
+#define RH_PORT_LOW_SPEED      0x09
+#define RH_C_PORT_CONNECTION   0x10
+#define RH_C_PORT_ENABLE       0x11
+#define RH_C_PORT_SUSPEND      0x12
+#define RH_C_PORT_OVER_CURRENT 0x13
+#define RH_C_PORT_RESET                0x14
+
+/* Hub features */
+#define RH_C_HUB_LOCAL_POWER   0x00
+#define RH_C_HUB_OVER_CURRENT  0x01
+#define RH_DEVICE_REMOTE_WAKEUP        0x00
+#define RH_ENDPOINT_STALL      0x01
+
+/* Our Vendor Specific feature */
+#define RH_REMOVE_EP           0x00
+
+#define RH_ACK                 0x01
+#define RH_REQ_ERR             -1
+#define RH_NACK                        0x00
+
+#endif
+
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
new file mode 100644 (file)
index 0000000..421a81f
--- /dev/null
@@ -0,0 +1,59 @@
+/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
+ * which needs to alter them.
+ *
+ * Xen variant: the IO-APIC / warm-reset-vector manipulations are stubbed
+ * out (the '#if 1' branches just printk) since the guest has no direct
+ * access to that hardware; the original native code is kept in the
+ * '#else' branches for reference. */
+
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+#if 1
+       printk("smpboot_clear_io_apic_irqs\n");
+#else
+       io_apic_irqs = 0;
+#endif
+}
+
+static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
+{
+#if 1
+       printk("smpboot_setup_warm_reset_vector\n");
+#else
+       CMOS_WRITE(0xa, 0xf);
+       local_flush_tlb();
+       Dprintk("1.\n");
+       *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+       Dprintk("2.\n");
+       *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+       Dprintk("3.\n");
+#endif
+}
+
+/* NOTE(review): unlike the other hooks this one is NOT stubbed out and
+ * still touches CMOS/page 0 -- confirm that is intended under Xen. */
+static inline void smpboot_restore_warm_reset_vector(void)
+{
+       /*
+        * Install writable page 0 entry to set BIOS data area.
+        */
+       local_flush_tlb();
+
+       /*
+        * Paranoid:  Set warm reset code and vector here back
+        * to default values.
+        */
+       CMOS_WRITE(0, 0xf);
+
+       *((volatile long *) phys_to_virt(0x467)) = 0;
+}
+
+static inline void smpboot_setup_io_apic(void)
+{
+#if 1
+       printk("smpboot_setup_io_apic\n");
+#else
+       /*
+        * Here we can be sure that there is an IO-APIC in the system. Let's
+        * go and set it up:
+        */
+       if (!skip_ioapic_setup && nr_ioapics)
+               setup_IO_APIC();
+#endif
+}
+
+
+/* Under Xen, "SMP config present" simply means the hypervisor gave us
+ * more than one virtual CPU. */
+#define        smp_found_config        (HYPERVISOR_shared_info->n_vcpu > 1)
index d0acc128a17aa2c1a4076febe5e05114cd5318ac..e3dfb002c7dbf6ee7e95baeba5b37bf34ab4a56f 100644 (file)
@@ -114,7 +114,6 @@ static inline unsigned long pgd_val(pgd_t x)
        if (ret) ret = machine_to_phys(ret);
        return ret;
 }
-#define pgd_val_ma(x)   ((x).pgd)
 #define pgprot_val(x)  ((x).pgprot)
 
 static inline pte_t __pte(unsigned long x)
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
new file mode 100644 (file)
index 0000000..d7189a7
--- /dev/null
@@ -0,0 +1,250 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <linux/config.h>
+#include <linux/compiler.h>
+
+asmlinkage int printk(const char * fmt, ...)
+       __attribute__ ((format (printf, 1, 2)));
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int slock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+       unsigned int break_lock;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT    /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile signed char *)(&(x)->slock) <= 0)
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+
+/* Inline-asm body of _raw_spin_lock: 'lock; decb' to acquire, then a
+ * rep;nop (pause) polling loop until the byte goes positive again. */
+#define spin_lock_string \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "jns 3f\n" \
+       "2:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0,%0\n\t" \
+       "jle 2b\n\t" \
+       "jmp 1b\n" \
+       "3:\n\t"
+
+/* As above, but checks IF (bit 9) in the saved %1 flags while spinning.
+ * NOTE(review): the sti/cli are written as "#sti"/"#cli", i.e. assembler
+ * comments, so interrupts are NOT actually re-enabled while waiting --
+ * presumably the Xen modification of the native version; confirm. */
+#define spin_lock_string_flags \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "jns 4f\n\t" \
+       "2:\t" \
+       "testl $0x200, %1\n\t" \
+       "jz 3f\n\t" \
+       "#sti\n\t" \
+       "3:\t" \
+       "rep;nop\n\t" \
+       "cmpb $0, %0\n\t" \
+       "jle 3b\n\t" \
+       "#cli\n\t" \
+       "jmp 1b\n" \
+       "4:\n\t"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+/* Fast path: a plain byte store is a sufficient release on x86. */
+#define spin_unlock_string \
+       "movb $1,%0" \
+               :"=m" (lock->slock) : : "memory"
+
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(lock->magic != SPINLOCK_MAGIC);
+       BUG_ON(!spin_is_locked(lock));
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#else
+
+/* PPro/OOSTORE path: use a serialising xchg instead of a plain store. */
+#define spin_unlock_string \
+       "xchgb %b0, %1" \
+               :"=q" (oldval), "=m" (lock->slock) \
+               :"0" (oldval) : "memory"
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+       char oldval = 1;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(lock->magic != SPINLOCK_MAGIC);
+       BUG_ON(!spin_is_locked(lock));
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#endif
+
+/* Try to take the lock without spinning: atomically swap in 0 and
+ * succeed iff the previous value was positive (i.e. unlocked). */
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+       char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1"
+               :"=q" (oldval), "=m" (lock->slock)
+               :"0" (0) : "memory");
+       return oldval > 0;
+}
+
+/* Acquire @lock, spinning until available (see spin_lock_string). */
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+               printk("eip: %p\n", __builtin_return_address(0));
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->slock) : : "memory");
+}
+
+/* Acquire @lock with the caller's saved @flags available to the spin
+ * loop (see spin_lock_string_flags and the note there). */
+static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+               printk("eip: %p\n", __builtin_return_address(0));
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string_flags
+               :"=m" (lock->slock) : "r" (flags) : "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+       volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+       unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+       unsigned int break_lock;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC   0xdeaf1eed
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT      /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define read_can_lock(x) ((int)(x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores.  See
+ * semaphore.h for details.  -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+/* Take @rw for reading; slow path handled by __read_lock_failed. */
+static inline void _raw_read_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+       __build_read_lock(rw, "__read_lock_failed");
+}
+
+/* Take @rw for writing; slow path handled by __write_lock_failed. */
+static inline void _raw_write_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+       BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+       __build_write_lock(rw, "__write_lock_failed");
+}
+
+/* Readers release by undoing their decrement; writers add the bias back. */
+#define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw)  asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+/* Non-blocking read acquire.
+ * NOTE(review): the dec and the read are two separate atomic ops, so the
+ * check is of a possibly-stale value; this can fail spuriously under
+ * contention but never succeeds incorrectly -- confirm acceptable. */
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       atomic_dec(count);
+       if (atomic_read(count) >= 0)
+               return 1;
+       atomic_inc(count);              /* back out on failure */
+       return 0;
+}
+
+/* Non-blocking write acquire: succeed only if we took the whole bias
+ * (no readers or writers held the lock); otherwise restore it. */
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+               return 1;
+       atomic_add(RW_LOCK_BIAS, count);
+       return 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */