From: cl349@firebug.cl.cam.ac.uk
Date: Sat, 12 Mar 2005 21:16:57 +0000 (+0000)
Subject: bitkeeper revision 1.1236.25.9 (42335c497Bt0QbOvYK3fGa02eZ4_Sw)
X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~17857^2~26^2~12
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=9e0e7739a01ee25d1d7cd3b58ecdacb5763b094f;p=xen.git

bitkeeper revision 1.1236.25.9 (42335c497Bt0QbOvYK3fGa02eZ4_Sw)

Update to Linux 2.6.11.

Signed-off-by: Christian Limpach
---

diff --git a/.rootkeys b/.rootkeys
index be7ae7e789..6f0b7f4aea 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -134,24 +134,6 @@
 3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.29-xen-sparse/mm/swapfile.c
 41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.29-xen-sparse/mm/vmalloc.c
 41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.29-xen-sparse/net/core/skbuff.c
-41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
-41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
-41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
-418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
-41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
-41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
-41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
-41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
-41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
-41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
-41ee5e8bYDQkjRVKnFn5uFyy0KreCw linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h
-41ee5e8bt7xeBUJqG5XJS-ofukdsgA linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c
-41ee5e8bSs3BGC7yegM_ek2Tn0Ahvw linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c
-41ee5e8bglvqKvZSY5uJ5JGQejEwyQ linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c
-41ee5e8ckZ9xVNvu9NHIZDK7JqApmQ linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c
-41ee5e8ck9scpGirfqEZRARbGDyTXA linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h
-41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
-4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
 40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.11-xen-sparse/arch/xen/Kconfig
 40f56237utH41NPukqHksuNf29IC9A linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers
 40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.11-xen-sparse/arch/xen/Makefile
@@ -169,12 +151,15 @@
 40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S
 40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
 40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c
+41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
 40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
 41d54a76YMCA67S8J-TBT3J62Wx6yA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/microcode.c
 4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
 40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
 40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c
 40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c
+41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
+41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
 40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c
 40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile
 40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
@@ -200,6 +185,7 @@
 412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
 40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c
 414c113396tK1HTVeUalm3u-1DF16g linux-2.6.11-xen-sparse/arch/xen/kernel/skbuff.c
+418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c
 3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c
 41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.11-xen-sparse/drivers/Makefile
 4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.11-xen-sparse/drivers/char/mem.c
@@ -218,6 +204,12 @@
 40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
 40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
 40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
+41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
+41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c
+41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h
+41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
+41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
+41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
 40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.11-xen-sparse/drivers/xen/console/Makefile
 3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.11-xen-sparse/drivers/xen/console/console.c
 40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.11-xen-sparse/drivers/xen/evtchn/Makefile
@@ -232,6 +224,12 @@
 405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
 4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.11-xen-sparse/drivers/xen/privcmd/Makefile
 3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
+41ee5e8bYDQkjRVKnFn5uFyy0KreCw linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h
+41ee5e8bt7xeBUJqG5XJS-ofukdsgA linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c
+41ee5e8bSs3BGC7yegM_ek2Tn0Ahvw linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c
+41ee5e8bglvqKvZSY5uJ5JGQejEwyQ linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
+41ee5e8ckZ9xVNvu9NHIZDK7JqApmQ linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c
+41ee5e8ck9scpGirfqEZRARbGDyTXA linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
 412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h
 40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h
 4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
@@ -242,6 +240,7 @@
 40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
 40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
 40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
+41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
 4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
 40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
 40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/param.h
@@ -254,6 +253,7 @@
 412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h
 40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h
 40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/setup.h
+4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
 40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
 40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
 40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
deleted file mode 100644
index 6cd16ccfdc..0000000000
--- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * linux/arch/i386/kernel/irq.c
- *
- * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the lowest level x86-specific interrupt
- * entry, irq-stacks and irq statistics code. All the remaining
- * irq logic is done by the generic kernel/irq/ code and
- * by the x86-specific irq controller code. (e.g. i8259.c and
- * io_apic.c.)
- */
-
-#include
-#include
-#include
-#include
-#include
-
-#ifndef CONFIG_X86_LOCAL_APIC
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
-	printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-#endif
-
-#ifdef CONFIG_4KSTACKS
-/*
- * per-CPU IRQ handling contexts (thread information and stack)
- */
-union irq_ctx {
-	struct thread_info tinfo;
-	u32 stack[THREAD_SIZE/sizeof(u32)];
-};
-
-static union irq_ctx *hardirq_ctx[NR_CPUS];
-static union irq_ctx *softirq_ctx[NR_CPUS];
-#endif
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
-{
-	/* high bits used in ret_from_ code */
-	int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
-#ifdef CONFIG_4KSTACKS
-	union irq_ctx *curctx, *irqctx;
-	u32 *isp;
-#endif
-
-	irq_enter();
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-	/* Debugging check for stack overflow: is there less than 1KB free? */
-	{
-		long esp;
-
-		__asm__ __volatile__("andl %%esp,%0" :
-					"=r" (esp) : "0" (THREAD_SIZE - 1));
-		if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
-			printk("do_IRQ: stack overflow: %ld\n",
-				esp - sizeof(struct thread_info));
-			dump_stack();
-		}
-	}
-#endif
-
-#ifdef CONFIG_4KSTACKS
-
-	curctx = (union irq_ctx *) current_thread_info();
-	irqctx = hardirq_ctx[smp_processor_id()];
-
-	/*
-	 * this is where we switch to the IRQ stack.
However, if we are - * already using the IRQ stack (because we interrupted a hardirq - * handler) we can't do that and just have to keep using the - * current stack (which is the irq stack already after all) - */ - if (curctx != irqctx) { - int arg1, arg2, ebx; - - /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_IRQ \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (ebx) - : "0" (irq), "1" (regs), "2" (isp) - : "memory", "cc", "ecx" - ); - } else -#endif - __do_IRQ(irq, regs); - - irq_exit(); - - return 1; -} - -#ifdef CONFIG_4KSTACKS - -/* - * These should really be __section__(".bss.page_aligned") as well, but - * gcc's 3.0 and earlier don't handle that correctly. - */ -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); - -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); - -/* - * allocate per-cpu stacks for hardirq and for softirq processing - */ -void irq_ctx_init(int cpu) -{ - union irq_ctx *irqctx; - - if (hardirq_ctx[cpu]) - return; - - irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - hardirq_ctx[cpu] = irqctx; - - irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - softirq_ctx[cpu] = irqctx; - - printk("CPU %u irqstacks, hard=%p soft=%p\n", - cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); -} - -extern asmlinkage void __do_softirq(void); - -asmlinkage void do_softirq(void) -{ - unsigned long flags; - struct thread_info *curctx; - union irq_ctx *irqctx; - u32 *isp; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - curctx = current_thread_info(); - irqctx = softirq_ctx[smp_processor_id()]; - irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - /* build the stack frame on the softirq stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_softirq \n" - " movl %%ebx,%%esp \n" - : "=b"(isp) - : "0"(isp) - : "memory", "cc", "edx", "ecx", "eax" - ); - } - - local_irq_restore(flags); -} - -EXPORT_SYMBOL(do_softirq); -#endif - -/* - * Interrupt statistics: - */ - -atomic_t irq_err_count; - -/* - * /proc/interrupts printing: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for (j=0; jtypename); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { - seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", nmi_count(j)); - seq_putc(p, '\n'); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", - 
irq_stat[j].apic_timer_irqs); - seq_putc(p, '\n'); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - } - return 0; -} diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c deleted file mode 100644 index 9fabbfe043..0000000000 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Intel SMP support routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar - * - * This code is released under the GNU General Public License version 2 or - * later. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#if 0 -#include -#endif -#include - -#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg) - -/* - * Some notes on x86 processor bugs affecting SMP operation: - * - * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. - * The Linux implications for SMP are handled as follows: - * - * Pentium III / [Xeon] - * None of the E1AP-E3AP errata are visible to the user. - * - * E1AP. see PII A1AP - * E2AP. see PII A2AP - * E3AP. see PII A3AP - * - * Pentium II / [Xeon] - * None of the A1AP-A3AP errata are visible to the user. - * - * A1AP. see PPro 1AP - * A2AP. see PPro 2AP - * A3AP. see PPro 7AP - * - * Pentium Pro - * None of 1AP-9AP errata are visible to the normal user, - * except occasional delivery of 'spurious interrupt' as trap #15. - * This is very rare and a non-problem. - * - * 1AP. Linux maps APIC as non-cacheable - * 2AP. worked around in hardware - * 3AP. fixed in C0 and above steppings microcode update. - * Linux does not use excessive STARTUP_IPIs. - * 4AP. worked around in hardware - * 5AP. symmetric IO mode (normal Linux operation) not affected. - * 'noapic' mode has vector 0xf filled out properly. - * 6AP. 'noapic' mode might be affected - fixed in later steppings - * 7AP. We do not assume writes to the LVT deassering IRQs - * 8AP. We do not enable low power mode (deep sleep) during MP bootup - * 9AP. We do not use mixed mode - * - * Pentium - * There is a marginal case where REP MOVS on 100MHz SMP - * machines with B stepping processors can fail. XXX should provide - * an L1cache=Writethrough or L1cache=off option. - * - * B stepping CPUs may hang. There are hardware work arounds - * for this. We warn about it in case your board doesn't have the work - * arounds. Basically thats so I can tell anyone with a B stepping - * CPU and SMP problems "tough". - * - * Specific items [From Pentium Processor Specification Update] - * - * 1AP. Linux doesn't use remote read - * 2AP. Linux doesn't trust APIC errors - * 3AP. We work around this - * 4AP. Linux never generated 3 interrupts of the same priority - * to cause a lost local interrupt. - * 5AP. Remote read is never used - * 6AP. not affected - worked around in hardware - * 7AP. not affected - worked around in hardware - * 8AP. worked around in hardware - we get explicit CS errors if not - * 9AP. only 'noapic' mode affected. Might generate spurious - * interrupts, we log only the first one and count the - * rest silently. - * 10AP. not affected - worked around in hardware - * 11AP. Linux reads the APIC between writes to avoid this, as per - * the documentation. Make sure you preserve this as it affects - * the C stepping chips too. - * 12AP. 
not affected - worked around in hardware - * 13AP. not affected - worked around in hardware - * 14AP. we always deassert INIT during bootup - * 15AP. not affected - worked around in hardware - * 16AP. not affected - worked around in hardware - * 17AP. not affected - worked around in hardware - * 18AP. not affected - worked around in hardware - * 19AP. not affected - worked around in BIOS - * - * If this sounds worrying believe me these bugs are either ___RARE___, - * or are signal timing bugs worked around in hardware and there's - * about nothing of note with C stepping upwards. - */ - -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline int __prepare_ICR (unsigned int shortcut, int vector) -{ - return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; -} - -static inline int __prepare_ICR2 (unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]); - -static inline void __send_IPI_one(unsigned int cpu, int vector) -{ - unsigned int evtchn; - - evtchn = per_cpu(ipi_to_evtchn, cpu)[vector]; - // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn); - if (evtchn) { -#if 0 - shared_info_t *s = HYPERVISOR_shared_info; - while (synch_test_bit(evtchn, &s->evtchn_pending[0]) || - synch_test_bit(evtchn, &s->evtchn_mask[0])) - ; -#endif - notify_via_evtchn(evtchn); - } else - printk("send_IPI to unbound port %d/%d", - cpu, vector); -} - -void __send_IPI_shortcut(unsigned int shortcut, int vector) -{ - int cpu; - - switch (shortcut) { - case APIC_DEST_SELF: - __send_IPI_one(smp_processor_id(), vector); - break; - case APIC_DEST_ALLBUT: - for (cpu = 0; cpu < NR_CPUS; ++cpu) { - if (cpu == smp_processor_id()) - continue; - if (cpu_isset(cpu, cpu_online_map)) { - __send_IPI_one(cpu, vector); - } - } - break; - default: - printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut, - vector); - break; - } -} - -void fastcall send_IPI_self(int vector) -{ - __send_IPI_shortcut(APIC_DEST_SELF, vector); -} - -/* - * This is only used on smaller machines. - */ -void send_IPI_mask_bitmask(cpumask_t mask, int vector) -{ - unsigned long flags; - unsigned int cpu; - - local_irq_save(flags); - - for (cpu = 0; cpu < NR_CPUS; ++cpu) { - if (cpu_isset(cpu, mask)) { - __send_IPI_one(cpu, vector); - } - } - - local_irq_restore(flags); -} - -inline void send_IPI_mask_sequence(cpumask_t mask, int vector) -{ - - send_IPI_mask_bitmask(mask, vector); -} - -#include /* must come after the send_IPI functions above for inlining */ - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - */ - -static cpumask_t flush_cpumask; -static struct mm_struct * flush_mm; -static unsigned long flush_va; -static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED; -#define FLUSH_ALL 0xffffffff - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. 
- */ -static inline void leave_mm (unsigned long cpu) -{ - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superflous - * tlb flush. - * 1a2) set cpu_tlbstate to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu_tlbstate[].active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu_tlbstate[].active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - */ - -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - unsigned long cpu; - - cpu = get_cpu(); - - if (!cpu_isset(cpu, flush_cpumask)) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { - if (flush_va == FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - leave_mm(cpu); - } - smp_mb__before_clear_bit(); - cpu_clear(cpu, flush_cpumask); - smp_mb__after_clear_bit(); -out: - put_cpu_no_resched(); - - return IRQ_HANDLED; -} - -static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, - unsigned long va) -{ - cpumask_t tmp; - /* - * A couple of (to be removed) sanity checks: - * - * - we do not send IPIs to not-yet booted CPUs. - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - - cpus_and(tmp, cpumask, cpu_online_map); - BUG_ON(!cpus_equal(cpumask, tmp)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - BUG_ON(!mm); - - /* - * i'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * Temporarily this turns IRQs off, so that lockups are - * detected by the NMI watchdog. 
- */ - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; -#if NR_CPUS <= BITS_PER_LONG - atomic_set_mask(cpumask, &flush_cpumask); -#else - { - int k; - unsigned long *flush_mask = (unsigned long *)&flush_cpumask; - unsigned long *cpu_mask = (unsigned long *)&cpumask; - for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) - atomic_set_mask(cpu_mask[k], &flush_mask[k]); - } -#endif - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); - - while (!cpus_empty(flush_cpumask)) - /* nothing. lockup detection does not belong here */ - mb(); - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm (struct mm_struct * mm) -{ - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; - - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); - - if (current->active_mm == mm) { - if(current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} - -static void do_flush_tlb_all(void* info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1, 1); -} - -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -void smp_send_reschedule(int cpu) -{ - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -} - -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. - */ -static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -static struct call_data_struct * call_data; - -/* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ - -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -/* - * [SUMMARY] Run a function on all other CPUs. - * The function to run. This must be fast and non-blocking. - * An arbitrary pointer to pass to the function. - * currently unused. - * If true, wait (atomically) until function has completed on other CPUs. - * [RETURNS] 0 on success, else a negative status code. Does not return until - * remote CPUs are nearly ready to execute <> or are or have executed. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. 
- */ -{ - struct call_data_struct data; - int cpus = num_online_cpus()-1; - - if (!cpus) - return 0; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - spin_lock(&call_lock); - call_data = &data; - mb(); - - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - barrier(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - barrier(); - spin_unlock(&call_lock); - - return 0; -} - -static void stop_this_cpu (void * dummy) -{ - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); -#if 1 - xxprint("stop_this_cpu disable_local_APIC\n"); -#else - disable_local_APIC(); -#endif - if (cpu_data[smp_processor_id()].hlt_works_ok) - for(;;) __asm__("hlt"); - for (;;); -} - -/* - * this function calls the 'stop' function on all other CPUs in the system. - */ - -void smp_send_stop(void) -{ - smp_call_function(stop_this_cpu, NULL, 1, 0); - - local_irq_disable(); -#if 1 - xxprint("smp_send_stop disable_local_APIC\n"); -#else - disable_local_APIC(); -#endif - local_irq_enable(); -} - -/* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. - */ -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - - return IRQ_HANDLED; -} - -#include -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - irq_enter(); - (*func)(info); - irq_exit(); - - if (wait) { - mb(); - atomic_inc(&call_data->finished); - } - - return IRQ_HANDLED; -} - diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c deleted file mode 100644 index 4dd03ba6cd..0000000000 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c +++ /dev/null @@ -1,1364 +0,0 @@ -/* - * x86 SMP booting functions - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. - * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * Original development of Linux SMP code supported by Caldera. - * - * This code is released under the GNU General Public License version 2 or - * later. - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIPS report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. 
- * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - * Maciej W. Rozycki : Bits for genuine 82489DX APICs - * Martin J. Bligh : Added support for multi-quad systems - * Dave Jones : Report invalid combinations of Athlon CPUs. -* Rusty Russell : Hacked into shape for new "hotplug" boot process. */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#if 1 -#define Dprintk(args...) -#else -#include -#endif -#include -#include - -/* Set if we find a B stepping CPU */ -static int __initdata smp_b_stepping; - -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ - -/* bitmap of online cpus */ -cpumask_t cpu_online_map; - -static cpumask_t cpu_callin_map; -cpumask_t cpu_callout_map; -static cpumask_t smp_commenced_mask; - -/* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; - -u8 x86_cpu_to_apicid[NR_CPUS] = - { [0 ... NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); - -/* Set when the idlers are all forked */ -int smp_threads_ready; - -#if 0 -/* - * Trampoline 80x86 program as an array. - */ - -extern unsigned char trampoline_data []; -extern unsigned char trampoline_end []; -static unsigned char *trampoline_base; -static int trampoline_exec; - -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. - */ - -static unsigned long __init setup_trampoline(void) -{ - memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); - return virt_to_phys(trampoline_base); -} -#endif - -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. - */ -void __init smp_alloc_memory(void) -{ -#if 1 - int cpu; - - for (cpu = 1; cpu < NR_CPUS; cpu++) { - cpu_gdt_descr[cpu].address = (unsigned long) - alloc_bootmem_low_pages(PAGE_SIZE); - /* XXX free unused pages later */ - } -#else - trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); - /* - * Has to be in very low memory so we can execute - * real-mode AP code. - */ - if (__pa(trampoline_base) >= 0x9F000) - BUG(); - /* - * Make the SMP trampoline executable: - */ - trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); -#endif -} - -/* - * The bootstrap kernel entry code has set these up. Save them for - * a given CPU - */ - -static void __init smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = cpu_data + id; - - *c = boot_cpu_data; - if (id!=0) - identify_cpu(c); - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) - /* - * Remember we have B step Pentia with bugs - */ - smp_b_stepping = 1; - - /* - * Certain Athlons might work (for various values of 'work') in SMP - * but they are not certified as MP capable. - */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - - /* Athlon 660/661 is valid. */ - if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) - goto valid_k7; - - /* Duron 670 is valid */ - if ((c->x86_model==7) && (c->x86_mask==0)) - goto valid_k7; - - /* - * Athlon 662, Duron 671, and Athlon >model 7 have capability bit. 
- * It's worth noting that the A5 stepping (662) of some Athlon XP's - * have the MP bit set. - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more. - */ - if (((c->x86_model==6) && (c->x86_mask>=2)) || - ((c->x86_model==7) && (c->x86_mask>=1)) || - (c->x86_model> 7)) - if (cpu_has_mp) - goto valid_k7; - - /* If we get here, it's not a certified SMP capable AMD system. */ - tainted |= TAINT_UNSAFE_SMP; - } - -valid_k7: - ; -} - -#if 0 -/* - * TSC synchronization. - * - * We first check whether all CPUs have their TSC's synchronized, - * then we print a warning if not, and always resync. - */ - -static atomic_t tsc_start_flag = ATOMIC_INIT(0); -static atomic_t tsc_count_start = ATOMIC_INIT(0); -static atomic_t tsc_count_stop = ATOMIC_INIT(0); -static unsigned long long tsc_values[NR_CPUS]; - -#define NR_LOOPS 5 - -static void __init synchronize_tsc_bp (void) -{ - int i; - unsigned long long t0; - unsigned long long sum, avg; - long long delta; - unsigned long one_usec; - int buggy = 0; - - printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); - - /* convert from kcyc/sec to cyc/usec */ - one_usec = cpu_khz / 1000; - - atomic_set(&tsc_start_flag, 1); - wmb(); - - /* - * We loop a few times to get a primed instruction cache, - * then the last pass is more or less synchronized and - * the BP and APs set their cycle counters to zero all at - * once. This reduces the chance of having random offsets - * between the processors, and guarantees that the maximum - * delay between the cycle counters is never bigger than - * the latency of information-passing (cachelines) between - * two CPUs. - */ - for (i = 0; i < NR_LOOPS; i++) { - /* - * all APs synchronize but they loop on '== num_cpus' - */ - while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_stop, 0); - wmb(); - /* - * this lets the APs save their current TSC: - */ - atomic_inc(&tsc_count_start); - - rdtscll(tsc_values[smp_processor_id()]); - /* - * We clear the TSC in the last loop: - */ - if (i == NR_LOOPS-1) - write_tsc(0, 0); - - /* - * Wait for all APs to leave the synchronization point: - */ - while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_start, 0); - wmb(); - atomic_inc(&tsc_count_stop); - } - - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, cpu_callout_map)) { - t0 = tsc_values[i]; - sum += t0; - } - } - avg = sum; - do_div(avg, num_booting_cpus()); - - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - delta = tsc_values[i] - avg; - if (delta < 0) - delta = -delta; - /* - * We report bigger than 2 microseconds clock differences. 
- */ - if (delta > 2*one_usec) { - long realdelta; - if (!buggy) { - buggy = 1; - printk("\n"); - } - realdelta = delta; - do_div(realdelta, one_usec); - if (tsc_values[i] < avg) - realdelta = -realdelta; - - printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); - } - - sum += delta; - } - if (!buggy) - printk("passed.\n"); -} - -static void __init synchronize_tsc_ap (void) -{ - int i; - - /* - * Not every cpu is online at the time - * this gets called, so we first wait for the BP to - * finish SMP initialization: - */ - while (!atomic_read(&tsc_start_flag)) mb(); - - for (i = 0; i < NR_LOOPS; i++) { - atomic_inc(&tsc_count_start); - while (atomic_read(&tsc_count_start) != num_booting_cpus()) - mb(); - - rdtscll(tsc_values[smp_processor_id()]); - if (i == NR_LOOPS-1) - write_tsc(0, 0); - - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); - } -} -#undef NR_LOOPS -#endif - -extern void calibrate_delay(void); - -static atomic_t init_deasserted; - -void __init smp_callin(void) -{ - int cpuid, phys_id; - unsigned long timeout; - -#if 0 - /* - * If waken up by an INIT in an 82489DX configuration - * we may get here before an INIT-deassert IPI reaches - * our local APIC. We have to wait for the IPI or we'll - * lock up on an APIC access. - */ - wait_for_init_deassert(&init_deasserted); -#endif - - /* - * (This works even if the APIC is not enabled.) - */ - phys_id = smp_processor_id(); - cpuid = smp_processor_id(); - if (cpu_isset(cpuid, cpu_callin_map)) { - printk("huh, phys CPU#%d, CPU#%d already present??\n", - phys_id, cpuid); - BUG(); - } - Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies, timeout)) { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (cpu_isset(cpuid, cpu_callout_map)) - break; - rep_nop(); - } - - if (!time_before(jiffies, timeout)) { - printk("BUG: CPU%d started up but did not get a callout!\n", - cpuid); - BUG(); - } - -#if 0 - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) - */ - - Dprintk("CALLIN, before setup_local_APIC().\n"); - smp_callin_clear_local_apic(); - setup_local_APIC(); -#endif - map_cpu_to_logical_apicid(); - - local_irq_enable(); - - /* - * Get our bogomips. - */ - calibrate_delay(); - Dprintk("Stack at about %p\n",&cpuid); - - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - -#if 0 - disable_APIC_timer(); -#endif - local_irq_disable(); - /* - * Allow the master to continue. 
- */ - cpu_set(cpuid, cpu_callin_map); - -#if 0 - /* - * Synchronize the TSC with the BP - */ - if (cpu_has_tsc && cpu_khz) - synchronize_tsc_ap(); -#endif -} - -int cpucount; - -extern int cpu_idle(void); - - -static irqreturn_t local_debug_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - - return IRQ_HANDLED; -} - -static struct irqaction local_irq_debug = { - local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug", - NULL, NULL -}; - -void local_setup_debug(void) -{ - (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug); -} - - -extern void local_setup_timer(void); - -/* - * Activate a secondary processor. - */ -int __init start_secondary(void *unused) -{ - /* - * Dont put anything before smp_callin(), SMP - * booting is too fragile that we want to limit the - * things done here to the most necessary things. - */ - cpu_init(); - smp_callin(); - while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) - rep_nop(); - local_setup_timer(); - local_setup_debug(); /* XXX */ - smp_intr_init(); - local_irq_enable(); - /* - * low-memory mappings have been cleared, flush them from - * the local TLBs too. - */ - local_flush_tlb(); - cpu_set(smp_processor_id(), cpu_online_map); - wmb(); - if (0) { - char *msg2 = "delay2\n"; - int timeout; - for (timeout = 0; timeout < 50000; timeout++) { - udelay(1000); - if (timeout == 2000) { - (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2); - timeout = 0; - } - } - } - return cpu_idle(); -} - -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __init initialize_secondary(void) -{ - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. - */ - - asm volatile( - "movl %0,%%esp\n\t" - "jmp *%1" - : - :"r" (current->thread.esp),"r" (current->thread.eip)); -} - -extern struct { - void * esp; - unsigned short ss; -} stack_start; - -#ifdef CONFIG_NUMA - -/* which logical CPUs are on which nodes */ -cpumask_t node_2_cpu_mask[MAX_NUMNODES] = - { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; -/* which node each logical CPU is on */ -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; -EXPORT_SYMBOL(cpu_2_node); - -/* set up a mapping between cpu and node. */ -static inline void map_cpu_to_node(int cpu, int node) -{ - printk("Mapping cpu %d to node %d\n", cpu, node); - cpu_set(cpu, node_2_cpu_mask[node]); - cpu_2_node[cpu] = node; -} - -/* undo a mapping between cpu and node. */ -static inline void unmap_cpu_to_node(int cpu) -{ - int node; - - printk("Unmapping cpu %d from all nodes\n", cpu); - for (node = 0; node < MAX_NUMNODES; node ++) - cpu_clear(cpu, node_2_cpu_mask[node]); - cpu_2_node[cpu] = 0; -} -#else /* !CONFIG_NUMA */ - -#define map_cpu_to_node(cpu, node) ({}) -#define unmap_cpu_to_node(cpu) ({}) - -#endif /* CONFIG_NUMA */ - -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = BAD_APICID }; - -void map_cpu_to_logical_apicid(void) -{ - int cpu = smp_processor_id(); - int apicid = smp_processor_id(); - - cpu_2_logical_apicid[cpu] = apicid; - map_cpu_to_node(cpu, apicid_to_node(apicid)); -} - -void unmap_cpu_to_logical_apicid(int cpu) -{ - cpu_2_logical_apicid[cpu] = BAD_APICID; - unmap_cpu_to_node(cpu); -} - -#if APIC_DEBUG -static inline void __inquire_remote_apic(int apicid) -{ - int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; - char *names[] = { "ID", "VERSION", "SPIV" }; - int timeout, status; - - printk("Inquiring remote APIC #%d...\n", apicid); - - for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { - printk("... APIC #%d %s: ", apicid, names[i]); - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); - - timeout = 0; - do { - udelay(100); - status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; - } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); - - switch (status) { - case APIC_ICR_RR_VALID: - status = apic_read(APIC_RRR); - printk("%08x\n", status); - break; - default: - printk("failed\n"); - } - } -} -#endif - -#if 0 -#ifdef WAKE_SECONDARY_VIA_NMI -/* - * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this - * won't ... remember to clear down the APIC, etc later. - */ -static int __init -wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) -{ - unsigned long send_status = 0, accept_status = 0; - int timeout, maxlvt; - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - maxlvt = get_maxlvt(); - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - Dprintk("NMI sent.\n"); - - if (send_status) - printk("APIC never delivered???\n"); - if (accept_status) - printk("APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} -#endif /* WAKE_SECONDARY_VIA_NMI */ - -#ifdef WAKE_SECONDARY_VIA_INIT -static int __init -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) -{ - unsigned long send_status = 0, accept_status = 0; - int maxlvt, timeout, num_starts, j; - - /* - * Be paranoid about clearing APIC errors. 
- */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - Dprintk("Asserting INIT.\n"); - - /* - * Turn INIT on target chip - */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* - * Send IPI - */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT - | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - mdelay(10); - - Dprintk("Deasserting INIT.\n"); - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Send IPI */ - apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - atomic_set(&init_deasserted, 1); - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't send the STARTUP IPIs. - */ - if (APIC_INTEGRATED(apic_version[phys_apicid])) - num_starts = 2; - else - num_starts = 0; - - /* - * Run STARTUP IPI loop. - */ - Dprintk("#startup loops: %d.\n", num_starts); - - maxlvt = get_maxlvt(); - - for (j = 1; j <= num_starts; j++) { - Dprintk("Sending STARTUP #%d.\n",j); - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - Dprintk("After apic_write.\n"); - - /* - * STARTUP IPI - */ - - /* Target chip */ - apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - - /* Boot on the stack */ - /* Kick the second */ - apic_write_around(APIC_ICR, APIC_DM_STARTUP - | (start_eip >> 12)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(300); - - Dprintk("Startup point 1.\n"); - - Dprintk("Waiting for send to finish...\n"); - timeout = 0; - do { - Dprintk("+"); - udelay(100); - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - } while (send_status && (timeout++ < 1000)); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); - /* - * Due to the Pentium erratum 3AP. - */ - if (maxlvt > 3) { - apic_read_around(APIC_SPIV); - apic_write(APIC_ESR, 0); - } - accept_status = (apic_read(APIC_ESR) & 0xEF); - if (send_status || accept_status) - break; - } - Dprintk("After Startup.\n"); - - if (send_status) - printk("APIC never delivered???\n"); - if (accept_status) - printk("APIC delivery error (%lx).\n", accept_status); - - return (send_status | accept_status); -} -#endif /* WAKE_SECONDARY_VIA_INIT */ -#endif - -extern cpumask_t cpu_initialized; - -static int __init do_boot_cpu(int apicid) -/* - * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad - * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. - */ -{ - struct task_struct *idle; - unsigned long boot_error; - int timeout, cpu; - unsigned long start_eip; -#if 0 - unsigned short nmi_high = 0, nmi_low = 0; -#endif - full_execution_context_t ctxt; - extern void startup_32_smp(void); - extern void hypervisor_callback(void); - extern void failsafe_callback(void); - extern int smp_trap_init(trap_info_t *); - int i; - - cpu = ++cpucount; - /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. 
- */ - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU %d", cpu); - idle->thread.eip = (unsigned long) start_secondary; - /* start_eip had better be page-aligned! */ - start_eip = (unsigned long)startup_32_smp; - - /* So we see what's up */ - printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); - /* Stack for startup_32 can be just as for start_secondary onwards */ - stack_start.esp = (void *) idle->thread.esp; - - irq_ctx_init(cpu); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - atomic_set(&init_deasserted, 0); - -#if 1 - if (cpu_gdt_descr[0].size > PAGE_SIZE) - BUG(); - cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; - memcpy((void *)cpu_gdt_descr[cpu].address, - (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size); - memset((char *)cpu_gdt_descr[cpu].address + - FIRST_RESERVED_GDT_ENTRY * 8, 0, - NR_RESERVED_GDT_ENTRIES * 8); - - memset(&ctxt, 0, sizeof(ctxt)); - - ctxt.cpu_ctxt.ds = __USER_DS; - ctxt.cpu_ctxt.es = __USER_DS; - ctxt.cpu_ctxt.fs = 0; - ctxt.cpu_ctxt.gs = 0; - ctxt.cpu_ctxt.ss = __KERNEL_DS; - ctxt.cpu_ctxt.cs = __KERNEL_CS; - ctxt.cpu_ctxt.eip = start_eip; - ctxt.cpu_ctxt.esp = idle->thread.esp; - ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12); - - /* FPU is set up to default initial state. */ - memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - - /* Virtual IDT is empty at start-of-day. */ - for ( i = 0; i < 256; i++ ) - { - ctxt.trap_ctxt[i].vector = i; - ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; - } - ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt); - - /* No LDT. */ - ctxt.ldt_ents = 0; - - { - unsigned long va; - int f; - - for (va = cpu_gdt_descr[cpu].address, f = 0; - va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size; - va += PAGE_SIZE, f++) { - ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT; - make_page_readonly((void *)va); - } - ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8; - flush_page_update_queue(); - } - - /* Ring 1 stack is the initial stack. */ - ctxt.kernel_ss = __KERNEL_DS; - ctxt.kernel_esp = idle->thread.esp; - - /* Callback handlers. */ - ctxt.event_callback_cs = __KERNEL_CS; - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; - ctxt.failsafe_callback_cs = __KERNEL_CS; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - - ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir); - - boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt); - - if (!boot_error) { - /* - * allow APs to start initializing. - */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("OK.\n"); - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; - } - } - x86_cpu_to_apicid[cpu] = apicid; - if (boot_error) { - /* Try to put things back the way they were before ... 
*/ - unmap_cpu_to_logical_apicid(cpu); - cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ - cpucount--; - } - -#else - Dprintk("Setting warm reset code and vector.\n"); - - store_NMI_vector(&nmi_high, &nmi_low); - - smpboot_setup_warm_reset_vector(start_eip); - - /* - * Starting actual IPI sequence... - */ - boot_error = wakeup_secondary_cpu(apicid, start_eip); - - if (!boot_error) { - /* - * allow APs to start initializing. - */ - Dprintk("Before Callout %d.\n", cpu); - cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); - - /* - * Wait 5s total for a response - */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_isset(cpu, cpu_callin_map)) - break; /* It has booted */ - udelay(100); - } - - if (cpu_isset(cpu, cpu_callin_map)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("OK.\n"); - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); - Dprintk("CPU has booted.\n"); - } else { - boot_error= 1; - if (*((volatile unsigned char *)trampoline_base) - == 0xA5) - /* trampoline started but...? */ - printk("Stuck ??\n"); - else - /* trampoline code not run */ - printk("Not responding.\n"); - inquire_remote_apic(apicid); - } - } - x86_cpu_to_apicid[cpu] = apicid; - if (boot_error) { - /* Try to put things back the way they were before ... */ - unmap_cpu_to_logical_apicid(cpu); - cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ - cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ - cpucount--; - } - - /* mark "stuck" area as not stuck */ - *((volatile unsigned long *)trampoline_base) = 0; -#endif - - return boot_error; -} - -cycles_t cacheflush_time; -unsigned long cache_decay_ticks; - -static void smp_tune_scheduling (void) -{ - unsigned long cachesize; /* kB */ - unsigned long bandwidth = 350; /* MB/s */ - /* - * Rough estimation for SMP scheduling, this is the number of - * cycles it takes for a fully memory-limited process to flush - * the SMP-local cache. - * - * (For a P5 this pretty much means we will choose another idle - * CPU almost always at wakeup time (this is due to the small - * L1 cache), on PIIs it's around 50-100 usecs, depending on - * the cache size) - */ - - if (!cpu_khz) { - /* - * this basically disables processor-affinity - * scheduling on SMP without a TSC. - */ - cacheflush_time = 0; - return; - } else { - cachesize = boot_cpu_data.x86_cache_size; - if (cachesize == -1) { - cachesize = 16; /* Pentiums, 2x8kB cache */ - bandwidth = 100; - } - - cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth; - } - - cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1; - - printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", - (long)cacheflush_time/(cpu_khz/1000), - ((long)cacheflush_time*100/(cpu_khz/1000)) % 100); - printk("task migration cache decay timeout: %ld msecs.\n", - cache_decay_ticks); -} - -/* - * Cycle through the processors sending APIC IPIs to boot each. 
- */ - -#if 0 -static int boot_cpu_logical_apicid; -#endif -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; - -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; - -static void __init smp_boot_cpus(unsigned int max_cpus) -{ - int cpu, kicked; - unsigned long bogosum = 0; -#if 0 - int apicid, bit; -#endif - - /* - * Setup boot CPU information - */ - smp_store_cpu_info(0); /* Final full version of the data */ - printk("CPU%d: ", 0); - print_cpu_info(&cpu_data[0]); - -#if 0 - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - boot_cpu_logical_apicid = logical_smp_processor_id(); - x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; -#else - // boot_cpu_physical_apicid = 0; - // boot_cpu_logical_apicid = 0; - x86_cpu_to_apicid[0] = 0; -#endif - - current_thread_info()->cpu = 0; - smp_tune_scheduling(); - cpus_clear(cpu_sibling_map[0]); - cpu_set(0, cpu_sibling_map[0]); - - /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config /* && !acpi_lapic) */) { - printk(KERN_NOTICE "SMP motherboard not detected.\n"); - smpboot_clear_io_apic_irqs(); -#if 0 - phys_cpu_present_map = physid_mask_of_physid(0); - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); -#endif - map_cpu_to_logical_apicid(); - return; - } - -#if 0 - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - * Makes no sense to do this check in clustered apic mode, so skip it - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - return; - } - - verify_local_APIC(); -#endif - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - HYPERVISOR_shared_info->n_vcpu = 1; - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - smpboot_clear_io_apic_irqs(); -#if 0 - phys_cpu_present_map = physid_mask_of_physid(0); -#endif - return; - } - - smp_intr_init(); - -#if 0 - connect_bsp_APIC(); - setup_local_APIC(); -#endif - map_cpu_to_logical_apicid(); -#if 0 - - - setup_portio_remap(); - - /* - * Scan the CPU present map and fire up the other CPUs via do_boot_cpu - * - * In clustered apic mode, phys_cpu_present_map is a constructed thus: - * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the - * clustered apic ID. - */ - Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map)); -#endif - Dprintk("CPU present map: %lx\n", - (1UL << HYPERVISOR_shared_info->n_vcpu) - 1); - - kicked = 1; - for (cpu = 1; kicked < NR_CPUS && - cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) { - if (max_cpus <= cpucount+1) - continue; - - if (do_boot_cpu(cpu)) - printk("CPU #%d not responding - cannot use it.\n", - cpu); - else - ++kicked; - } - -#if 0 - /* - * Cleanup possible dangling ends... 
- */ - smpboot_restore_warm_reset_vector(); -#endif - - /* - * Allow the user to impress friends. - */ - Dprintk("Before bogomips.\n"); - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (cpu_isset(cpu, cpu_callout_map)) - bogosum += cpu_data[cpu].loops_per_jiffy; - printk(KERN_INFO - "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - cpucount+1, - bogosum/(500000/HZ), - (bogosum/(5000/HZ))%100); - - Dprintk("Before bogocount - setting activated=1.\n"); - - if (smp_b_stepping) - printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); - - /* - * Don't taint if we are running SMP kernel on a single non-MP - * approved Athlon - */ - if (tainted & TAINT_UNSAFE_SMP) { - if (cpucount) - printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n"); - else - tainted &= ~TAINT_UNSAFE_SMP; - } - - Dprintk("Boot done.\n"); - - /* - * construct cpu_sibling_map[], so that we can tell sibling CPUs - * efficiently. - */ - for (cpu = 0; cpu < NR_CPUS; cpu++) - cpus_clear(cpu_sibling_map[cpu]); - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - int siblings = 0; - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; - - if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) - printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); - } - -#if 0 - if (nmi_watchdog == NMI_LOCAL_APIC) - check_nmi_watchdog(); - - smpboot_setup_io_apic(); - - setup_boot_APIC_clock(); - - /* - * Synchronize the TSC with the AP - */ - if (cpu_has_tsc && cpucount && cpu_khz) - synchronize_tsc_bp(); -#endif -} - -/* These are wrappers to interface to the new boot process. Someone - who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ -void __init smp_prepare_cpus(unsigned int max_cpus) -{ - smp_boot_cpus(max_cpus); -} - -void __devinit smp_prepare_boot_cpu(void) -{ - cpu_set(smp_processor_id(), cpu_online_map); - cpu_set(smp_processor_id(), cpu_callout_map); -} - -int __devinit __cpu_up(unsigned int cpu) -{ - /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) { - local_irq_enable(); - return -ENOSYS; - } - - /* In case one didn't come up */ - if (!cpu_isset(cpu, cpu_callin_map)) { - local_irq_enable(); - return -EIO; - } - - local_irq_enable(); - /* Unleash the CPU! 
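 *
 * Editorial sketch (not part of the original patch): the cpu_set() on
 * smp_commenced_mask that follows releases the AP, whose side of the
 * handshake is roughly
 *
 *     while (!cpu_isset(cpu, smp_commenced_mask))
 *         rep_nop();                  /* spin until the BSP says go */
 *     cpu_set(cpu, cpu_online_map);   /* lets __cpu_up() return     */
 *
 * which is why __cpu_up() can busy-wait on cpu_online_map with only a
 * memory barrier between reads.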
*/ - cpu_set(cpu, smp_commenced_mask); - while (!cpu_isset(cpu, cpu_online_map)) - mb(); - return 0; -} - -void __init smp_cpus_done(unsigned int max_cpus) -{ -#if 1 -#else -#ifdef CONFIG_X86_IO_APIC - setup_ioapic_dest(); -#endif - zap_low_mappings(); - /* - * Disable executability of the SMP trampoline: - */ - set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); -#endif -} - -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); - -static struct irqaction reschedule_irq = { - smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule", - NULL, NULL -}; - -extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *); - -static struct irqaction invalidate_irq = { - smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate", - NULL, NULL -}; - -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); - -static struct irqaction call_function_irq = { - smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE, - "call_function", NULL, NULL -}; - -void __init smp_intr_init(void) -{ - - (void)setup_irq( - bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR), - &reschedule_irq); - (void)setup_irq( - bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR), - &invalidate_irq); - (void)setup_irq( - bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR), - &call_function_irq); -} diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c deleted file mode 100644 index 51addc6c76..0000000000 --- a/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 2004, Christian Limpach */ - -#include -#include -#include - -unsigned int __initdata maxcpus = NR_CPUS; - - -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) -{ - printk("setup_profiling_timer\n"); - - return 0; -} diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile deleted file mode 100644 index 80b7ca0627..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile +++ /dev/null @@ -1,3 +0,0 @@ - -obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o - diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c deleted file mode 100644 index a9a00677bc..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c +++ /dev/null @@ -1,87 +0,0 @@ -/****************************************************************************** - * blktap.c - * - * XenLinux virtual block-device tap. - * - * Copyright (c) 2004, Andrew Warfield - * - * Based on the original split block driver: - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - * Copyright (c) 2004, Christian Limpach - * - * Note that unlike the split block driver code, this driver has been developed - * strictly for Linux 2.6 - */ - -#include "blktap.h" - -int __init xlblktap_init(void) -{ - ctrl_msg_t cmsg; - blkif_fe_driver_status_t fe_st; - blkif_be_driver_status_t be_st; - - printk(KERN_INFO "Initialising Xen block tap device\n"); - - DPRINTK(" tap - Backend connection init:\n"); - - - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - /* Send a driver-UP notification to the domain controller. 
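 *
 * Editorial note (not part of the original patch): the tap registers on
 * both control channels because it impersonates a backend to the real
 * frontends and a frontend to the real backend, so it raises driver-UP
 * on each. A hypothetical helper for the duplicated construction below,
 * assuming both status payloads are a single 32-bit status word as they
 * are used here:
 *
 *     static void send_driver_up(u8 type, u8 subtype, u32 up, int len)
 *     {
 *         ctrl_msg_t cmsg;
 *         cmsg.type    = type;
 *         cmsg.subtype = subtype;
 *         cmsg.length  = len;
 *         memcpy(cmsg.msg, &up, len);   /* assumed layout */
 *         ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
 *     }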
*/ - cmsg.type = CMSG_BLKIF_FE; - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; - cmsg.length = sizeof(blkif_fe_driver_status_t); - fe_st.status = BLKIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &fe_st, sizeof(fe_st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); - - DPRINTK(" tap - Frontend connection init:\n"); - - active_reqs_init(); - blkif_interface_init(); - blkdev_schedule_init(); - - (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - /* Send a driver-UP notification to the domain controller. */ - cmsg.type = CMSG_BLKIF_BE; - cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS; - cmsg.length = sizeof(blkif_be_driver_status_t); - be_st.status = BLKIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &be_st, sizeof(be_st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); - - DPRINTK(" tap - Userland channel init:\n"); - - blktap_init(); - - DPRINTK("Blkif tap device initialized.\n"); - - return 0; -} - -#if 0 /* tap doesn't handle suspend/resume */ -void blkdev_suspend(void) -{ -} - -void blkdev_resume(void) -{ - ctrl_msg_t cmsg; - blkif_fe_driver_status_t st; - - /* Send a driver-UP notification to the domain controller. */ - cmsg.type = CMSG_BLKIF_FE; - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; - cmsg.length = sizeof(blkif_fe_driver_status_t); - st.status = BLKIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &st, sizeof(st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} -#endif - -__initcall(xlblktap_init); diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h deleted file mode 100644 index eb084e8f06..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h +++ /dev/null @@ -1,250 +0,0 @@ -/* - * blktap.h - * - * Interfaces for the Xen block tap driver. - * - * (c) 2004, Andrew Warfield, University of Cambridge - * - */ - -#ifndef __BLKTAP_H__ -#define __BLKTAP_H__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Used to signal to the backend that this is a tap domain. */ -#define BLKTAP_COOKIE 0xbeadfeed - -/* -------[ debug / pretty printing ]--------------------------------- */ - -#if 0 -#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) -#else -#define DPRINTK(_f, _a...) ((void)0) -#endif - -#if 1 -#define ASSERT(_p) \ - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ - __LINE__, __FILE__); *(int*)0=0; } -#else -#define ASSERT(_p) ((void)0) -#endif - -#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) - - -/* -------[ state descriptors ]--------------------------------------- */ - -#define BLKIF_STATE_CLOSED 0 -#define BLKIF_STATE_DISCONNECTED 1 -#define BLKIF_STATE_CONNECTED 2 - -/* -------[ connection tracking ]------------------------------------- */ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#endif - -extern spinlock_t blkif_io_lock; - -typedef struct blkif_st { - /* Unique identifier for this interface. */ - domid_t domid; - unsigned int handle; - /* Physical parameters of the comms window. */ - unsigned long shmem_frame; - unsigned int evtchn; - int irq; - /* Comms information. */ - blkif_back_ring_t blk_ring; - - enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; - /* - * DISCONNECT response is deferred until pending requests are ack'ed. 
- * We therefore need to store the id from the original request. - */ - u8 disconnect_rspid; - struct blkif_st *hash_next; - struct list_head blkdev_list; - spinlock_t blk_ring_lock; - atomic_t refcnt; - struct work_struct work; -} blkif_t; - -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); -void blkif_disconnect_complete(blkif_t *blkif); -#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) -#define blkif_put(_b) \ - do { \ - if ( atomic_dec_and_test(&(_b)->refcnt) ) \ - blkif_disconnect_complete(_b); \ - } while (0) - - -/* -------[ active request tracking ]--------------------------------- */ - -typedef struct { - blkif_t *blkif; - unsigned long id; - int nr_pages; - unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int next_free; -} active_req_t; - -typedef unsigned int ACTIVE_RING_IDX; - -active_req_t *lookup_active_req(ACTIVE_RING_IDX idx); - -extern inline unsigned int ID_TO_IDX(unsigned long id) -{ - return ( id & 0x0000ffff ); -} - -extern inline domid_t ID_TO_DOM(unsigned long id) -{ - return (id >> 16); -} - -void active_reqs_init(void); - -/* -------[ interposition -> character device interface ]------------- */ - -/* /dev/xen/blktap resides at device number major=10, minor=202 */ -#define BLKTAP_MINOR 202 - -/* size of the extra VMA area to map in attached pages. */ -#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE - -/* blktap IOCTLs: */ -#define BLKTAP_IOCTL_KICK_FE 1 -#define BLKTAP_IOCTL_KICK_BE 2 -#define BLKTAP_IOCTL_SETMODE 3 -#define BLKTAP_IOCTL_PRINT_IDXS 100 - -/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ -#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ -#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 -#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 -#define BLKTAP_MODE_COPY_FE 0x00000004 -#define BLKTAP_MODE_COPY_BE 0x00000008 -#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 -#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 - -#define BLKTAP_MODE_INTERPOSE \ - (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) - -#define BLKTAP_MODE_COPY_BOTH \ - (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) - -#define BLKTAP_MODE_COPY_BOTH_PAGES \ - (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) - -static inline int BLKTAP_MODE_VALID(unsigned long arg) -{ - return ( - ( arg == BLKTAP_MODE_PASSTHROUGH ) || - ( arg == BLKTAP_MODE_INTERCEPT_FE ) || - ( arg == BLKTAP_MODE_INTERCEPT_BE ) || - ( arg == BLKTAP_MODE_INTERPOSE ) || - ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || - ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || - ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) - ); -} - - - -/* -------[ Mappings to User VMA ]------------------------------------ */ -#define MAX_PENDING_REQS 64 -#define BATCH_PER_DOMAIN 16 -extern struct vm_area_struct *blktap_vma; - -/* The following are from blkback.c and should probably be put in a - * header and included from there. - * The mmap area described here is where attached data pages will be mapped. - */ - -extern unsigned long mmap_vstart; -#define MMAP_PAGES_PER_REQUEST \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) -#define MMAP_PAGES \ - (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) -#define MMAP_VADDR(_req,_seg) \ - (mmap_vstart + \ - ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ - ((_seg) * PAGE_SIZE)) - -/* immediately before the mmap area, we have a bunch of pages reserved - * for shared memory rings.
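 *
 * Editorial worked example (not part of the original patch), assuming
 * BLKIF_MAX_SEGMENTS_PER_REQUEST is 11: MMAP_PAGES_PER_REQUEST is then
 * 12 and MMAP_PAGES is 64 * 12 = 768 pages. MMAP_VADDR(5, 2) resolves
 * to mmap_vstart + (5*12 + 2) * PAGE_SIZE, i.e. 62 pages into the data
 * area, so every (request, segment) pair owns a fixed page slot and no
 * allocator is needed on the data path.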
- */ - -#define RING_PAGES 3 /* Ctrl, Front, and Back */ -extern unsigned long rings_vstart; - - -/* -------[ Here be globals ]----------------------------------------- */ -extern unsigned long blktap_mode; - -/* Connection to a single backend domain. */ -extern blkif_front_ring_t blktap_be_ring; -extern unsigned int blktap_be_evtchn; -extern unsigned int blktap_be_state; - -/* User ring status. */ -extern unsigned long blktap_ring_ok; - -/* -------[ ...and function prototypes. ]----------------------------- */ - -/* init function for character device interface. */ -int blktap_init(void); - -/* init function for the blkif cache. */ -void __init blkif_interface_init(void); -void __init blkdev_schedule_init(void); -void blkif_deschedule(blkif_t *blkif); - -/* interfaces to the char driver, passing messages to and from apps. */ -void blktap_kick_user(void); - -/* user ring access functions: */ -int blktap_write_fe_ring(blkif_request_t *req); -int blktap_write_be_ring(blkif_response_t *rsp); -int blktap_write_ctrl_ring(ctrl_msg_t *msg); - -/* fe/be ring access functions: */ -int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp); -int write_req_to_be_ring(blkif_request_t *req); - -/* event notification functions */ -void kick_fe_domain(blkif_t *blkif); -void kick_be_domain(void); - -/* Interrupt handlers. */ -irqreturn_t blkif_ptbe_int(int irq, void *dev_id, - struct pt_regs *ptregs); -irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs); - -/* Control message receiver. */ -extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id); - -/* debug */ -void print_vm_ring_idxs(void); - -#define __BLKINT_H__ -#endif diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c deleted file mode 100644 index 9ce74c7442..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c +++ /dev/null @@ -1,517 +0,0 @@ -/****************************************************************************** - * blktap_controlmsg.c - * - * XenLinux virtual block-device tap. - * Control interfaces to the frontend and backend drivers. 
- * - * Copyright (c) 2004, Andrew Warfield - * - */ - -#include "blktap.h" - -static char *blkif_state_name[] = { - [BLKIF_STATE_CLOSED] = "closed", - [BLKIF_STATE_DISCONNECTED] = "disconnected", - [BLKIF_STATE_CONNECTED] = "connected", -}; - -static char * blkif_status_name[] = { - [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", - [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", - [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", - [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", -}; - -static unsigned blktap_be_irq; -unsigned int blktap_be_state = BLKIF_STATE_CLOSED; -unsigned int blktap_be_evtchn; - -/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/ - -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) - -static kmem_cache_t *blkif_cachep; -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) -{ - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif != NULL) && - ((blkif->domid != domid) || (blkif->handle != handle)) ) - blkif = blkif->hash_next; - return blkif; -} - -static void __blkif_disconnect_complete(void *arg) -{ - blkif_t *blkif = (blkif_t *)arg; - ctrl_msg_t cmsg; - blkif_be_disconnect_t disc; - - /* - * These can't be done in blkif_disconnect() because at that point there - * may be outstanding requests at the disc whose asynchronous responses - * must still be notified to the remote driver. - */ - unbind_evtchn_from_irq(blkif->evtchn); - vfree(blkif->blk_ring.sring); - - /* Construct the deferred response message. */ - cmsg.type = CMSG_BLKIF_BE; - cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; - cmsg.id = blkif->disconnect_rspid; - cmsg.length = sizeof(blkif_be_disconnect_t); - disc.domid = blkif->domid; - disc.blkif_handle = blkif->handle; - disc.status = BLKIF_BE_STATUS_OKAY; - memcpy(cmsg.msg, &disc, sizeof(disc)); - - /* - * Make sure message is constructed /before/ status change, because - * after the status change the 'blkif' structure could be deallocated at - * any time. Also make sure we send the response /after/ status change, - * as otherwise a subsequent CONNECT request could spuriously fail if - * another CPU doesn't see the status change yet. - */ - mb(); - if ( blkif->status != DISCONNECTING ) - BUG(); - blkif->status = DISCONNECTED; - mb(); - - /* Send the successful response. */ - ctrl_if_send_response(&cmsg); -} - -void blkif_disconnect_complete(blkif_t *blkif) -{ - INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif); - schedule_work(&blkif->work); -} - -void blkif_ptfe_create(blkif_be_create_t *create) -{ - blkif_t *blkif, **pblkif; - domid_t domid = create->domid; - unsigned int handle = create->blkif_handle; - - - /* May want to store info on the connecting domain here. 
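 *
 * Editorial note (not part of the original patch): BLKIF_HASH() above
 * is a plain xor-and-mask, e.g. domid 5 with handle 1 hashes to
 * (5 ^ 1) & 1023 = 4. Collisions chain through hash_next; the insertion
 * loop below walks the bucket to reject duplicates before linking the
 * new blkif at the tail.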
*/ - - DPRINTK("PT got BE_CREATE\n"); - - if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) - { - DPRINTK("Could not create blkif: out of memory\n"); - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - /* blkif struct init code from blkback.c */ - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->status = DISCONNECTED; - spin_lock_init(&blkif->blk_ring_lock); - atomic_set(&blkif->refcnt, 0); - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif != NULL ) - { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - { - DPRINTK("Could not create blkif: already exists\n"); - create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; - kmem_cache_free(blkif_cachep, blkif); - return; - } - pblkif = &(*pblkif)->hash_next; - } - - blkif->hash_next = *pblkif; - *pblkif = blkif; - - create->status = BLKIF_BE_STATUS_OKAY; -} - - -void blkif_ptfe_destroy(blkif_be_destroy_t *destroy) -{ - /* Clear anything that we initialized above. */ - - domid_t domid = destroy->domid; - unsigned int handle = destroy->blkif_handle; - blkif_t **pblkif, *blkif; - - DPRINTK("PT got BE_DESTROY\n"); - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif = *pblkif) != NULL ) - { - if ( (blkif->domid == domid) && (blkif->handle == handle) ) - { - if ( blkif->status != DISCONNECTED ) - goto still_connected; - goto destroy; - } - pblkif = &blkif->hash_next; - } - - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - - still_connected: - destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - return; - - destroy: - *pblkif = blkif->hash_next; - kmem_cache_free(blkif_cachep, blkif); - destroy->status = BLKIF_BE_STATUS_OKAY; -} - -void blkif_ptfe_connect(blkif_be_connect_t *connect) -{ - domid_t domid = connect->domid; - unsigned int handle = connect->blkif_handle; - unsigned int evtchn = connect->evtchn; - unsigned long shmem_frame = connect->shmem_frame; - struct vm_struct *vma; - pgprot_t prot; - int error; - blkif_t *blkif; - blkif_sring_t *sring; - - DPRINTK("PT got BE_CONNECT\n"); - - blkif = blkif_find_by_handle(domid, handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", - connect->domid, connect->blkif_handle); - connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } - - if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) - { - connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); - error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), - shmem_frame<status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - else if ( error == -EFAULT ) { - connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; - WPRINTK("BE_CONNECT: MAPPING error!\n"); - } - else - connect->status = BLKIF_BE_STATUS_ERROR; - vfree(vma->addr); - return; - } - - if ( blkif->status != DISCONNECTED ) - { - connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - vfree(vma->addr); - return; - } - - sring = (blkif_sring_t *)vma->addr; - SHARED_RING_INIT(sring); - BACK_RING_INIT(&blkif->blk_ring, sring); - - blkif->evtchn = evtchn; - blkif->irq = bind_evtchn_to_irq(evtchn); - blkif->shmem_frame = shmem_frame; - blkif->status = CONNECTED; - blkif_get(blkif); - - request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif); - - connect->status = BLKIF_BE_STATUS_OKAY; -} - -int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) -{ - domid_t domid = 
disconnect->domid; - unsigned int handle = disconnect->blkif_handle; - blkif_t *blkif; - - DPRINTK("PT got BE_DISCONNECT\n"); - - blkif = blkif_find_by_handle(domid, handle); - if ( unlikely(blkif == NULL) ) - { - DPRINTK("blkif_disconnect attempted for non-existent blkif" - " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); - disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return 1; /* Caller will send response error message. */ - } - - if ( blkif->status == CONNECTED ) - { - blkif->status = DISCONNECTING; - blkif->disconnect_rspid = rsp_id; - wmb(); /* Let other CPUs see the status change. */ - free_irq(blkif->irq, blkif); - blkif_deschedule(blkif); - blkif_put(blkif); - return 0; /* Caller should not send response message. */ - } - - disconnect->status = BLKIF_BE_STATUS_OKAY; - return 1; -} - -/*-----[ Control Messages to/from Backend VM ]----------------------------*/ - -/* Tell the controller to bring up the interface. */ -static void blkif_ptbe_send_interface_connect(void) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, - .length = sizeof(blkif_fe_interface_connect_t), - }; - blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; - msg->handle = 0; - msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT; - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} - -static void blkif_ptbe_close(void) -{ -} - -/* Move from CLOSED to DISCONNECTED state. */ -static void blkif_ptbe_disconnect(void) -{ - blkif_sring_t *sring; - - sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blktap_be_ring, sring); - blktap_be_state = BLKIF_STATE_DISCONNECTED; - DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n"); - blkif_ptbe_send_interface_connect(); -} - -static void blkif_ptbe_connect(blkif_fe_interface_status_t *status) -{ - int err = 0; - - blktap_be_evtchn = status->evtchn; - blktap_be_irq = bind_evtchn_to_irq(blktap_be_evtchn); - - err = request_irq(blktap_be_irq, blkif_ptbe_int, - SA_SAMPLE_RANDOM, "blkif", NULL); - if ( err ) { - WPRINTK("blkfront request_irq failed (%d)\n", err); - return; - } else { - /* transtion to connected in case we need to do a - a partion probe on a whole disk */ - blktap_be_state = BLKIF_STATE_CONNECTED; - } -} - -static void unexpected(blkif_fe_interface_status_t *status) -{ - WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", - blkif_status_name[status->status], - blkif_state_name[blktap_be_state]); -} - -static void blkif_ptbe_status( - blkif_fe_interface_status_t *status) -{ - if ( status->handle != 0 ) - { - DPRINTK("Status change on unsupported blkif %d\n", - status->handle); - return; - } - - DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]); - - switch ( status->status ) - { - case BLKIF_INTERFACE_STATUS_CLOSED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_ptbe_close(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_DISCONNECTED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - blkif_ptbe_disconnect(); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - printk(KERN_ALERT "*** add recovery code to the tap driver. 
***\n"); - unexpected(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CONNECTED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - blkif_ptbe_disconnect(); - blkif_ptbe_connect(status); - break; - case BLKIF_STATE_DISCONNECTED: - blkif_ptbe_connect(status); - break; - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_ptbe_connect(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CHANGED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - case BLKIF_STATE_DISCONNECTED: - unexpected(status); - break; - case BLKIF_STATE_CONNECTED: - /* vbd_update(); */ - /* tap doesn't really get state changes... */ - unexpected(status); - break; - } - break; - - default: - DPRINTK("Status change to unknown value %d\n", status->status); - break; - } -} - -/*-----[ All control messages enter here: ]-------------------------------*/ - -void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->type ) - { - case CMSG_BLKIF_FE: - - switch ( msg->subtype ) - { - case CMSG_BLKIF_FE_INTERFACE_STATUS: - if ( msg->length != sizeof(blkif_fe_interface_status_t) ) - goto parse_error; - blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]); - break; - - default: - goto parse_error; - } - - case CMSG_BLKIF_BE: - - /* send a copy of the message to user if wanted */ - - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_COPY_FE) ) { - - blktap_write_ctrl_ring(msg); - } - - switch ( msg->subtype ) - { - case CMSG_BLKIF_BE_CREATE: - if ( msg->length != sizeof(blkif_be_create_t) ) - goto parse_error; - blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_DESTROY: - if ( msg->length != sizeof(blkif_be_destroy_t) ) - goto parse_error; - blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_CONNECT: - if ( msg->length != sizeof(blkif_be_connect_t) ) - goto parse_error; - blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_DISCONNECT: - if ( msg->length != sizeof(blkif_be_disconnect_t) ) - goto parse_error; - if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0], - msg->id) ) - return; - break; - - /* We just ignore anything to do with vbds for now. 
*/ - - case CMSG_BLKIF_BE_VBD_CREATE: - DPRINTK("PT got VBD_CREATE\n"); - ((blkif_be_vbd_create_t *)&msg->msg[0])->status - = BLKIF_BE_STATUS_OKAY; - break; - case CMSG_BLKIF_BE_VBD_DESTROY: - DPRINTK("PT got VBD_DESTROY\n"); - ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status - = BLKIF_BE_STATUS_OKAY; - break; - case CMSG_BLKIF_BE_VBD_GROW: - DPRINTK("PT got VBD_GROW\n"); - ((blkif_be_vbd_grow_t *)&msg->msg[0])->status - = BLKIF_BE_STATUS_OKAY; - break; - case CMSG_BLKIF_BE_VBD_SHRINK: - DPRINTK("PT got VBD_SHRINK\n"); - ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status - = BLKIF_BE_STATUS_OKAY; - break; - default: - goto parse_error; - } - } - - ctrl_if_send_response(msg); - return; - - parse_error: - msg->length = 0; - ctrl_if_send_response(msg); -} - -/*-----[ Initialization ]-------------------------------------------------*/ - -void __init blkif_interface_init(void) -{ - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), - 0, 0, NULL, NULL); - memset(blkif_hash, 0, sizeof(blkif_hash)); - - blktap_be_ring.sring = NULL; -} diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c deleted file mode 100644 index e88c5629a6..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c +++ /dev/null @@ -1,472 +0,0 @@ -/****************************************************************************** - * blktap_datapath.c - * - * XenLinux virtual block-device tap. - * Block request routing data path. - * - * Copyright (c) 2004, Andrew Warfield - * -- see full header in blktap.c - */ - -#include "blktap.h" -#include - -/*-----[ The data paths ]-------------------------------------------------*/ - -/* Connection to a single backend domain. */ -blkif_front_ring_t blktap_be_ring; - -/*-----[ Tracking active requests ]---------------------------------------*/ - -/* this must be the same as MAX_PENDING_REQS in blkback.c */ -#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U) - -active_req_t active_reqs[MAX_ACTIVE_REQS]; -ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS]; -spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED; -ACTIVE_RING_IDX active_prod, active_cons; -#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1)) -#define ACTIVE_IDX(_ar) (_ar - active_reqs) -#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons) - -inline active_req_t *get_active_req(void) -{ - ACTIVE_RING_IDX idx; - active_req_t *ar; - unsigned long flags; - - ASSERT(active_cons != active_prod); - - spin_lock_irqsave(&active_req_lock, flags); - idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)]; - ar = &active_reqs[idx]; - spin_unlock_irqrestore(&active_req_lock, flags); - - return ar; -} - -inline void free_active_req(active_req_t *ar) -{ - unsigned long flags; - - spin_lock_irqsave(&active_req_lock, flags); - active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar); - spin_unlock_irqrestore(&active_req_lock, flags); -} - -active_req_t *lookup_active_req(ACTIVE_RING_IDX idx) -{ - return &active_reqs[idx]; -} - -void active_reqs_init(void) -{ - ACTIVE_RING_IDX i; - - active_cons = 0; - active_prod = MAX_ACTIVE_REQS; - memset(active_reqs, 0, sizeof(active_reqs)); - for ( i = 0; i < MAX_ACTIVE_REQS; i++ ) - active_req_ring[i] = i; -} - -/* Requests passing through the tap to the backend hijack the id field - * in the request message. In it we put the AR index _AND_ the fe domid. - * the domid is used by the backend to map the pages properly. 
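 *
 * Editorial worked example (not part of the original patch):
 * MAKE_ID(7, 42) packs frontend domain 7 and active-ring slot 42 as
 * (7 << 16) | 42 = 0x0007002a; ID_TO_DOM() recovers 7 and ID_TO_IDX()
 * recovers 42. The 0x0000ffff mask bounds the scheme at 65536 slots,
 * comfortably above MAX_ACTIVE_REQS.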
- */ - -static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) -{ - return ( (fe_dom << 16) | idx ); -} - -/*-----[ Ring helpers ]---------------------------------------------------*/ - -inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp) -{ - blkif_response_t *resp_d; - active_req_t *ar; - - ar = &active_reqs[ID_TO_IDX(rsp->id)]; - rsp->id = ar->id; - - resp_d = RING_GET_RESPONSE(&blkif->blk_ring, - blkif->blk_ring.rsp_prod_pvt); - memcpy(resp_d, rsp, sizeof(blkif_response_t)); - wmb(); - blkif->blk_ring.rsp_prod_pvt++; - - blkif_put(ar->blkif); - free_active_req(ar); - - return 0; -} - -inline int write_req_to_be_ring(blkif_request_t *req) -{ - blkif_request_t *req_d; - - if ( blktap_be_state != BLKIF_STATE_CONNECTED ) { - WPRINTK("Tap trying to access an unconnected backend!\n"); - return 0; - } - - req_d = RING_GET_REQUEST(&blktap_be_ring, - blktap_be_ring.req_prod_pvt); - memcpy(req_d, req, sizeof(blkif_request_t)); - wmb(); - blktap_be_ring.req_prod_pvt++; - - return 0; -} - -void kick_fe_domain(blkif_t *blkif) -{ - RING_PUSH_RESPONSES(&blkif->blk_ring); - notify_via_evtchn(blkif->evtchn); - DPRINTK("notified FE(dom %u)\n", blkif->domid); - -} - -void kick_be_domain(void) -{ - if ( blktap_be_state != BLKIF_STATE_CONNECTED ) - return; - - wmb(); /* Ensure that the frontend can see the requests. */ - RING_PUSH_REQUESTS(&blktap_be_ring); - notify_via_evtchn(blktap_be_evtchn); - DPRINTK("notified BE\n"); -} - -/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/ - -/*-----[ Scheduler list maint -from blkback ]--- */ - -static struct list_head blkio_schedule_list; -static spinlock_t blkio_schedule_list_lock; - -static int __on_blkdev_list(blkif_t *blkif) -{ - return blkif->blkdev_list.next != NULL; -} - -static void remove_from_blkdev_list(blkif_t *blkif) -{ - unsigned long flags; - if ( !__on_blkdev_list(blkif) ) return; - spin_lock_irqsave(&blkio_schedule_list_lock, flags); - if ( __on_blkdev_list(blkif) ) - { - list_del(&blkif->blkdev_list); - blkif->blkdev_list.next = NULL; - blkif_put(blkif); - } - spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); -} - -static void add_to_blkdev_list_tail(blkif_t *blkif) -{ - unsigned long flags; - if ( __on_blkdev_list(blkif) ) return; - spin_lock_irqsave(&blkio_schedule_list_lock, flags); - if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) - { - list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); - blkif_get(blkif); - } - spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); -} - - -/*-----[ Scheduler functions - from blkback ]--- */ - -static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); - -static int do_block_io_op(blkif_t *blkif, int max_to_do); - -static int blkio_schedule(void *arg) -{ - DECLARE_WAITQUEUE(wq, current); - - blkif_t *blkif; - struct list_head *ent; - - daemonize( - "xentapd" - ); - - for ( ; ; ) - { - /* Wait for work to do. */ - add_wait_queue(&blkio_schedule_wait, &wq); - set_current_state(TASK_INTERRUPTIBLE); - if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || - list_empty(&blkio_schedule_list) ) - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&blkio_schedule_wait, &wq); - - /* Queue up a batch of requests. 
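 *
 * Editorial note (not part of the original patch): active_req_ring
 * above is a ring of free slot indices. get_active_req() consumes at
 * active_cons, free_active_req() recycles at active_prod, and
 * NR_ACTIVE_REQS = MAX_ACTIVE_REQS - active_prod + active_cons counts
 * requests in flight: after three gets and one free, active_cons = 3
 * and active_prod = 65, giving 64 - 65 + 3 = 2 outstanding. The batch
 * loop below stalls once this reaches MAX_ACTIVE_REQS.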
*/ - while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) && - !list_empty(&blkio_schedule_list) ) - { - ent = blkio_schedule_list.next; - blkif = list_entry(ent, blkif_t, blkdev_list); - blkif_get(blkif); - remove_from_blkdev_list(blkif); - if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) - add_to_blkdev_list_tail(blkif); - blkif_put(blkif); - } - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - /* Push the batch through to disc. */ - run_task_queue(&tq_disk); -#endif - } -} - -static void maybe_trigger_blkio_schedule(void) -{ - /* - * Needed so that two processes, who together make the following predicate - * true, don't both read stale values and evaluate the predicate - * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... - */ - smp_mb(); - - if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/ - !list_empty(&blkio_schedule_list) ) - wake_up(&blkio_schedule_wait); -} - -void blkif_deschedule(blkif_t *blkif) -{ - remove_from_blkdev_list(blkif); -} - -void __init blkdev_schedule_init(void) -{ - spin_lock_init(&blkio_schedule_list_lock); - INIT_LIST_HEAD(&blkio_schedule_list); - - if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) - BUG(); -} - -/*-----[ Interrupt entry from a frontend ]------ */ - -irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) -{ - blkif_t *blkif = dev_id; - - add_to_blkdev_list_tail(blkif); - maybe_trigger_blkio_schedule(); - return IRQ_HANDLED; -} - -/*-----[ Other Frontend Ring functions ]-------- */ - -/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/ -static int do_block_io_op(blkif_t *blkif, int max_to_do) -{ - /* we have pending messages from the real frontend. */ - - blkif_request_t *req_s; - RING_IDX i, rp; - unsigned long flags; - active_req_t *ar; - int more_to_do = 0; - int notify_be = 0, notify_user = 0; - - DPRINTK("PT got FE interrupt.\n"); - - if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1; - - /* lock both rings */ - spin_lock_irqsave(&blkif_io_lock, flags); - - rp = blkif->blk_ring.sring->req_prod; - rmb(); - - for ( i = blkif->blk_ring.req_cons; - (i != rp) && - !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i); - i++ ) - { - - if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) - { - more_to_do = 1; - break; - } - - req_s = RING_GET_REQUEST(&blkif->blk_ring, i); - /* This is a new request: - * Assign an active request record, and remap the id. - */ - ar = get_active_req(); - ar->id = req_s->id; - ar->nr_pages = req_s->nr_segments; - blkif_get(blkif); - ar->blkif = blkif; - req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar)); - /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */ - - /* FE -> BE interposition point is here. */ - - /* ------------------------------------------------------------- */ - /* BLKIF_OP_PROBE_HACK: */ - /* Signal to the backend that we are a tap domain. 
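 *
 * Editorial note (not part of the original patch): a PROBE request
 * carries its result buffer in frame_and_sects[0] and leaves slot [1]
 * unused, so the tap can stash BLKTAP_COOKIE (0xbeadfeed) there, as the
 * code below does; the backend presumably checks for the cookie so that
 * probe pages are mapped from the tap domain rather than from the
 * originating frontend.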
*/ - - if (req_s->operation == BLKIF_OP_PROBE) { - DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n"); - req_s->frame_and_sects[1] = BLKTAP_COOKIE; - } - - /* ------------------------------------------------------------- */ - - /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */ - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_COPY_FE) ) { - - /* Copy the response message to UFERing */ - /* In MODE_INTERCEPT_FE, map attached pages into the app vma */ - /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */ - - DPRINTK("req->UFERing\n"); - blktap_write_fe_ring(req_s); - notify_user = 1; - } - - /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */ - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) { - - /* be included to prevent noise from the fe when its off */ - /* copy the request message to the BERing */ - - DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", - (unsigned)i & (RING_SIZE(&blktap_be_ring)-1), - (unsigned)blktap_be_ring.req_prod_pvt & - (RING_SIZE((&blktap_be_ring)-1))); - - write_req_to_be_ring(req_s); - notify_be = 1; - } - } - - blkif->blk_ring.req_cons = i; - - /* unlock rings */ - spin_unlock_irqrestore(&blkif_io_lock, flags); - - if (notify_user) - blktap_kick_user(); - if (notify_be) - kick_be_domain(); - - return more_to_do; -} - -/*-----[ Data to/from Backend (server) VM ]------------------------------*/ - - -irqreturn_t blkif_ptbe_int(int irq, void *dev_id, - struct pt_regs *ptregs) -{ - blkif_response_t *resp_s; - blkif_t *blkif; - RING_IDX rp, i; - unsigned long flags; - - DPRINTK("PT got BE interrupt.\n"); - - /* lock both rings */ - spin_lock_irqsave(&blkif_io_lock, flags); - - rp = blktap_be_ring.sring->rsp_prod; - rmb(); - - for ( i = blktap_be_ring.rsp_cons; i != rp; i++) - { - resp_s = RING_GET_RESPONSE(&blktap_be_ring, i); - - /* BE -> FE interposition point is here. */ - - blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif; - - /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */ - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || - (blktap_mode & BLKTAP_MODE_COPY_BE) ) { - - /* Copy the response message to UBERing */ - /* In MODE_INTERCEPT_BE, map attached pages into the app vma */ - /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */ - - DPRINTK("rsp->UBERing\n"); - blktap_write_be_ring(resp_s); - blktap_kick_user(); - - } - - /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */ - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || - (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) { - - /* (fe included to prevent random interference from the BE) */ - /* Copy the response message to FERing */ - - DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", - (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1), - (unsigned)blkif->blk_ring.rsp_prod_pvt & - (RING_SIZE((&blkif->blk_ring)-1))); - - write_resp_to_fe_ring(blkif, resp_s); - kick_fe_domain(blkif); - - } - } - - blktap_be_ring.rsp_cons = i; - - - spin_unlock_irqrestore(&blkif_io_lock, flags); - - return IRQ_HANDLED; -} - -/* Debug : print the current ring indices. 
*/ - -void print_vm_ring_idxs(void) -{ - int i; - blkif_t *blkif; - - WPRINTK("FE Rings: \n---------\n"); - for ( i = 0; i < 50; i++) { - blkif = blkif_find_by_handle((domid_t)i, 0); - if (blkif != NULL) { - if (blkif->blk_ring.sring != NULL) { - WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", i, - blkif->blk_ring.req_cons, - blkif->blk_ring.rsp_prod_pvt, - blkif->blk_ring.sring->req_prod, - blkif->blk_ring.sring->rsp_prod); - } else { - WPRINTK("%2d: [no device channel yet]\n", i); - } - } - } - if (blktap_be_ring.sring != NULL) { - WPRINTK("BE Ring: \n--------\n"); - WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blktap_be_ring.rsp_cons, - blktap_be_ring.req_prod_pvt, - blktap_be_ring.sring->req_prod, - blktap_be_ring.sring->rsp_prod); - } -} diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c deleted file mode 100644 index 3cc307fddf..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c +++ /dev/null @@ -1,489 +0,0 @@ -/****************************************************************************** - * blktap_userdev.c - * - * XenLinux virtual block-device tap. - * Control interface between the driver and a character device. - * - * Copyright (c) 2004, Andrew Warfield - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for control ring. */ - -#include "blktap.h" - - -unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH; - -/* Only one process may open /dev/xen/blktap at any time. */ -static unsigned long blktap_dev_inuse; -unsigned long blktap_ring_ok; /* make this ring->state */ - -/* for poll: */ -static wait_queue_head_t blktap_wait; - -/* Where things are inside the device mapping. */ -struct vm_area_struct *blktap_vma; -unsigned long mmap_vstart; -unsigned long rings_vstart; - -/* Rings up to user space. */ -static blkif_front_ring_t blktap_ufe_ring; -static blkif_back_ring_t blktap_ube_ring; -static ctrl_front_ring_t blktap_uctrl_ring; - -/* local prototypes */ -static int blktap_read_fe_ring(void); -static int blktap_read_be_ring(void); - -/* -------[ blktap vm ops ]------------------------------------------- */ - -static struct page *blktap_nopage(struct vm_area_struct *vma, - unsigned long address, - int *type) -{ - /* - * if the page has not been mapped in by the driver then generate - * a SIGBUS to the domain. - */ - - force_sig(SIGBUS, current); - - return 0; -} - -struct vm_operations_struct blktap_vm_ops = { - nopage: blktap_nopage, -}; - -/* -------[ blktap file ops ]----------------------------------------- */ - -static int blktap_open(struct inode *inode, struct file *filp) -{ - blkif_sring_t *sring; - ctrl_sring_t *csring; - - if ( test_and_set_bit(0, &blktap_dev_inuse) ) - return -EBUSY; - - printk(KERN_ALERT "blktap open.\n"); - - /* Allocate the ctrl ring. */ - csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL); - if (csring == NULL) - goto fail_nomem; - - SetPageReserved(virt_to_page(csring)); - - SHARED_RING_INIT(csring); - FRONT_RING_INIT(&blktap_uctrl_ring, csring); - - - /* Allocate the fe ring. */ - sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); - if (sring == NULL) - goto fail_free_ctrl; - - SetPageReserved(virt_to_page(sring)); - - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blktap_ufe_ring, sring); - - /* Allocate the be ring. 
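 *
 * Editorial sketch (not part of the original patch): all three rings
 * use the same idiom, which could be factored as roughly
 *
 *     static void *alloc_shared_page(void)
 *     {
 *         void *s = (void *)get_zeroed_page(GFP_KERNEL);
 *         if (s != NULL)
 *             SetPageReserved(virt_to_page(s)); /* page will be mmap'ed */
 *         return s;
 *     }
 *
 * followed by SHARED_RING_INIT() and then FRONT_RING_INIT() or
 * BACK_RING_INIT() according to which end userspace takes: the
 * application acts as backend on the fe ring (serving real frontends)
 * and as frontend on the be ring (issuing to the real backend).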
*/ - sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); - if (sring == NULL) - goto fail_free_fe; - - SetPageReserved(virt_to_page(sring)); - - SHARED_RING_INIT(sring); - BACK_RING_INIT(&blktap_ube_ring, sring); - - DPRINTK(KERN_ALERT "blktap open.\n"); - - return 0; - - fail_free_fe: - free_page( (unsigned long) blktap_ufe_ring.sring); - - fail_free_ctrl: - free_page( (unsigned long) blktap_uctrl_ring.sring); - - fail_nomem: - return -ENOMEM; -} - -static int blktap_release(struct inode *inode, struct file *filp) -{ - blktap_dev_inuse = 0; - blktap_ring_ok = 0; - - printk(KERN_ALERT "blktap closed.\n"); - - /* Free the ring page. */ - ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring)); - free_page((unsigned long) blktap_uctrl_ring.sring); - - ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); - free_page((unsigned long) blktap_ufe_ring.sring); - - ClearPageReserved(virt_to_page(blktap_ube_ring.sring)); - free_page((unsigned long) blktap_ube_ring.sring); - - return 0; -} - -/* Note on mmap: - * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio - * work to do direct page access from userspace, this ended up being a - * problem. The bigger issue seems to be that there is no way to map - * a foreign page into user space and have the virtual address of that - * page map sanely down to a mfn. - * Removing the VM_IO flag results in a loop in get_user_pages, as - * pfn_valid() always fails on a foreign page. - */ -static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) -{ - int size; - - printk(KERN_ALERT "blktap mmap (%lx, %lx)\n", - vma->vm_start, vma->vm_end); - - vma->vm_ops = &blktap_vm_ops; - - size = vma->vm_end - vma->vm_start; - if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) { - printk(KERN_INFO - "blktap: you _must_ map exactly %d pages!\n", - MMAP_PAGES + RING_PAGES); - return -EAGAIN; - } - - size >>= PAGE_SHIFT; - printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); - - rings_vstart = vma->vm_start; - mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); - - /* Map the ring pages to the start of the region and reserve it. */ - - /* not sure if I really need to do this... */ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring)); - if (remap_pfn_range(vma, vma->vm_start, - __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("ctrl_ring: remap_pfn_range failure!\n"); - } - - - DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring)); - if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, - __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("be_ring: remap_pfn_range failure!\n"); - } - - DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring)); - if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), - __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) { - WPRINTK("fe_ring: remap_pfn_range failure!\n"); - } - - blktap_vma = vma; - blktap_ring_ok = 1; - - return 0; -} - -static int blktap_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - switch(cmd) { - case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */ - return blktap_read_fe_ring(); - - case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */ - return blktap_read_be_ring(); - - case BLKTAP_IOCTL_SETMODE: - if (BLKTAP_MODE_VALID(arg)) { - blktap_mode = arg; - /* XXX: may need to flush rings here.
*/ - printk(KERN_INFO "blktap: set mode to %lx\n", arg); - return 0; - } - case BLKTAP_IOCTL_PRINT_IDXS: - { - print_vm_ring_idxs(); - WPRINTK("User Rings: \n-----------\n"); - WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blktap_ufe_ring.rsp_cons, - blktap_ufe_ring.req_prod_pvt, - blktap_ufe_ring.sring->req_prod, - blktap_ufe_ring.sring->rsp_prod); - WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blktap_ube_ring.req_cons, - blktap_ube_ring.rsp_prod_pvt, - blktap_ube_ring.sring->req_prod, - blktap_ube_ring.sring->rsp_prod); - - } - } - return -ENOIOCTLCMD; -} - -static unsigned int blktap_poll(struct file *file, poll_table *wait) -{ - poll_wait(file, &blktap_wait, wait); - - if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) || - RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) || - RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) { - - RING_PUSH_REQUESTS(&blktap_uctrl_ring); - RING_PUSH_REQUESTS(&blktap_ufe_ring); - RING_PUSH_RESPONSES(&blktap_ube_ring); - return POLLIN | POLLRDNORM; - } - - return 0; -} - -void blktap_kick_user(void) -{ - /* blktap_ring->req_prod = blktap_req_prod; */ - wake_up_interruptible(&blktap_wait); -} - -static struct file_operations blktap_fops = { - owner: THIS_MODULE, - poll: blktap_poll, - ioctl: blktap_ioctl, - open: blktap_open, - release: blktap_release, - mmap: blktap_mmap, -}; - -/*-----[ Data to/from user space ]----------------------------------------*/ - - -int blktap_write_fe_ring(blkif_request_t *req) -{ - blkif_request_t *target; - int error, i; - - /* - * This is called to pass a request from the real frontend domain's - * blkif ring to the character device. - */ - - if ( ! blktap_ring_ok ) { - DPRINTK("blktap: ufe_ring not ready for a request!\n"); - return 0; - } - - if ( RING_FULL(&blktap_ufe_ring) ) { - DPRINTK("blktap: fe_ring is full, can't add.\n"); - return 0; - } - - target = RING_GET_REQUEST(&blktap_ufe_ring, - blktap_ufe_ring.req_prod_pvt); - memcpy(target, req, sizeof(*req)); - - /* Attempt to map the foreign pages directly in to the application */ - for (i=0; inr_segments; i++) { - - error = direct_remap_area_pages(blktap_vma->vm_mm, - MMAP_VADDR(ID_TO_IDX(req->id), i), - target->frame_and_sects[i] & PAGE_MASK, - PAGE_SIZE, - blktap_vma->vm_page_prot, - ID_TO_DOM(req->id)); - if ( error != 0 ) { - printk(KERN_INFO "remapping attached page failed! (%d)\n", error); - /* the request is now dropped on the floor. */ - return 0; - } - } - - blktap_ufe_ring.req_prod_pvt++; - - return 0; -} - -int blktap_write_be_ring(blkif_response_t *rsp) -{ - blkif_response_t *target; - - /* - * This is called to pass a request from the real backend domain's - * blkif ring to the character device. - */ - - if ( ! blktap_ring_ok ) { - DPRINTK("blktap: be_ring not ready for a request!\n"); - return 0; - } - - /* No test for fullness in the response direction. 
*/ - - target = RING_GET_RESPONSE(&blktap_ube_ring, - blktap_ube_ring.rsp_prod_pvt); - memcpy(target, rsp, sizeof(*rsp)); - - /* no mapping -- pages were mapped in blktap_write_fe_ring() */ - - blktap_ube_ring.rsp_prod_pvt++; - - return 0; -} - -static void blktap_fast_flush_area(int idx, int nr_pages) -{ - multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; - int i; - - for ( i = 0; i < nr_pages; i++ ) - { - mcl[i].op = __HYPERVISOR_update_va_mapping; - mcl[i].args[0] = MMAP_VADDR(idx, i); - mcl[i].args[1] = 0; - mcl[i].args[2] = 0; - } - - mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB; - if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) - BUG(); -} - -static int blktap_read_fe_ring(void) -{ - /* This is called to read responses from the UFE ring. */ - - RING_IDX i, rp; - blkif_response_t *resp_s; - blkif_t *blkif; - active_req_t *ar; - - DPRINTK("blktap_read_fe_ring()\n"); - - /* if we are forwarding from UFERring to FERing */ - if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { - - /* for each outstanding message on the UFEring */ - rp = blktap_ufe_ring.sring->rsp_prod; - rmb(); - - for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ ) - { - resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i); - - DPRINTK("resp->fe_ring\n"); - ar = lookup_active_req(ID_TO_IDX(resp_s->id)); - blkif = ar->blkif; - blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); - write_resp_to_fe_ring(blkif, resp_s); - kick_fe_domain(blkif); - } - - blktap_ufe_ring.rsp_cons = i; - } - return 0; -} - -static int blktap_read_be_ring(void) -{ - /* This is called to read requests from the UBE ring. */ - - RING_IDX i, rp; - blkif_request_t *req_s; - - DPRINTK("blktap_read_be_ring()\n"); - - /* if we are forwarding from UFERring to FERing */ - if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { - - /* for each outstanding message on the UFEring */ - rp = blktap_ube_ring.sring->req_prod; - rmb(); - for ( i = blktap_ube_ring.req_cons; i != rp; i++ ) - { - req_s = RING_GET_REQUEST(&blktap_ube_ring, i); - - DPRINTK("req->be_ring\n"); - write_req_to_be_ring(req_s); - kick_be_domain(); - } - - blktap_ube_ring.req_cons = i; - } - - return 0; -} - -int blktap_write_ctrl_ring(ctrl_msg_t *msg) -{ - ctrl_msg_t *target; - - if ( ! blktap_ring_ok ) { - DPRINTK("blktap: be_ring not ready for a request!\n"); - return 0; - } - - /* No test for fullness in the response direction. */ - - target = RING_GET_REQUEST(&blktap_uctrl_ring, - blktap_uctrl_ring.req_prod_pvt); - memcpy(target, msg, sizeof(*msg)); - - blktap_uctrl_ring.req_prod_pvt++; - - /* currently treat the ring as unidirectional. 
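 *
 * Editorial note (not part of the original patch): snapping rsp_cons up
 * to the shared rsp_prod below throws away anything the application may
 * have written in the response direction; it keeps the producer-side
 * free-space accounting from ever seeing the ring as full, at the cost
 * of making the control channel strictly kernel-to-user.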
*/ - blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod; - - return 0; - -} - -/* -------[ blktap module setup ]------------------------------------- */ - -static struct miscdevice blktap_miscdev = { - .minor = BLKTAP_MINOR, - .name = "blktap", - .fops = &blktap_fops, - .devfs_name = "misc/blktap", -}; - -int blktap_init(void) -{ - int err; - - err = misc_register(&blktap_miscdev); - if ( err != 0 ) - { - printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err); - return err; - } - - init_waitqueue_head(&blktap_wait); - - - return 0; -} diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h b/linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h deleted file mode 100644 index bcab2041bc..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h +++ /dev/null @@ -1,85 +0,0 @@ - -#ifndef __USBIF__BACKEND__COMMON_H__ -#define __USBIF__BACKEND__COMMON_H__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#if 0 -#define ASSERT(_p) \ - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ - __LINE__, __FILE__); *(int*)0=0; } -#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) -#else -#define ASSERT(_p) ((void)0) -#define DPRINTK(_f, _a...) ((void)0) -#endif - -typedef struct usbif_priv_st usbif_priv_t; - -struct usbif_priv_st { - /* Unique identifier for this interface. */ - domid_t domid; - unsigned int handle; - /* Physical parameters of the comms window. */ - unsigned long shmem_frame; - unsigned int evtchn; - int irq; - /* Comms Information */ - usbif_back_ring_t usb_ring; - /* Private fields. */ - enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; - /* - * DISCONNECT response is deferred until pending requests are ack'ed. - * We therefore need to store the id from the original request. - */ - u8 disconnect_rspid; - usbif_priv_t *hash_next; - struct list_head usbif_list; - spinlock_t usb_ring_lock; - atomic_t refcnt; - - struct work_struct work; -}; - -void usbif_create(usbif_be_create_t *create); -void usbif_destroy(usbif_be_destroy_t *destroy); -void usbif_connect(usbif_be_connect_t *connect); -int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id); -void usbif_disconnect_complete(usbif_priv_t *up); - -void usbif_release_port(usbif_be_release_port_t *msg); -int usbif_claim_port(usbif_be_claim_port_t *msg); -void usbif_release_ports(usbif_priv_t *up); - -usbif_priv_t *usbif_find(domid_t domid); -#define usbif_get(_b) (atomic_inc(&(_b)->refcnt)) -#define usbif_put(_b) \ - do { \ - if ( atomic_dec_and_test(&(_b)->refcnt) ) \ - usbif_disconnect_complete(_b); \ - } while (0) - - -void usbif_interface_init(void); -void usbif_ctrlif_init(void); - -void usbif_deschedule(usbif_priv_t *up); -void remove_from_usbif_list(usbif_priv_t *up); - -irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs); - -#endif /* __USBIF__BACKEND__COMMON_H__ */ diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c b/linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c deleted file mode 100644 index 899394a629..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c +++ /dev/null @@ -1,77 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/usbif/backend/control.c - * - * Routines for interfacing with the control plane. 
- * - * Copyright (c) 2004, Keir Fraser - */ - -#include "common.h" - -static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype); - - switch ( msg->subtype ) - { - case CMSG_USBIF_BE_CREATE: - if ( msg->length != sizeof(usbif_be_create_t) ) - goto parse_error; - usbif_create((usbif_be_create_t *)&msg->msg[0]); - break; - case CMSG_USBIF_BE_DESTROY: - if ( msg->length != sizeof(usbif_be_destroy_t) ) - goto parse_error; - usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]); - break; - case CMSG_USBIF_BE_CONNECT: - if ( msg->length != sizeof(usbif_be_connect_t) ) - goto parse_error; - usbif_connect((usbif_be_connect_t *)&msg->msg[0]); - break; - case CMSG_USBIF_BE_DISCONNECT: - if ( msg->length != sizeof(usbif_be_disconnect_t) ) - goto parse_error; - if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) ) - return; /* Sending the response is deferred until later. */ - break; - case CMSG_USBIF_BE_CLAIM_PORT: - if ( msg->length != sizeof(usbif_be_claim_port_t) ) - goto parse_error; - usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]); - break; - case CMSG_USBIF_BE_RELEASE_PORT: - if ( msg->length != sizeof(usbif_be_release_port_t) ) - goto parse_error; - usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]); - break; - default: - goto parse_error; - } - - ctrl_if_send_response(msg); - return; - - parse_error: - DPRINTK("Parse error while reading message subtype %d, len %d\n", - msg->subtype, msg->length); - msg->length = 0; - ctrl_if_send_response(msg); -} - -void usbif_ctrlif_init(void) -{ - ctrl_msg_t cmsg; - usbif_be_driver_status_changed_t st; - - (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - /* Send a driver-UP notification to the domain controller. */ - cmsg.type = CMSG_USBIF_BE; - cmsg.subtype = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED; - cmsg.length = sizeof(usbif_be_driver_status_changed_t); - st.status = USBIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &st, sizeof(st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c b/linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c deleted file mode 100644 index 4630da8951..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c +++ /dev/null @@ -1,252 +0,0 @@ -/****************************************************************************** - * arch/xen/drivers/usbif/backend/interface.c - * - * USB device interface management. - * - * by Mark Williamson, Copyright (c) 2004 - */ - - -/****************************************************************************** - * arch/xen/drivers/blkif/backend/interface.c - * - * Block-device interface management. 
- * - * Copyright (c) 2004, Keir Fraser - */ - -#include "common.h" - -#define USBIF_HASHSZ 1024 -#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1)) - -static kmem_cache_t *usbif_priv_cachep; -static usbif_priv_t *usbif_priv_hash[USBIF_HASHSZ]; - -usbif_priv_t *usbif_find(domid_t domid) -{ - usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)]; - while ( (up != NULL ) && ( up->domid != domid ) ) - up = up->hash_next; - return up; -} - -static void __usbif_disconnect_complete(void *arg) -{ - usbif_priv_t *usbif = (usbif_priv_t *)arg; - ctrl_msg_t cmsg; - usbif_be_disconnect_t disc; - - /* - * These can't be done in usbif_disconnect() because at that point there - * may be outstanding requests at the device whose asynchronous responses - * must still be notified to the remote driver. - */ - unbind_evtchn_from_irq(usbif->evtchn); - vfree(usbif->usb_ring.sring); - - /* Construct the deferred response message. */ - cmsg.type = CMSG_USBIF_BE; - cmsg.subtype = CMSG_USBIF_BE_DISCONNECT; - cmsg.id = usbif->disconnect_rspid; - cmsg.length = sizeof(usbif_be_disconnect_t); - disc.domid = usbif->domid; - disc.status = USBIF_BE_STATUS_OKAY; - memcpy(cmsg.msg, &disc, sizeof(disc)); - - /* - * Make sure message is constructed /before/ status change, because - * after the status change the 'usbif' structure could be deallocated at - * any time. Also make sure we send the response /after/ status change, - * as otherwise a subsequent CONNECT request could spuriously fail if - * another CPU doesn't see the status change yet. - */ - mb(); - if ( usbif->status != DISCONNECTING ) - BUG(); - usbif->status = DISCONNECTED; - mb(); - - /* Send the successful response. */ - ctrl_if_send_response(&cmsg); -} - -void usbif_disconnect_complete(usbif_priv_t *up) -{ - INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up); - schedule_work(&up->work); -} - -void usbif_create(usbif_be_create_t *create) -{ - domid_t domid = create->domid; - usbif_priv_t **pup, *up; - - if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL ) - { - DPRINTK("Could not create usbif: out of memory\n"); - create->status = USBIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - memset(up, 0, sizeof(*up)); - up->domid = domid; - up->status = DISCONNECTED; - spin_lock_init(&up->usb_ring_lock); - atomic_set(&up->refcnt, 0); - - pup = &usbif_priv_hash[USBIF_HASH(domid)]; - while ( *pup != NULL ) - { - if ( (*pup)->domid == domid ) - { - create->status = USBIF_BE_STATUS_INTERFACE_EXISTS; - kmem_cache_free(usbif_priv_cachep, up); - return; - } - pup = &(*pup)->hash_next; - } - - up->hash_next = *pup; - *pup = up; - - create->status = USBIF_BE_STATUS_OKAY; -} - -void usbif_destroy(usbif_be_destroy_t *destroy) -{ - domid_t domid = destroy->domid; - usbif_priv_t **pup, *up; - - pup = &usbif_priv_hash[USBIF_HASH(domid)]; - while ( (up = *pup) != NULL ) - { - if ( up->domid == domid ) - { - if ( up->status != DISCONNECTED ) - goto still_connected; - goto destroy; - } - pup = &up->hash_next; - } - - destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - - still_connected: - destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED; - return; - - destroy: - *pup = up->hash_next; - usbif_release_ports(up); - kmem_cache_free(usbif_priv_cachep, up); - destroy->status = USBIF_BE_STATUS_OKAY; -} - -void usbif_connect(usbif_be_connect_t *connect) -{ - domid_t domid = connect->domid; - unsigned int evtchn = connect->evtchn; - unsigned long shmem_frame = connect->shmem_frame; - struct vm_struct *vma; - pgprot_t prot; - int error; - usbif_priv_t 
*up;
-    usbif_sring_t *sring;
-
-    up = usbif_find(domid);
-    if ( unlikely(up == NULL) )
-    {
-        DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n",
-                connect->domid);
-        connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        if ( error == -ENOMEM )
-            connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT )
-            connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
-        else
-            connect->status = USBIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-
-    if ( up->status != DISCONNECTED )
-    {
-        connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
-        vfree(vma->addr);
-        return;
-    }
-
-    sring = (usbif_sring_t *)vma->addr;
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&up->usb_ring, sring);
-
-    up->evtchn = evtchn;
-    up->irq = bind_evtchn_to_irq(evtchn);
-    up->shmem_frame = shmem_frame;
-    up->status = CONNECTED;
-    usbif_get(up);
-
-    request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
-
-    connect->status = USBIF_BE_STATUS_OKAY;
-}
-
-/* Remove URBs for this interface before destroying it. */
-void usbif_deschedule(usbif_priv_t *up)
-{
-    remove_from_usbif_list(up);
-}
-
-int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
-{
-    domid_t domid = disconnect->domid;
-    usbif_priv_t *up;
-
-    up = usbif_find(domid);
-    if ( unlikely(up == NULL) )
-    {
-        DPRINTK("usbif_disconnect attempted for non-existent usbif"
-                " (%u)\n", disconnect->domid);
-        disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return 1; /* Caller will send response error message. */
-    }
-
-    if ( up->status == CONNECTED )
-    {
-        up->status = DISCONNECTING;
-        up->disconnect_rspid = rsp_id;
-        wmb(); /* Let other CPUs see the status change. */
-        free_irq(up->irq, up);
-        usbif_deschedule(up);
-        usbif_put(up);
-        return 0; /* Caller should not send response message. */
-    }
-
-    disconnect->status = USBIF_BE_STATUS_OKAY;
-    return 1;
-}
-
-void __init usbif_interface_init(void)
-{
-    usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
-                                          sizeof(usbif_priv_t),
-                                          0, 0, NULL, NULL);
-    memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
-}
diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c b/linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c
deleted file mode 100644
index b039b4506b..0000000000
--- a/linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c
+++ /dev/null
@@ -1,1070 +0,0 @@
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/main.c
- *
- * Backend for the Xen virtual USB driver - provides an abstraction of a
- * USB host controller to the corresponding frontend driver.
- *
- * by Mark Williamson
- * Copyright (c) 2004 Intel Research Cambridge
- * Copyright (c) 2004, 2005 Mark Williamson
- *
- * Based on arch/xen/drivers/blkif/backend/main.c
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-
-#include
-#include
-#include
-#include
-#include
-
-/*
- * This is rather arbitrary.
- */
-#define MAX_PENDING_REQS 4
-#define BATCH_PER_DOMAIN 1
-
-static unsigned long mmap_vstart;
-
-/* Needs to be sufficiently large that we can map the (large) buffers
- * the USB mass storage driver wants.
*/ -#define MMAP_PAGES_PER_REQUEST \ - (128) -#define MMAP_PAGES \ - (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) - -#define MMAP_VADDR(_req,_seg) \ - (mmap_vstart + \ - ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ - ((_seg) * PAGE_SIZE)) - - -static spinlock_t owned_ports_lock; -LIST_HEAD(owned_ports); - -/* A list of these structures is used to track ownership of physical USB - * ports. */ -typedef struct -{ - usbif_priv_t *usbif_priv; - char path[16]; - int guest_port; - int enabled; - struct list_head list; - unsigned long guest_address; /* The USB device address that has been - * assigned by the guest. */ - int dev_present; /* Is there a device present? */ - struct usb_device * dev; - unsigned long ifaces; /* What interfaces are present on this device? */ -} owned_port_t; - - -/* - * Each outstanding request that we've passed to the lower device layers has a - * 'pending_req' allocated to it. The request is complete, the specified - * domain has a response queued for it, with the saved 'id' passed back. - */ -typedef struct { - usbif_priv_t *usbif_priv; - unsigned long id; - int nr_pages; - unsigned short operation; - int status; -} pending_req_t; - -/* - * We can't allocate pending_req's in order, since they may complete out of - * order. We therefore maintain an allocation ring. This ring also indicates - * when enough work has been passed down -- at that point the allocation ring - * will be empty. - */ -static pending_req_t pending_reqs[MAX_PENDING_REQS]; -static unsigned char pending_ring[MAX_PENDING_REQS]; -static spinlock_t pend_prod_lock; - -/* NB. We use a different index type to differentiate from shared usb rings. */ -typedef unsigned int PEND_RING_IDX; -#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) -static PEND_RING_IDX pending_prod, pending_cons; -#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) - -static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do); -static void make_response(usbif_priv_t *usbif, unsigned long id, - unsigned short op, int st, int inband, - unsigned long actual_length); -static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port); -static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req); -static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid); -static owned_port_t *usbif_find_port(char *); - -/****************************************************************** - * PRIVATE DEBUG FUNCTIONS - */ - -#undef DEBUG -#ifdef DEBUG - -static void dump_port(owned_port_t *p) -{ - printk(KERN_DEBUG "owned_port_t @ %p\n" - " usbif_priv @ %p\n" - " path: %s\n" - " guest_port: %d\n" - " guest_address: %ld\n" - " dev_present: %d\n" - " dev @ %p\n" - " ifaces: 0x%lx\n", - p, p->usbif_priv, p->path, p->guest_port, p->guest_address, - p->dev_present, p->dev, p->ifaces); -} - - -static void dump_request(usbif_request_t *req) -{ - printk(KERN_DEBUG "id = 0x%lx\n" - "devnum %d\n" - "endpoint 0x%x\n" - "direction %d\n" - "speed %d\n" - "pipe_type 0x%x\n" - "transfer_buffer 0x%lx\n" - "length 0x%lx\n" - "transfer_flags 0x%lx\n" - "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n" - "iso_schedule = 0x%lx\n" - "num_iso %ld\n", - req->id, req->devnum, req->endpoint, req->direction, req->speed, - req->pipe_type, req->transfer_buffer, req->length, - req->transfer_flags, req->setup[0], req->setup[1], req->setup[2], - req->setup[3], req->setup[4], req->setup[5], req->setup[6], - req->setup[7], req->iso_schedule, req->num_iso); -} - -static void dump_urb(struct urb *urb) -{ - 
printk(KERN_DEBUG "dumping urb @ %p\n", urb); - -#define DUMP_URB_FIELD(name, format) \ - printk(KERN_DEBUG " " # name " " format "\n", urb-> name) - - DUMP_URB_FIELD(pipe, "0x%x"); - DUMP_URB_FIELD(status, "%d"); - DUMP_URB_FIELD(transfer_flags, "0x%x"); - DUMP_URB_FIELD(transfer_buffer, "%p"); - DUMP_URB_FIELD(transfer_buffer_length, "%d"); - DUMP_URB_FIELD(actual_length, "%d"); -} - -static void dump_response(usbif_response_t *resp) -{ - printk(KERN_DEBUG "usbback: Sending response:\n" - " id = 0x%x\n" - " op = %d\n" - " status = %d\n" - " data = %d\n" - " length = %d\n", - resp->id, resp->op, resp->status, resp->data, resp->length); -} - -#else /* DEBUG */ - -#define dump_port(blah) ((void)0) -#define dump_request(blah) ((void)0) -#define dump_urb(blah) ((void)0) -#define dump_response(blah) ((void)0) - -#endif /* DEBUG */ - -/****************************************************************** - * MEMORY MANAGEMENT - */ - -static void fast_flush_area(int idx, int nr_pages) -{ - multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; - int i; - - for ( i = 0; i < nr_pages; i++ ) - { - mcl[i].op = __HYPERVISOR_update_va_mapping; - mcl[i].args[0] = MMAP_VADDR(idx, i); - mcl[i].args[1] = 0; - mcl[i].args[2] = 0; - } - - mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB; - if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) - BUG(); -} - - -/****************************************************************** - * USB INTERFACE SCHEDULER LIST MAINTENANCE - */ - -static struct list_head usbio_schedule_list; -static spinlock_t usbio_schedule_list_lock; - -static int __on_usbif_list(usbif_priv_t *up) -{ - return up->usbif_list.next != NULL; -} - -void remove_from_usbif_list(usbif_priv_t *up) -{ - unsigned long flags; - if ( !__on_usbif_list(up) ) return; - spin_lock_irqsave(&usbio_schedule_list_lock, flags); - if ( __on_usbif_list(up) ) - { - list_del(&up->usbif_list); - up->usbif_list.next = NULL; - usbif_put(up); - } - spin_unlock_irqrestore(&usbio_schedule_list_lock, flags); -} - -static void add_to_usbif_list_tail(usbif_priv_t *up) -{ - unsigned long flags; - if ( __on_usbif_list(up) ) return; - spin_lock_irqsave(&usbio_schedule_list_lock, flags); - if ( !__on_usbif_list(up) && (up->status == CONNECTED) ) - { - list_add_tail(&up->usbif_list, &usbio_schedule_list); - usbif_get(up); - } - spin_unlock_irqrestore(&usbio_schedule_list_lock, flags); -} - -void free_pending(int pending_idx) -{ - unsigned long flags; - - /* Free the pending request. */ - spin_lock_irqsave(&pend_prod_lock, flags); - pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; - spin_unlock_irqrestore(&pend_prod_lock, flags); -} - -/****************************************************************** - * COMPLETION CALLBACK -- Called as urb->complete() - */ - -static void maybe_trigger_usbio_schedule(void); - -static void __end_usb_io_op(struct urb *purb) -{ - pending_req_t *pending_req; - int pending_idx; - - pending_req = purb->context; - - pending_idx = pending_req - pending_reqs; - - ASSERT(purb->actual_length <= purb->transfer_buffer_length); - ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE); - - /* An error fails the entire request. */ - if ( purb->status ) - { - printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status); - } - - if ( usb_pipetype(purb->pipe) == 0 ) - { - int i; - usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1); - - /* If we're dealing with an iso pipe, we need to copy back the schedule. 
*/ - for ( i = 0; i < purb->number_of_packets; i++ ) - { - sched[i].length = purb->iso_frame_desc[i].actual_length; - ASSERT(sched[i].buffer_offset == - purb->iso_frame_desc[i].offset); - sched[i].status = purb->iso_frame_desc[i].status; - } - } - - fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages); - - kfree(purb->setup_packet); - - make_response(pending_req->usbif_priv, pending_req->id, - pending_req->operation, pending_req->status, 0, purb->actual_length); - usbif_put(pending_req->usbif_priv); - - usb_free_urb(purb); - - free_pending(pending_idx); - - rmb(); - - /* Check for anything still waiting in the rings, having freed a request... */ - maybe_trigger_usbio_schedule(); -} - -/****************************************************************** - * SCHEDULER FUNCTIONS - */ - -static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait); - -static int usbio_schedule(void *arg) -{ - DECLARE_WAITQUEUE(wq, current); - - usbif_priv_t *up; - struct list_head *ent; - - daemonize(); - - for ( ; ; ) - { - /* Wait for work to do. */ - add_wait_queue(&usbio_schedule_wait, &wq); - set_current_state(TASK_INTERRUPTIBLE); - if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || - list_empty(&usbio_schedule_list) ) - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&usbio_schedule_wait, &wq); - - /* Queue up a batch of requests. */ - while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && - !list_empty(&usbio_schedule_list) ) - { - ent = usbio_schedule_list.next; - up = list_entry(ent, usbif_priv_t, usbif_list); - usbif_get(up); - remove_from_usbif_list(up); - if ( do_usb_io_op(up, BATCH_PER_DOMAIN) ) - add_to_usbif_list_tail(up); - usbif_put(up); - } - } -} - -static void maybe_trigger_usbio_schedule(void) -{ - /* - * Needed so that two processes, who together make the following predicate - * true, don't both read stale values and evaluate the predicate - * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... - */ - smp_mb(); - - if ( !list_empty(&usbio_schedule_list) ) - wake_up(&usbio_schedule_wait); -} - - -/****************************************************************************** - * NOTIFICATION FROM GUEST OS. - */ - -irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs) -{ - usbif_priv_t *up = dev_id; - - smp_mb(); - - add_to_usbif_list_tail(up); - - /* Will in fact /always/ trigger an io schedule in this case. */ - maybe_trigger_usbio_schedule(); - - return IRQ_HANDLED; -} - - - -/****************************************************************** - * DOWNWARD CALLS -- These interface with the usb-device layer proper. - */ - -static int do_usb_io_op(usbif_priv_t *up, int max_to_do) -{ - usbif_back_ring_t *usb_ring = &up->usb_ring; - usbif_request_t *req; - RING_IDX i, rp; - int more_to_do = 0; - - rp = usb_ring->sring->req_prod; - rmb(); /* Ensure we see queued requests up to 'rp'. */ - - /* Take items off the comms ring, taking care not to overflow. */ - for ( i = usb_ring->req_cons; - (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i); - i++ ) - { - if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) - { - more_to_do = 1; - break; - } - - req = RING_GET_REQUEST(usb_ring, i); - - switch ( req->operation ) - { - case USBIF_OP_PROBE: - dispatch_usb_probe(up, req->id, req->port); - break; - - case USBIF_OP_IO: - /* Assemble an appropriate URB. 
*/ - dispatch_usb_io(up, req); - break; - - case USBIF_OP_RESET: - dispatch_usb_reset(up, req->port); - break; - - default: - DPRINTK("error: unknown USB io operation [%d]\n", - req->operation); - make_response(up, req->id, req->operation, -EINVAL, 0, 0); - break; - } - } - - usb_ring->req_cons = i; - - return more_to_do; -} - -static owned_port_t *find_guest_port(usbif_priv_t *up, int port) -{ - unsigned long flags; - struct list_head *l; - - spin_lock_irqsave(&owned_ports_lock, flags); - list_for_each(l, &owned_ports) - { - owned_port_t *p = list_entry(l, owned_port_t, list); - if(p->usbif_priv == up && p->guest_port == port) - { - spin_unlock_irqrestore(&owned_ports_lock, flags); - return p; - } - } - spin_unlock_irqrestore(&owned_ports_lock, flags); - - return NULL; -} - -static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid) -{ - owned_port_t *port = find_guest_port(up, portid); - int ret = 0; - - - /* Allowing the guest to actually reset the device causes more problems - * than it's worth. We just fake it out in software but we will do a real - * reset when the interface is destroyed. */ - - dump_port(port); - - port->guest_address = 0; - /* If there's an attached device then the port is now enabled. */ - if ( port->dev_present ) - port->enabled = 1; - else - port->enabled = 0; - - make_response(up, 0, USBIF_OP_RESET, ret, 0, 0); -} - -static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid) -{ - owned_port_t *port = find_guest_port(up, portid); - int ret; - - if ( port != NULL ) - ret = port->dev_present; - else - { - ret = -EINVAL; - printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request " - "(port %ld)\n", portid); - } - - /* Probe result is sent back in-band. Probes don't have an associated id - * right now... */ - make_response(up, id, USBIF_OP_PROBE, ret, portid, 0); -} - -/** - * check_iso_schedule - safety check the isochronous schedule for an URB - * @purb : the URB in question - */ -static int check_iso_schedule(struct urb *purb) -{ - int i; - unsigned long total_length = 0; - - for ( i = 0; i < purb->number_of_packets; i++ ) - { - struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i]; - - if ( desc->offset >= purb->transfer_buffer_length - || ( desc->offset + desc->length) > purb->transfer_buffer_length ) - return -EINVAL; - - total_length += desc->length; - - if ( total_length > purb->transfer_buffer_length ) - return -EINVAL; - } - - return 0; -} - -owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req); - -static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req) -{ - unsigned long buffer_mach; - int i = 0, offset = 0, - pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; - pending_req_t *pending_req; - unsigned long remap_prot; - multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; - struct urb *purb = NULL; - owned_port_t *port; - unsigned char *setup; - - dump_request(req); - - if ( NR_PENDING_REQS == MAX_PENDING_REQS ) - { - printk(KERN_WARNING "usbback: Max requests already queued. " - "Giving up!\n"); - - return; - } - - port = find_port_for_request(up, req); - - if ( port == NULL ) - { - printk(KERN_WARNING "No such device! (%d)\n", req->devnum); - dump_request(req); - - make_response(up, req->id, req->operation, -ENODEV, 0, 0); - return; - } - else if ( !port->dev_present ) - { - /* In normal operation, we'll only get here if a device is unplugged - * and the frontend hasn't noticed yet. 
*/ - make_response(up, req->id, req->operation, -ENODEV, 0, 0); - return; - } - - - setup = kmalloc(8, GFP_KERNEL); - - if ( setup == NULL ) - goto no_mem; - - /* Copy request out for safety. */ - memcpy(setup, req->setup, 8); - - if( setup[0] == 0x0 && setup[1] == 0x5) - { - /* To virtualise the USB address space, we need to intercept - * set_address messages and emulate. From the USB specification: - * bmRequestType = 0x0; - * Brequest = SET_ADDRESS (i.e. 0x5) - * wValue = device address - * wIndex = 0 - * wLength = 0 - * data = None - */ - /* Store into the guest transfer buffer using cpu_to_le16 */ - port->guest_address = le16_to_cpu(*(u16 *)(setup + 2)); - /* Make a successful response. That was easy! */ - - make_response(up, req->id, req->operation, 0, 0, 0); - - kfree(setup); - return; - } - else if ( setup[0] == 0x0 && setup[1] == 0x9 ) - { - /* The host kernel needs to know what device configuration is in use - * because various error checks get confused otherwise. We just do - * configuration settings here, under controlled conditions. - */ - - /* Ignore configuration setting and hope that the host kernel - did it right. */ - /* usb_set_configuration(port->dev, setup[2]); */ - - make_response(up, req->id, req->operation, 0, 0, 0); - - kfree(setup); - return; - } - else if ( setup[0] == 0x1 && setup[1] == 0xB ) - { - /* The host kernel needs to know what device interface is in use - * because various error checks get confused otherwise. We just do - * configuration settings here, under controlled conditions. - */ - usb_set_interface(port->dev, (setup[4] | setup[5] << 8), - (setup[2] | setup[3] << 8) ); - - make_response(up, req->id, req->operation, 0, 0, 0); - - kfree(setup); - return; - } - - if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK) - + req->length ) - > MMAP_PAGES_PER_REQUEST * PAGE_SIZE ) - { - printk(KERN_WARNING "usbback: request of %lu bytes too large\n", - req->length); - make_response(up, req->id, req->operation, -EINVAL, 0, 0); - kfree(setup); - return; - } - - buffer_mach = req->transfer_buffer; - - if( buffer_mach == 0 ) - goto no_remap; - - ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST); - ASSERT(buffer_mach); - - /* Always map writeable for now. */ - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; - - for ( i = 0, offset = 0; offset < req->length; - i++, offset += PAGE_SIZE ) - { - mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain; - mcl[i].args[0] = MMAP_VADDR(pending_idx, i); - mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot; - mcl[i].args[2] = 0; - mcl[i].args[3] = up->domid; - - phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = - FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT); - - ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i)) - == buffer_mach + i << PAGE_SHIFT); - } - - if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */ - { - /* Map in ISO schedule, if necessary. 
*/ - mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain; - mcl[i].args[0] = MMAP_VADDR(pending_idx, i); - mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot; - mcl[i].args[2] = 0; - mcl[i].args[3] = up->domid; - - phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = - FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT); - - i++; - } - - if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) ) - BUG(); - - { - int j; - for ( j = 0; j < i; j++ ) - { - if ( unlikely(mcl[j].args[5] != 0) ) - { - printk(KERN_WARNING - "invalid buffer %d -- could not remap it\n", j); - fast_flush_area(pending_idx, i); - goto bad_descriptor; - } - } - } - - no_remap: - - ASSERT(i <= MMAP_PAGES_PER_REQUEST); - ASSERT(i * PAGE_SIZE >= req->length); - - /* We have to do this because some things might complete out of order. */ - pending_req = &pending_reqs[pending_idx]; - pending_req->usbif_priv= up; - pending_req->id = req->id; - pending_req->operation = req->operation; - pending_req->nr_pages = i; - - pending_cons++; - - usbif_get(up); - - /* Fill out an actual request for the USB layer. */ - purb = usb_alloc_urb(req->num_iso); - - if ( purb == NULL ) - { - usbif_put(up); - free_pending(pending_idx); - goto no_mem; - } - - purb->dev = port->dev; - purb->context = pending_req; - purb->transfer_buffer = - (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK)); - if(buffer_mach == 0) - purb->transfer_buffer = NULL; - purb->complete = __end_usb_io_op; - purb->transfer_buffer_length = req->length; - purb->transfer_flags = req->transfer_flags; - - purb->pipe = 0; - purb->pipe |= req->direction << 7; - purb->pipe |= port->dev->devnum << 8; - purb->pipe |= req->speed << 26; - purb->pipe |= req->pipe_type << 30; - purb->pipe |= req->endpoint << 15; - - purb->number_of_packets = req->num_iso; - - if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE ) - goto urb_error; - - /* Make sure there's always some kind of timeout. */ - purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000 - : 1000; - - purb->setup_packet = setup; - - if ( req->pipe_type == 0 ) /* ISO */ - { - int j; - usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1); - - /* If we're dealing with an iso pipe, we need to copy in a schedule. */ - for ( j = 0; j < purb->number_of_packets; j++ ) - { - purb->iso_frame_desc[j].length = iso_sched[j].length; - purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset; - iso_sched[j].status = 0; - } - } - - if ( check_iso_schedule(purb) != 0 ) - goto urb_error; - - if ( usb_submit_urb(purb) != 0 ) - goto urb_error; - - return; - - urb_error: - dump_urb(purb); - usbif_put(up); - free_pending(pending_idx); - - bad_descriptor: - kfree ( setup ); - if ( purb != NULL ) - usb_free_urb(purb); - make_response(up, req->id, req->operation, -EINVAL, 0, 0); - return; - - no_mem: - if ( setup != NULL ) - kfree(setup); - make_response(up, req->id, req->operation, -ENOMEM, 0, 0); - return; -} - - - -/****************************************************************** - * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING - */ - - -static void make_response(usbif_priv_t *up, unsigned long id, - unsigned short op, int st, int inband, - unsigned long length) -{ - usbif_response_t *resp; - unsigned long flags; - usbif_back_ring_t *usb_ring = &up->usb_ring; - - /* Place on the response ring for the relevant domain. 
*/ - spin_lock_irqsave(&up->usb_ring_lock, flags); - resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt); - resp->id = id; - resp->operation = op; - resp->status = st; - resp->data = inband; - resp->length = length; - wmb(); /* Ensure other side can see the response fields. */ - - dump_response(resp); - - usb_ring->rsp_prod_pvt++; - RING_PUSH_RESPONSES(usb_ring); - spin_unlock_irqrestore(&up->usb_ring_lock, flags); - - /* Kick the relevant domain. */ - notify_via_evtchn(up->evtchn); -} - -/** - * usbif_claim_port - claim devices on a port on behalf of guest - * - * Once completed, this will ensure that any device attached to that - * port is claimed by this driver for use by the guest. - */ -int usbif_claim_port(usbif_be_claim_port_t *msg) -{ - owned_port_t *o_p; - - /* Sanity... */ - if ( usbif_find_port(msg->path) != NULL ) - { - printk(KERN_WARNING "usbback: Attempted to claim USB port " - "we already own!\n"); - return -EINVAL; - } - - /* No need for a slab cache - this should be infrequent. */ - o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL); - - if ( o_p == NULL ) - return -ENOMEM; - - o_p->enabled = 0; - o_p->usbif_priv = usbif_find(msg->domid); - o_p->guest_port = msg->usbif_port; - o_p->dev_present = 0; - o_p->guest_address = 0; /* Default address. */ - - strcpy(o_p->path, msg->path); - - spin_lock_irq(&owned_ports_lock); - - list_add(&o_p->list, &owned_ports); - - spin_unlock_irq(&owned_ports_lock); - - printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path, - msg->domid, msg->usbif_port); - - /* Force a reprobe for unclaimed devices. */ - usb_scan_devices(); - - return 0; -} - -owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req) -{ - unsigned long flags; - struct list_head *port; - - /* I'm assuming this is not called from IRQ context - correct? I think - * it's probably only called in response to control messages or plug events - * in the USB hub kernel thread, so should be OK. */ - spin_lock_irqsave(&owned_ports_lock, flags); - list_for_each(port, &owned_ports) - { - owned_port_t *p = list_entry(port, owned_port_t, list); - if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled ) - { - dump_port(p); - - spin_unlock_irqrestore(&owned_ports_lock, flags); - return p; - } - } - spin_unlock_irqrestore(&owned_ports_lock, flags); - - return NULL; -} - -owned_port_t *__usbif_find_port(char *path) -{ - struct list_head *port; - - list_for_each(port, &owned_ports) - { - owned_port_t *p = list_entry(port, owned_port_t, list); - if(!strcmp(path, p->path)) - { - return p; - } - } - - return NULL; -} - -owned_port_t *usbif_find_port(char *path) -{ - owned_port_t *ret; - unsigned long flags; - - spin_lock_irqsave(&owned_ports_lock, flags); - ret = __usbif_find_port(path); - spin_unlock_irqrestore(&owned_ports_lock, flags); - - return ret; -} - - -static void *probe(struct usb_device *dev, unsigned iface, - const struct usb_device_id *id) -{ - owned_port_t *p; - - /* We don't care what the device is - if we own the port, we want it. 
We - * don't deal with device-specifics in this driver, so we don't care what - * the device actually is ;-) */ - if ( ( p = usbif_find_port(dev->devpath) ) != NULL ) - { - printk(KERN_INFO "usbback: claimed device attached to owned port\n"); - - p->dev_present = 1; - p->dev = dev; - set_bit(iface, &p->ifaces); - - return p->usbif_priv; - } - else - printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n", - dev->devpath); - - - return NULL; -} - -static void disconnect(struct usb_device *dev, void *usbif) -{ - /* Note the device is removed so we can tell the guest when it probes. */ - owned_port_t *port = usbif_find_port(dev->devpath); - port->dev_present = 0; - port->dev = NULL; - port->ifaces = 0; -} - - -struct usb_driver driver = -{ - .owner = THIS_MODULE, - .name = "Xen USB Backend", - .probe = probe, - .disconnect = disconnect, - .id_table = NULL, -}; - -/* __usbif_release_port - internal mechanics for releasing a port */ -void __usbif_release_port(owned_port_t *p) -{ - int i; - - for ( i = 0; p->ifaces != 0; i++) - if ( p->ifaces & 1 << i ) - { - usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i)); - clear_bit(i, &p->ifaces); - } - list_del(&p->list); - - /* Reset the real device. We don't simulate disconnect / probe for other - * drivers in this kernel because we assume the device is completely under - * the control of ourselves (i.e. the guest!). This should ensure that the - * device is in a sane state for the next customer ;-) */ - - /* MAW NB: we're not resetting the real device here. This looks perfectly - * valid to me but it causes memory corruption. We seem to get away with not - * resetting for now, although it'd be nice to have this tracked down. */ -/* if ( p->dev != NULL) */ -/* usb_reset_device(p->dev); */ - - kfree(p); -} - - -/** - * usbif_release_port - stop claiming devices on a port on behalf of guest - */ -void usbif_release_port(usbif_be_release_port_t *msg) -{ - owned_port_t *p; - - spin_lock_irq(&owned_ports_lock); - p = __usbif_find_port(msg->path); - __usbif_release_port(p); - spin_unlock_irq(&owned_ports_lock); -} - -void usbif_release_ports(usbif_priv_t *up) -{ - struct list_head *port, *tmp; - unsigned long flags; - - spin_lock_irqsave(&owned_ports_lock, flags); - list_for_each_safe(port, tmp, &owned_ports) - { - owned_port_t *p = list_entry(port, owned_port_t, list); - if ( p->usbif_priv == up ) - __usbif_release_port(p); - } - spin_unlock_irqrestore(&owned_ports_lock, flags); -} - -static int __init usbif_init(void) -{ - int i; - - if ( !(xen_start_info.flags & SIF_INITDOMAIN) && - !(xen_start_info.flags & SIF_USB_BE_DOMAIN) ) - return 0; - - if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) - BUG(); - - pending_cons = 0; - pending_prod = MAX_PENDING_REQS; - memset(pending_reqs, 0, sizeof(pending_reqs)); - for ( i = 0; i < MAX_PENDING_REQS; i++ ) - pending_ring[i] = i; - - spin_lock_init(&pend_prod_lock); - - spin_lock_init(&owned_ports_lock); - INIT_LIST_HEAD(&owned_ports); - - spin_lock_init(&usbio_schedule_list_lock); - INIT_LIST_HEAD(&usbio_schedule_list); - - if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) - BUG(); - - usbif_interface_init(); - - usbif_ctrlif_init(); - - usb_register(&driver); - - printk(KERN_INFO "Xen USB Backend Initialised"); - - return 0; -} - -__initcall(usbif_init); diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c b/linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c deleted file mode 100644 index 46cca3058a..0000000000 --- 
a/linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c +++ /dev/null @@ -1,1664 +0,0 @@ -/* - * Xen Virtual USB Frontend Driver - * - * This file contains the first version of the Xen virtual USB hub - * that I've managed not to delete by mistake (3rd time lucky!). - * - * Based on Linux's uhci.c, original copyright notices are displayed - * below. Portions also (c) 2004 Intel Research Cambridge - * and (c) 2004, 2005 Mark Williamson - * - * Contact or - * regarding this code. - * - * Still to be (maybe) implemented: - * - migration / backend restart support? - * - support for building / using as a module - */ - -/* - * Universal Host Controller Interface driver for USB. - * - * Maintainer: Johannes Erdfelt - * - * (C) Copyright 1999 Linus Torvalds - * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@erdfelt.com - * (C) Copyright 1999 Randy Dunlap - * (C) Copyright 1999 Georg Acher, acher@in.tum.de - * (C) Copyright 1999 Deti Fliegl, deti@fliegl.de - * (C) Copyright 1999 Thomas Sailer, sailer@ife.ee.ethz.ch - * (C) Copyright 1999 Roman Weissgaerber, weissg@vienna.at - * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface - * support from usb-ohci.c by Adam Richter, adam@yggdrasil.com). - * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c) - * - * Intel documents this fairly well, and as far as I know there - * are no royalties or anything like that, but even so there are - * people who decided that they want to do the same thing in a - * completely different way. - * - * WARNING! The USB documentation is downright evil. Most of it - * is just crap, written by a committee. You're better off ignoring - * most of it, the important stuff is: - * - the low-level protocol (fairly simple but lots of small details) - * - working around the horridness of the rest - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_USB_DEBUG -#define DEBUG -#else -#undef DEBUG -#endif -#include - -#include -#include - -#include "xhci.h" - -#include "../../../../../drivers/usb/hcd.h" - -#include -#include -#include - -/* - * Version Information - */ -#define DRIVER_VERSION "v1.0" -#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \ - "Randy Dunlap, Georg Acher, Deti Fliegl, " \ - "Thomas Sailer, Roman Weissgaerber, Mark Williamson" -#define DRIVER_DESC "Xen Virtual USB Host Controller Interface" - -/* - * debug = 0, no debugging messages - * debug = 1, dump failed URB's except for stalls - * debug = 2, dump all failed URB's (including stalls) - */ -#ifdef DEBUG -static int debug = 1; -#else -static int debug = 0; -#endif -MODULE_PARM(debug, "i"); -MODULE_PARM_DESC(debug, "Debug level"); -static char *errbuf; -#define ERRBUF_LEN (PAGE_SIZE * 8) - -static int rh_submit_urb(struct urb *urb); -static int rh_unlink_urb(struct urb *urb); -static int xhci_unlink_urb(struct urb *urb); -static void xhci_call_completion(struct urb *urb); -static void xhci_drain_ring(void); -static void xhci_transfer_result(struct xhci *xhci, struct urb *urb); -static void xhci_finish_completion(void); - -#define MAX_URB_LOOP 2048 /* Maximum number of linked URB's */ - -static kmem_cache_t *xhci_up_cachep; /* urb_priv cache */ -static struct xhci *xhci; /* XHCI structure for the interface */ - -/****************************************************************************** - * DEBUGGING - */ - -#ifdef DEBUG - -static void dump_urb(struct urb *urb) -{ - printk(KERN_DEBUG "dumping urb @ %p\n" - " hcpriv = %p\n" - " 
next = %p\n" - " dev = %p\n" - " pipe = 0x%lx\n" - " status = %d\n" - " transfer_flags = 0x%lx\n" - " transfer_buffer = %p\n" - " transfer_buffer_length = %d\n" - " actual_length = %d\n" - " bandwidth = %d\n" - " setup_packet = %p\n", - urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status, - urb->transfer_flags, urb->transfer_buffer, - urb->transfer_buffer_length, urb->actual_length, urb->bandwidth, - urb->setup_packet); - if ( urb->setup_packet != NULL ) - printk(KERN_DEBUG - "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n", - urb->setup_packet[0], urb->setup_packet[1], - urb->setup_packet[2], urb->setup_packet[3], - urb->setup_packet[4], urb->setup_packet[5], - urb->setup_packet[6], urb->setup_packet[7]); - printk(KERN_DEBUG "complete = %p\n" - "interval = %d\n", urb->complete, urb->interval); - -} - -static void xhci_show_resp(usbif_response_t *r) -{ - printk(KERN_DEBUG "dumping response @ %p\n" - " id=0x%lx\n" - " op=0x%x\n" - " data=0x%x\n" - " status=0x%x\n" - " length=0x%lx\n", - r->id, r->operation, r->data, r->status, r->length); -} - -#define DPRINK(...) printk(KERN_DEBUG __VA_ARGS__) - -#else /* DEBUG */ - -#define dump_urb(blah) ((void)0) -#define xhci_show_resp(blah) ((void)0) -#define DPRINTK(blah,...) ((void)0) - -#endif /* DEBUG */ - -/****************************************************************************** - * RING REQUEST HANDLING - */ - -/** - * xhci_construct_isoc - add isochronous information to a request - */ -static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb) -{ - usbif_iso_t *schedule; - int i; - struct urb_priv *urb_priv = urb->hcpriv; - - req->num_iso = urb->number_of_packets; - schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL); - - if ( schedule == NULL ) - return -ENOMEM; - - for ( i = 0; i < req->num_iso; i++ ) - { - schedule[i].buffer_offset = urb->iso_frame_desc[i].offset; - schedule[i].length = urb->iso_frame_desc[i].length; - } - - urb_priv->schedule = schedule; - req->iso_schedule = virt_to_machine(schedule); - - return 0; -} - -/** - * xhci_queue_req - construct and queue request for an URB - */ -static int xhci_queue_req(struct urb *urb) -{ - usbif_request_t *req; - usbif_front_ring_t *usb_ring = &xhci->usb_ring; - -#if DEBUG - printk(KERN_DEBUG - "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n", - usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod), - usbif->resp_prod, xhci->usb_resp_cons); -#endif - - - if ( RING_FULL(usb_ring) ) - { - printk(KERN_WARNING - "xhci_queue_req(): USB ring full, not queuing request\n"); - return -ENOBUFS; - } - - /* Stick something in the shared communications ring. */ - req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt); - - req->operation = USBIF_OP_IO; - req->port = 0; /* We don't care what the port is. 
 */
-    req->id = (unsigned long) urb->hcpriv;
-    req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
-    req->devnum = usb_pipedevice(urb->pipe);
-    req->direction = usb_pipein(urb->pipe);
-    req->speed = usb_pipeslow(urb->pipe);
-    req->pipe_type = usb_pipetype(urb->pipe);
-    req->length = urb->transfer_buffer_length;
-    req->transfer_flags = urb->transfer_flags;
-    req->endpoint = usb_pipeendpoint(urb->pipe);
-    req->speed = usb_pipeslow(urb->pipe);
-    req->timeout = urb->timeout * (1000 / HZ);
-
-    if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
-    {
-        int ret = xhci_construct_isoc(req, urb);
-        if ( ret != 0 )
-            return ret;
-    }
-
-    if(urb->setup_packet != NULL)
-        memcpy(req->setup, urb->setup_packet, 8);
-    else
-        memset(req->setup, 0, 8);
-
-    usb_ring->req_prod_pvt++;
-    RING_PUSH_REQUESTS(usb_ring);
-
-    notify_via_evtchn(xhci->evtchn);
-
-    DPRINTK("Queued request for an URB.\n");
-    dump_urb(urb);
-
-    return -EINPROGRESS;
-}
-
-/**
- * xhci_queue_probe - queue a probe request for a particular port
- */
-static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
-{
-    usbif_request_t *req;
-    usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
-    printk(KERN_DEBUG
-           "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
-           "resp_cons = %d\n", usbif->req_prod,
-           virt_to_machine(&usbif->req_prod),
-           usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-
-    if ( RING_FULL(usb_ring) )
-    {
-        printk(KERN_WARNING
-               "xhci_queue_probe(): ring full, not queuing request\n");
-        return NULL;
-    }
-
-    /* Stick something in the shared communications ring. */
-    req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-    memset(req, 0, sizeof(*req));
-
-    req->operation = USBIF_OP_PROBE;
-    req->port = port;
-
-    usb_ring->req_prod_pvt++;
-    RING_PUSH_REQUESTS(usb_ring);
-
-    notify_via_evtchn(xhci->evtchn);
-
-    return req;
-}
-
-/**
- * xhci_port_reset - queue a reset request for a particular port
- */
-static int xhci_port_reset(usbif_vdev_t port)
-{
-    usbif_request_t *req;
-    usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-    /* We only reset one port at a time, so we only need one variable per
-     * hub. */
-    xhci->awaiting_reset = 1;
-
-    /* Stick something in the shared communications ring. */
-    req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
-    memset(req, 0, sizeof(*req));
-
-    req->operation = USBIF_OP_RESET;
-    req->port = port;
-
-    usb_ring->req_prod_pvt++;
-    RING_PUSH_REQUESTS(usb_ring);
-
-    notify_via_evtchn(xhci->evtchn);
-
-    while ( xhci->awaiting_reset > 0 )
-    {
-        mdelay(1);
-        xhci_drain_ring();
-    }
-
-    xhci->rh.ports[port].pe = 1;
-    xhci->rh.ports[port].pe_chg = 1;
-
-    return xhci->awaiting_reset;
-}
-
-
-/******************************************************************************
- * RING RESPONSE HANDLING
- */
-
-static void receive_usb_reset(usbif_response_t *resp)
-{
-    xhci->awaiting_reset = resp->status;
-    rmb();
-
-}
-
-static void receive_usb_probe(usbif_response_t *resp)
-{
-    spin_lock(&xhci->rh.port_state_lock);
-
-    if ( resp->status > 0 )
-    {
-        if ( resp->status == 1 )
-        {
-            /* If there's a device there and there wasn't one before, there must
-             * have been a connection status change.
*/ - if( xhci->rh.ports[resp->data].cs == 0 ) - { - xhci->rh.ports[resp->data].cs = 1; - xhci->rh.ports[resp->data].ccs = 1; - xhci->rh.ports[resp->data].cs_chg = 1; - } - } - else - printk(KERN_WARNING "receive_usb_probe(): unexpected status %d " - "for port %d\n", resp->status, resp->data); - } - else if ( resp->status < 0) - printk(KERN_WARNING "receive_usb_probe(): got error status %d\n", - resp->status); - - spin_unlock(&xhci->rh.port_state_lock); -} - -static void receive_usb_io(usbif_response_t *resp) -{ - struct urb_priv *urbp = (struct urb_priv *)resp->id; - struct urb *urb = urbp->urb; - - urb->actual_length = resp->length; - urbp->in_progress = 0; - - if( usb_pipetype(urb->pipe) == 0 ) /* ISO */ - { - int i; - - /* Copy ISO schedule results back in. */ - for ( i = 0; i < urb->number_of_packets; i++ ) - { - urb->iso_frame_desc[i].status - = urbp->schedule[i].status; - urb->iso_frame_desc[i].actual_length - = urbp->schedule[i].length; - } - free_page((unsigned long)urbp->schedule); - } - - /* Only set status if it's not been changed since submission. It might - * have been changed if the URB has been unlinked asynchronously, for - * instance. */ - if ( urb->status == -EINPROGRESS ) - urbp->status = urb->status = resp->status; -} - -/** - * xhci_drain_ring - drain responses from the ring, calling handlers - * - * This may be called from interrupt context when an event is received from the - * backend domain, or sometimes in process context whilst waiting for a port - * reset or URB completion. - */ -static void xhci_drain_ring(void) -{ - struct list_head *tmp, *head; - usbif_front_ring_t *usb_ring = &xhci->usb_ring; - usbif_response_t *resp; - RING_IDX i, rp; - - /* Walk the ring here to get responses, updating URBs to show what - * completed. */ - - rp = usb_ring->sring->rsp_prod; - rmb(); /* Ensure we see queued requests up to 'rp'. */ - - /* Take items off the comms ring, taking care not to overflow. */ - for ( i = usb_ring->rsp_cons; i != rp; i++ ) - { - resp = RING_GET_RESPONSE(usb_ring, i); - - /* May need to deal with batching and with putting a ceiling on - the number dispatched for performance and anti-dos reasons */ - - xhci_show_resp(resp); - - switch ( resp->operation ) - { - case USBIF_OP_PROBE: - receive_usb_probe(resp); - break; - - case USBIF_OP_IO: - receive_usb_io(resp); - break; - - case USBIF_OP_RESET: - receive_usb_reset(resp); - break; - - default: - printk(KERN_WARNING - "error: unknown USB io operation response [%d]\n", - resp->operation); - break; - } - } - - usb_ring->rsp_cons = i; - - /* Walk the list of pending URB's to see which ones completed and do - * callbacks, etc. 
*/ - spin_lock(&xhci->urb_list_lock); - head = &xhci->urb_list; - tmp = head->next; - while (tmp != head) { - struct urb *urb = list_entry(tmp, struct urb, urb_list); - - tmp = tmp->next; - - /* Checks the status and does all of the magic necessary */ - xhci_transfer_result(xhci, urb); - } - spin_unlock(&xhci->urb_list_lock); - - xhci_finish_completion(); -} - - -static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs) -{ - xhci_drain_ring(); -} - -/****************************************************************************** - * HOST CONTROLLER FUNCTIONALITY - */ - -/** - * no-op implementation of private device alloc / free routines - */ -static int xhci_do_nothing_dev(struct usb_device *dev) -{ - return 0; -} - -static inline void xhci_add_complete(struct urb *urb) -{ - struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv; - unsigned long flags; - - spin_lock_irqsave(&xhci->complete_list_lock, flags); - list_add_tail(&urbp->complete_list, &xhci->complete_list); - spin_unlock_irqrestore(&xhci->complete_list_lock, flags); -} - -/* When this returns, the owner of the URB may free its - * storage. - * - * We spin and wait for the URB to complete before returning. - * - * Call with urb->lock acquired. - */ -static void xhci_delete_urb(struct urb *urb) -{ - struct urb_priv *urbp; - - urbp = urb->hcpriv; - - /* If there's no urb_priv structure for this URB then it can't have - * been submitted at all. */ - if ( urbp == NULL ) - return; - - /* For now we just spin until the URB completes. It shouldn't take too - * long and we don't expect to have to do this very often. */ - while ( urb->status == -EINPROGRESS ) - { - xhci_drain_ring(); - mdelay(1); - } - - /* Now we know that further transfers to the buffer won't - * occur, so we can safely return. */ -} - -static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb) -{ - struct urb_priv *urbp; - - urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC); - if (!urbp) { - err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n"); - return NULL; - } - - memset((void *)urbp, 0, sizeof(*urbp)); - - urbp->inserttime = jiffies; - urbp->urb = urb; - urbp->dev = urb->dev; - - INIT_LIST_HEAD(&urbp->complete_list); - - urb->hcpriv = urbp; - - return urbp; -} - -/* - * MUST be called with urb->lock acquired - */ -/* When is this called? Do we need to stop the transfer (as we - * currently do)? */ -static void xhci_destroy_urb_priv(struct urb *urb) -{ - struct urb_priv *urbp; - - urbp = (struct urb_priv *)urb->hcpriv; - if (!urbp) - return; - - if (!list_empty(&urb->urb_list)) - warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb); - - if (!list_empty(&urbp->complete_list)) - warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb); - - kmem_cache_free(xhci_up_cachep, urb->hcpriv); - - urb->hcpriv = NULL; -} - -/** - * Try to find URBs in progress on the same pipe to the same device. 
- * - * MUST be called with xhci->urb_list_lock acquired - */ -static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb) -{ - struct list_head *tmp, *head; - - /* We don't match Isoc transfers since they are special */ - if (usb_pipeisoc(urb->pipe)) - return NULL; - - head = &xhci->urb_list; - tmp = head->next; - while (tmp != head) { - struct urb *u = list_entry(tmp, struct urb, urb_list); - - tmp = tmp->next; - - if (u->dev == urb->dev && u->pipe == urb->pipe && - u->status == -EINPROGRESS) - return u; - } - - return NULL; -} - -static int xhci_submit_urb(struct urb *urb) -{ - int ret = -EINVAL; - unsigned long flags; - struct urb *eurb; - int bustime; - - DPRINTK("URB submitted to XHCI driver.\n"); - dump_urb(urb); - - if (!urb) - return -EINVAL; - - if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) { - warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb); - return -ENODEV; - } - - if ( urb->dev->devpath == NULL ) - BUG(); - - usb_inc_dev_use(urb->dev); - - spin_lock_irqsave(&xhci->urb_list_lock, flags); - spin_lock(&urb->lock); - - if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET || - urb->status == -ECONNABORTED) { - dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status); - /* Since we can have problems on the out path */ - spin_unlock(&urb->lock); - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - usb_dec_dev_use(urb->dev); - - return ret; - } - - INIT_LIST_HEAD(&urb->urb_list); - if (!xhci_alloc_urb_priv(urb)) { - ret = -ENOMEM; - - goto out; - } - - ( (struct urb_priv *)urb->hcpriv )->in_progress = 1; - - eurb = xhci_find_urb_ep(xhci, urb); - if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) { - ret = -ENXIO; - - goto out; - } - - /* Short circuit the virtual root hub */ - if (urb->dev == xhci->rh.dev) { - ret = rh_submit_urb(urb); - - goto out; - } - - switch (usb_pipetype(urb->pipe)) { - case PIPE_CONTROL: - case PIPE_BULK: - ret = xhci_queue_req(urb); - break; - - case PIPE_INTERRUPT: - if (urb->bandwidth == 0) { /* not yet checked/allocated */ - bustime = usb_check_bandwidth(urb->dev, urb); - if (bustime < 0) - ret = bustime; - else { - ret = xhci_queue_req(urb); - if (ret == -EINPROGRESS) - usb_claim_bandwidth(urb->dev, urb, - bustime, 0); - } - } else /* bandwidth is already set */ - ret = xhci_queue_req(urb); - break; - - case PIPE_ISOCHRONOUS: - if (urb->bandwidth == 0) { /* not yet checked/allocated */ - if (urb->number_of_packets <= 0) { - ret = -EINVAL; - break; - } - bustime = usb_check_bandwidth(urb->dev, urb); - if (bustime < 0) { - ret = bustime; - break; - } - - ret = xhci_queue_req(urb); - if (ret == -EINPROGRESS) - usb_claim_bandwidth(urb->dev, urb, bustime, 1); - } else /* bandwidth is already set */ - ret = xhci_queue_req(urb); - break; - } -out: - urb->status = ret; - - if (ret == -EINPROGRESS) { - /* We use _tail to make find_urb_ep more efficient */ - list_add_tail(&urb->urb_list, &xhci->urb_list); - - spin_unlock(&urb->lock); - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - - return 0; - } - - xhci_delete_urb(urb); - - spin_unlock(&urb->lock); - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - - /* Only call completion if it was successful */ - if (!ret) - xhci_call_completion(urb); - - return ret; -} - -/* - * Return the result of a transfer - * - * MUST be called with urb_list_lock acquired - */ -static void xhci_transfer_result(struct xhci *xhci, struct urb *urb) -{ - int ret = 0; - unsigned long flags; - struct urb_priv *urbp; - - /* The root hub is 
special */ - if (urb->dev == xhci->rh.dev) - return; - - spin_lock_irqsave(&urb->lock, flags); - - urbp = (struct urb_priv *)urb->hcpriv; - - if ( ( (struct urb_priv *)urb->hcpriv )->in_progress ) - ret = -EINPROGRESS; - - if (urb->actual_length < urb->transfer_buffer_length) { - if (urb->transfer_flags & USB_DISABLE_SPD) { - ret = -EREMOTEIO; - } - } - - if (urb->status == -EPIPE) - { - ret = urb->status; - /* endpoint has stalled - mark it halted */ - usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe), - usb_pipeout(urb->pipe)); - } - - if ((debug == 1 && ret != 0 && ret != -EPIPE) || - (ret != 0 && debug > 1)) { - /* Some debugging code */ - dbg("xhci_result_interrupt/bulk() failed with status %x", - status); - } - - if (ret == -EINPROGRESS) - goto out; - - switch (usb_pipetype(urb->pipe)) { - case PIPE_CONTROL: - case PIPE_BULK: - case PIPE_ISOCHRONOUS: - /* Release bandwidth for Interrupt or Isoc. transfers */ - /* Spinlock needed ? */ - if (urb->bandwidth) - usb_release_bandwidth(urb->dev, urb, 1); - xhci_delete_urb(urb); - break; - case PIPE_INTERRUPT: - /* Interrupts are an exception */ - if (urb->interval) - goto out_complete; - - /* Release bandwidth for Interrupt or Isoc. transfers */ - /* Spinlock needed ? */ - if (urb->bandwidth) - usb_release_bandwidth(urb->dev, urb, 0); - xhci_delete_urb(urb); - break; - default: - info("xhci_transfer_result: unknown pipe type %d for urb %p\n", - usb_pipetype(urb->pipe), urb); - } - - /* Remove it from xhci->urb_list */ - list_del_init(&urb->urb_list); - -out_complete: - xhci_add_complete(urb); - -out: - spin_unlock_irqrestore(&urb->lock, flags); -} - -static int xhci_unlink_urb(struct urb *urb) -{ - unsigned long flags; - struct urb_priv *urbp = urb->hcpriv; - - if (!urb) - return -EINVAL; - - if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) - return -ENODEV; - - spin_lock_irqsave(&xhci->urb_list_lock, flags); - spin_lock(&urb->lock); - - /* Release bandwidth for Interrupt or Isoc. transfers */ - /* Spinlock needed ? */ - if (urb->bandwidth) { - switch (usb_pipetype(urb->pipe)) { - case PIPE_INTERRUPT: - usb_release_bandwidth(urb->dev, urb, 0); - break; - case PIPE_ISOCHRONOUS: - usb_release_bandwidth(urb->dev, urb, 1); - break; - default: - break; - } - } - - if (urb->status != -EINPROGRESS) { - spin_unlock(&urb->lock); - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - return 0; - } - - list_del_init(&urb->urb_list); - - /* Short circuit the virtual root hub */ - if (urb->dev == xhci->rh.dev) { - rh_unlink_urb(urb); - - spin_unlock(&urb->lock); - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - - xhci_call_completion(urb); - } else { - if (urb->transfer_flags & USB_ASYNC_UNLINK) { - /* We currently don't currently attempt to cancel URBs - * that have been queued in the ring. We handle async - * unlinked URBs when they complete. 
*/ - urbp->status = urb->status = -ECONNABORTED; - spin_unlock(&urb->lock); - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - } else { - urb->status = -ENOENT; - - spin_unlock(&urb->lock); - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - - if (in_interrupt()) { /* wait at least 1 frame */ - static int errorcount = 10; - - if (errorcount--) - dbg("xhci_unlink_urb called from interrupt for urb %p", urb); - udelay(1000); - } else - schedule_timeout(1+1*HZ/1000); - - xhci_delete_urb(urb); - - xhci_call_completion(urb); - } - } - - return 0; -} - -static void xhci_call_completion(struct urb *urb) -{ - struct urb_priv *urbp; - struct usb_device *dev = urb->dev; - int is_ring = 0, killed, resubmit_interrupt, status; - struct urb *nurb; - unsigned long flags; - - spin_lock_irqsave(&urb->lock, flags); - - urbp = (struct urb_priv *)urb->hcpriv; - if (!urbp || !urb->dev) { - spin_unlock_irqrestore(&urb->lock, flags); - return; - } - - killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED || - urb->status == -ECONNRESET); - resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT && - urb->interval); - - nurb = urb->next; - if (nurb && !killed) { - int count = 0; - - while (nurb && nurb != urb && count < MAX_URB_LOOP) { - if (nurb->status == -ENOENT || - nurb->status == -ECONNABORTED || - nurb->status == -ECONNRESET) { - killed = 1; - break; - } - - nurb = nurb->next; - count++; - } - - if (count == MAX_URB_LOOP) - err("xhci_call_completion: too many linked URB's, loop? (first loop)"); - - /* Check to see if chain is a ring */ - is_ring = (nurb == urb); - } - - status = urbp->status; - if (!resubmit_interrupt || killed) - /* We don't need urb_priv anymore */ - xhci_destroy_urb_priv(urb); - - if (!killed) - urb->status = status; - - spin_unlock_irqrestore(&urb->lock, flags); - - if (urb->complete) - urb->complete(urb); - - if (resubmit_interrupt) - /* Recheck the status. The completion handler may have */ - /* unlinked the resubmitting interrupt URB */ - killed = (urb->status == -ENOENT || - urb->status == -ECONNABORTED || - urb->status == -ECONNRESET); - - if (resubmit_interrupt && !killed) { - if ( urb->dev != xhci->rh.dev ) - xhci_queue_req(urb); /* XXX What if this fails? */ - /* Don't need to resubmit URBs for the virtual root dev. */ - } else { - if (is_ring && !killed) { - urb->dev = dev; - xhci_submit_urb(urb); - } else { - /* We decrement the usage count after we're done */ - /* with everything */ - usb_dec_dev_use(dev); - } - } -} - -static void xhci_finish_completion(void) -{ - struct list_head *tmp, *head; - unsigned long flags; - - spin_lock_irqsave(&xhci->complete_list_lock, flags); - head = &xhci->complete_list; - tmp = head->next; - while (tmp != head) { - struct urb_priv *urbp = list_entry(tmp, struct urb_priv, - complete_list); - struct urb *urb = urbp->urb; - - list_del_init(&urbp->complete_list); - spin_unlock_irqrestore(&xhci->complete_list_lock, flags); - - xhci_call_completion(urb); - - spin_lock_irqsave(&xhci->complete_list_lock, flags); - head = &xhci->complete_list; - tmp = head->next; - } - spin_unlock_irqrestore(&xhci->complete_list_lock, flags); -} - -static struct usb_operations xhci_device_operations = { - .allocate = xhci_do_nothing_dev, - .deallocate = xhci_do_nothing_dev, - /* It doesn't look like any drivers actually care what the frame number - * is at the moment! If necessary, we could approximate the current - * frame nubmer by passing it from the backend in response messages. 
*/ - .get_frame_number = NULL, - .submit_urb = xhci_submit_urb, - .unlink_urb = xhci_unlink_urb -}; - -/****************************************************************************** - * VIRTUAL ROOT HUB EMULATION - */ - -static __u8 root_hub_dev_des[] = -{ - 0x12, /* __u8 bLength; */ - 0x01, /* __u8 bDescriptorType; Device */ - 0x00, /* __u16 bcdUSB; v1.0 */ - 0x01, - 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */ - 0x00, /* __u8 bDeviceSubClass; */ - 0x00, /* __u8 bDeviceProtocol; */ - 0x08, /* __u8 bMaxPacketSize0; 8 Bytes */ - 0x00, /* __u16 idVendor; */ - 0x00, - 0x00, /* __u16 idProduct; */ - 0x00, - 0x00, /* __u16 bcdDevice; */ - 0x00, - 0x00, /* __u8 iManufacturer; */ - 0x02, /* __u8 iProduct; */ - 0x01, /* __u8 iSerialNumber; */ - 0x01 /* __u8 bNumConfigurations; */ -}; - - -/* Configuration descriptor */ -static __u8 root_hub_config_des[] = -{ - 0x09, /* __u8 bLength; */ - 0x02, /* __u8 bDescriptorType; Configuration */ - 0x19, /* __u16 wTotalLength; */ - 0x00, - 0x01, /* __u8 bNumInterfaces; */ - 0x01, /* __u8 bConfigurationValue; */ - 0x00, /* __u8 iConfiguration; */ - 0x40, /* __u8 bmAttributes; - Bit 7: Bus-powered, 6: Self-powered, - Bit 5 Remote-wakeup, 4..0: resvd */ - 0x00, /* __u8 MaxPower; */ - - /* interface */ - 0x09, /* __u8 if_bLength; */ - 0x04, /* __u8 if_bDescriptorType; Interface */ - 0x00, /* __u8 if_bInterfaceNumber; */ - 0x00, /* __u8 if_bAlternateSetting; */ - 0x01, /* __u8 if_bNumEndpoints; */ - 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */ - 0x00, /* __u8 if_bInterfaceSubClass; */ - 0x00, /* __u8 if_bInterfaceProtocol; */ - 0x00, /* __u8 if_iInterface; */ - - /* endpoint */ - 0x07, /* __u8 ep_bLength; */ - 0x05, /* __u8 ep_bDescriptorType; Endpoint */ - 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */ - 0x03, /* __u8 ep_bmAttributes; Interrupt */ - 0x08, /* __u16 ep_wMaxPacketSize; 8 Bytes */ - 0x00, - 0xff /* __u8 ep_bInterval; 255 ms */ -}; - -static __u8 root_hub_hub_des[] = -{ - 0x09, /* __u8 bLength; */ - 0x29, /* __u8 bDescriptorType; Hub-descriptor */ - 0x02, /* __u8 bNbrPorts; */ - 0x00, /* __u16 wHubCharacteristics; */ - 0x00, - 0x01, /* __u8 bPwrOn2pwrGood; 2ms */ - 0x00, /* __u8 bHubContrCurrent; 0 mA */ - 0x00, /* __u8 DeviceRemovable; *** 7 Ports max *** */ - 0xff /* __u8 PortPwrCtrlMask; *** 7 ports max *** */ -}; - -/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */ -static int rh_send_irq(struct urb *urb) -{ - struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv; - xhci_port_t *ports = xhci->rh.ports; - unsigned long flags; - int i, len = 1; - __u16 data = 0; - - spin_lock_irqsave(&urb->lock, flags); - for (i = 0; i < xhci->rh.numports; i++) { - /* Set a bit if anything at all has changed on the port, as per - * USB spec 11.12 */ - data |= (ports[i].cs_chg || ports[i].pe_chg ) - ? 
(1 << (i + 1)) - : 0; - - len = (i + 1) / 8 + 1; - } - - *(__u16 *) urb->transfer_buffer = cpu_to_le16(data); - urb->actual_length = len; - urbp->status = 0; - - spin_unlock_irqrestore(&urb->lock, flags); - - if ((data > 0) && (xhci->rh.send != 0)) { - dbg("root-hub INT complete: data: %x", data); - xhci_call_completion(urb); - } - - return 0; -} - -/* Virtual Root Hub INTs are polled by this timer every "interval" ms */ -static int rh_init_int_timer(struct urb *urb); - -static void rh_int_timer_do(unsigned long ptr) -{ - struct urb *urb = (struct urb *)ptr; - struct list_head list, *tmp, *head; - unsigned long flags; - int i; - - for ( i = 0; i < xhci->rh.numports; i++) - xhci_queue_probe(i); - - if (xhci->rh.send) - rh_send_irq(urb); - - INIT_LIST_HEAD(&list); - - spin_lock_irqsave(&xhci->urb_list_lock, flags); - head = &xhci->urb_list; - tmp = head->next; - while (tmp != head) { - struct urb *u = list_entry(tmp, struct urb, urb_list); - struct urb_priv *up = (struct urb_priv *)u->hcpriv; - - tmp = tmp->next; - - spin_lock(&u->lock); - - /* Check if the URB timed out */ - if (u->timeout && time_after_eq(jiffies, - up->inserttime + u->timeout)) { - list_del(&u->urb_list); - list_add_tail(&u->urb_list, &list); - } - - spin_unlock(&u->lock); - } - spin_unlock_irqrestore(&xhci->urb_list_lock, flags); - - head = &list; - tmp = head->next; - while (tmp != head) { - struct urb *u = list_entry(tmp, struct urb, urb_list); - - tmp = tmp->next; - - u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED; - xhci_unlink_urb(u); - } - - rh_init_int_timer(urb); -} - -/* Root Hub INTs are polled by this timer */ -static int rh_init_int_timer(struct urb *urb) -{ - xhci->rh.interval = urb->interval; - init_timer(&xhci->rh.rh_int_timer); - xhci->rh.rh_int_timer.function = rh_int_timer_do; - xhci->rh.rh_int_timer.data = (unsigned long)urb; - xhci->rh.rh_int_timer.expires = jiffies - + (HZ * (urb->interval < 30 ? 
30 : urb->interval)) / 1000; - add_timer(&xhci->rh.rh_int_timer); - - return 0; -} - -#define OK(x) len = (x); break - -/* Root Hub Control Pipe */ -static int rh_submit_urb(struct urb *urb) -{ - unsigned int pipe = urb->pipe; - struct usb_ctrlrequest *cmd = - (struct usb_ctrlrequest *)urb->setup_packet; - void *data = urb->transfer_buffer; - int leni = urb->transfer_buffer_length; - int len = 0; - xhci_port_t *status; - int stat = 0; - int i; - int retstatus; - unsigned long flags; - - __u16 cstatus; - __u16 bmRType_bReq; - __u16 wValue; - __u16 wIndex; - __u16 wLength; - - if (usb_pipetype(pipe) == PIPE_INTERRUPT) { - xhci->rh.urb = urb; - xhci->rh.send = 1; - xhci->rh.interval = urb->interval; - rh_init_int_timer(urb); - - return -EINPROGRESS; - } - - bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8; - wValue = le16_to_cpu(cmd->wValue); - wIndex = le16_to_cpu(cmd->wIndex); - wLength = le16_to_cpu(cmd->wLength); - - for (i = 0; i < 8; i++) - xhci->rh.c_p_r[i] = 0; - - status = &xhci->rh.ports[wIndex - 1]; - - spin_lock_irqsave(&xhci->rh.port_state_lock, flags); - - switch (bmRType_bReq) { - /* Request Destination: - without flags: Device, - RH_INTERFACE: interface, - RH_ENDPOINT: endpoint, - RH_CLASS means HUB here, - RH_OTHER | RH_CLASS almost ever means HUB_PORT here - */ - - case RH_GET_STATUS: - *(__u16 *)data = cpu_to_le16(1); - OK(2); - case RH_GET_STATUS | RH_INTERFACE: - *(__u16 *)data = cpu_to_le16(0); - OK(2); - case RH_GET_STATUS | RH_ENDPOINT: - *(__u16 *)data = cpu_to_le16(0); - OK(2); - case RH_GET_STATUS | RH_CLASS: - *(__u32 *)data = cpu_to_le32(0); - OK(4); /* hub power */ - case RH_GET_STATUS | RH_OTHER | RH_CLASS: - cstatus = (status->cs_chg) | - (status->pe_chg << 1) | - (xhci->rh.c_p_r[wIndex - 1] << 4); - retstatus = (status->ccs) | - (status->pe << 1) | - (status->susp << 2) | - (status->pr << 8) | - (1 << 8) | /* power on */ - (status->lsda << 9); - *(__u16 *)data = cpu_to_le16(retstatus); - *(__u16 *)(data + 2) = cpu_to_le16(cstatus); - OK(4); - case RH_CLEAR_FEATURE | RH_ENDPOINT: - switch (wValue) { - case RH_ENDPOINT_STALL: - OK(0); - } - break; - case RH_CLEAR_FEATURE | RH_CLASS: - switch (wValue) { - case RH_C_HUB_OVER_CURRENT: - OK(0); /* hub power over current */ - } - break; - case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS: - switch (wValue) { - case RH_PORT_ENABLE: - status->pe = 0; - OK(0); - case RH_PORT_SUSPEND: - status->susp = 0; - OK(0); - case RH_PORT_POWER: - OK(0); /* port power */ - case RH_C_PORT_CONNECTION: - status->cs_chg = 0; - OK(0); - case RH_C_PORT_ENABLE: - status->pe_chg = 0; - OK(0); - case RH_C_PORT_SUSPEND: - /*** WR_RH_PORTSTAT(RH_PS_PSSC); */ - OK(0); - case RH_C_PORT_OVER_CURRENT: - OK(0); /* port power over current */ - case RH_C_PORT_RESET: - xhci->rh.c_p_r[wIndex - 1] = 0; - OK(0); - } - break; - case RH_SET_FEATURE | RH_OTHER | RH_CLASS: - switch (wValue) { - case RH_PORT_SUSPEND: - status->susp = 1; - OK(0); - case RH_PORT_RESET: - { - int ret; - xhci->rh.c_p_r[wIndex - 1] = 1; - status->pr = 0; - status->pe = 1; - ret = xhci_port_reset(wIndex - 1); - /* XXX MAW: should probably cancel queued transfers during reset... 
*\/ */ - if ( ret == 0 ) { OK(0); } - else { return ret; } - } - break; - case RH_PORT_POWER: - OK(0); /* port power ** */ - case RH_PORT_ENABLE: - status->pe = 1; - OK(0); - } - break; - case RH_SET_ADDRESS: - xhci->rh.devnum = wValue; - OK(0); - case RH_GET_DESCRIPTOR: - switch ((wValue & 0xff00) >> 8) { - case 0x01: /* device descriptor */ - len = min_t(unsigned int, leni, - min_t(unsigned int, - sizeof(root_hub_dev_des), wLength)); - memcpy(data, root_hub_dev_des, len); - OK(len); - case 0x02: /* configuration descriptor */ - len = min_t(unsigned int, leni, - min_t(unsigned int, - sizeof(root_hub_config_des), wLength)); - memcpy (data, root_hub_config_des, len); - OK(len); - case 0x03: /* string descriptors */ - len = usb_root_hub_string (wValue & 0xff, - 0, "XHCI-alt", - data, wLength); - if (len > 0) { - OK(min_t(int, leni, len)); - } else - stat = -EPIPE; - } - break; - case RH_GET_DESCRIPTOR | RH_CLASS: - root_hub_hub_des[2] = xhci->rh.numports; - len = min_t(unsigned int, leni, - min_t(unsigned int, sizeof(root_hub_hub_des), wLength)); - memcpy(data, root_hub_hub_des, len); - OK(len); - case RH_GET_CONFIGURATION: - *(__u8 *)data = 0x01; - OK(1); - case RH_SET_CONFIGURATION: - OK(0); - case RH_GET_INTERFACE | RH_INTERFACE: - *(__u8 *)data = 0x00; - OK(1); - case RH_SET_INTERFACE | RH_INTERFACE: - OK(0); - default: - stat = -EPIPE; - } - - spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags); - - urb->actual_length = len; - - return stat; -} - -/* - * MUST be called with urb->lock acquired - */ -static int rh_unlink_urb(struct urb *urb) -{ - if (xhci->rh.urb == urb) { - urb->status = -ENOENT; - xhci->rh.send = 0; - xhci->rh.urb = NULL; - del_timer(&xhci->rh.rh_int_timer); - } - return 0; -} - -/****************************************************************************** - * CONTROL PLANE FUNCTIONALITY - */ - -/** - * alloc_xhci - initialise a new virtual root hub for a new USB device channel - */ -static int alloc_xhci(void) -{ - int retval; - struct usb_bus *bus; - - retval = -EBUSY; - - xhci = kmalloc(sizeof(*xhci), GFP_KERNEL); - if (!xhci) { - err("couldn't allocate xhci structure"); - retval = -ENOMEM; - goto err_alloc_xhci; - } - - xhci->state = USBIF_STATE_CLOSED; - - spin_lock_init(&xhci->urb_list_lock); - INIT_LIST_HEAD(&xhci->urb_list); - - spin_lock_init(&xhci->complete_list_lock); - INIT_LIST_HEAD(&xhci->complete_list); - - spin_lock_init(&xhci->frame_list_lock); - - bus = usb_alloc_bus(&xhci_device_operations); - - if (!bus) { - err("unable to allocate bus"); - goto err_alloc_bus; - } - - xhci->bus = bus; - bus->bus_name = "XHCI"; - bus->hcpriv = xhci; - - usb_register_bus(xhci->bus); - - /* Initialize the root hub */ - - xhci->rh.numports = 0; - - xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus); - if (!xhci->rh.dev) { - err("unable to allocate root hub"); - goto err_alloc_root_hub; - } - - xhci->state = 0; - - return 0; - -/* - * error exits: - */ -err_alloc_root_hub: - usb_deregister_bus(xhci->bus); - usb_free_bus(xhci->bus); - xhci->bus = NULL; - -err_alloc_bus: - kfree(xhci); - -err_alloc_xhci: - return retval; -} - -/** - * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message - */ -static void usbif_status_change(usbif_fe_interface_status_changed_t *status) -{ - ctrl_msg_t cmsg; - usbif_fe_interface_connect_t up; - long rc; - usbif_sring_t *sring; - - switch ( status->status ) - { - case USBIF_INTERFACE_STATUS_DESTROYED: - printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n", - xhci->state); - break; 
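The alloc_xhci() routine above unwinds partial failure with the kernel's usual goto-on-error idiom: each exit label releases exactly the resources acquired before the failing step, in reverse order. A minimal sketch of the same pattern, with hypothetical names (struct my_dev, make_bus):

struct my_bus;
extern struct my_bus *make_bus(void);	/* hypothetical second allocation */

struct my_dev { struct my_bus *bus; };

static int alloc_example(void)
{
	struct my_dev *d;

	d = kmalloc(sizeof(*d), GFP_KERNEL);	/* step 1 */
	if (!d)
		goto err_alloc_dev;

	d->bus = make_bus();			/* step 2 */
	if (!d->bus)
		goto err_alloc_bus;

	return 0;

err_alloc_bus:
	kfree(d);	/* undo step 1 only; step 2 never succeeded */
err_alloc_dev:
	return -ENOMEM;
}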
- - case USBIF_INTERFACE_STATUS_DISCONNECTED: - if ( xhci->state != USBIF_STATE_CLOSED ) - { - printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message" - " in state %d\n", xhci->state); - break; - /* Not bothering to do recovery here for now. Keep things - * simple. */ - } - - /* Move from CLOSED to DISCONNECTED state. */ - sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL); - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&xhci->usb_ring, sring); - xhci->state = USBIF_STATE_DISCONNECTED; - - /* Construct an interface-CONNECT message for the domain controller. */ - cmsg.type = CMSG_USBIF_FE; - cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT; - cmsg.length = sizeof(usbif_fe_interface_connect_t); - up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT; - memcpy(cmsg.msg, &up, sizeof(up)); - - /* Tell the controller to bring up the interface. */ - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); - break; - - case USBIF_INTERFACE_STATUS_CONNECTED: - if ( xhci->state == USBIF_STATE_CLOSED ) - { - printk(KERN_WARNING "Unexpected usbif-CONNECTED message" - " in state %d\n", xhci->state); - break; - } - - xhci->evtchn = status->evtchn; - xhci->irq = bind_evtchn_to_irq(xhci->evtchn); - xhci->bandwidth = status->bandwidth; - xhci->rh.numports = status->num_ports; - - xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL); - memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports); - - usb_connect(xhci->rh.dev); - - if (usb_new_device(xhci->rh.dev) != 0) { - err("unable to start root hub"); - } - - /* Allocate the appropriate USB bandwidth here... Need to - * somehow know what the total available is thought to be so we - * can calculate the reservation correctly. */ - usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb, - 1000 - xhci->bandwidth, 0); - - if ( (rc = request_irq(xhci->irq, xhci_interrupt, - SA_SAMPLE_RANDOM, "usbif", xhci)) ) - printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc); - - DPRINTK(KERN_INFO __FILE__ - ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n", - xhci->usb_ring.sring, virt_to_machine(xhci->usbif), - xhci->evtchn, xhci->irq); - - xhci->state = USBIF_STATE_CONNECTED; - - break; - - default: - printk(KERN_WARNING "Status change to unknown value %d\n", - status->status); - break; - } -} - -/** - * usbif_ctrlif_rx - demux control messages by subtype - */ -static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->subtype ) - { - case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED: - if ( msg->length != sizeof(usbif_fe_interface_status_changed_t) ) - goto parse_error; - usbif_status_change((usbif_fe_interface_status_changed_t *) - &msg->msg[0]); - break; - - /* New interface...? */ - default: - goto parse_error; - } - - ctrl_if_send_response(msg); - return; - - parse_error: - msg->length = 0; - ctrl_if_send_response(msg); -} - - -static int __init xhci_hcd_init(void) -{ - int retval = -ENOMEM, i; - usbif_fe_interface_status_changed_t st; - control_msg_t cmsg; - - if ( (xen_start_info.flags & SIF_INITDOMAIN) - || (xen_start_info.flags & SIF_USB_BE_DOMAIN) ) - return 0; - - info(DRIVER_DESC " " DRIVER_VERSION); - - if (debug) { - errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL); - if (!errbuf) - goto errbuf_failed; - } - - xhci_up_cachep = kmem_cache_create("xhci_urb_priv", - sizeof(struct urb_priv), 0, 0, NULL, NULL); - if (!xhci_up_cachep) - goto up_failed; - - /* Let the domain controller know we're here. For now we wait until - * connection, as for the block and net drivers. 
This is only strictly - * necessary if we're going to boot off a USB device. */ - printk(KERN_INFO "Initialising Xen virtual USB hub\n"); - - (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - alloc_xhci(); - - /* Send a driver-UP notification to the domain controller. */ - cmsg.type = CMSG_USBIF_FE; - cmsg.subtype = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED; - cmsg.length = sizeof(usbif_fe_driver_status_changed_t); - st.status = USBIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &st, sizeof(st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); - - /* - * We should read 'nr_interfaces' from response message and wait - * for notifications before proceeding. For now we assume that we - * will be notified of exactly one interface. - */ - for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - if (xhci->state != USBIF_STATE_CONNECTED) - printk(KERN_WARNING "Timeout connecting USB frontend driver!\n"); - - return 0; - -up_failed: - if (errbuf) - kfree(errbuf); - -errbuf_failed: - return retval; -} - -module_init(xhci_hcd_init); - -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_LICENSE("GPL"); - diff --git a/linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h b/linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h deleted file mode 100644 index f503e59ebc..0000000000 --- a/linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h +++ /dev/null @@ -1,180 +0,0 @@ -/****************************************************************************** - * xhci.h - * - * Private definitions for the Xen Virtual USB Controller. Based on - * drivers/usb/host/uhci.h from Linux. Copyright for the imported content is - * retained by the original authors. - * - * Modifications are: - * Copyright (C) 2004 Intel Research Cambridge - * Copyright (C) 2004, 2005 Mark Williamson - */ - -#ifndef __LINUX_XHCI_H -#define __LINUX_XHCI_H - -#include -#include -#include -#include - -/* xhci_port_t - current known state of a virtual hub ports */ -typedef struct { - unsigned int cs :1; /* Connection status. do we really need this /and/ ccs? */ - unsigned int cs_chg :1; /* Connection status change. */ - unsigned int pe :1; /* Port enable. */ - unsigned int pe_chg :1; /* Port enable change. */ - unsigned int ccs :1; /* Current connect status. */ - unsigned int susp :1; /* Suspended. */ - unsigned int lsda :1; /* Low speed device attached. */ - unsigned int pr :1; /* Port reset. 
*/ -} xhci_port_t; - -/* struct virt_root_hub - state related to the virtual root hub */ -struct virt_root_hub { - struct usb_device *dev; - int devnum; /* Address of Root Hub endpoint */ - struct urb *urb; - void *int_addr; - int send; - int interval; - int numports; - int c_p_r[8]; - struct timer_list rh_int_timer; - spinlock_t port_state_lock; - xhci_port_t *ports; -}; - -/* struct xhci - contains the state associated with a single USB interface */ -struct xhci { - -#ifdef CONFIG_PROC_FS - /* procfs */ - int num; - struct proc_dir_entry *proc_entry; -#endif - - int evtchn; /* Interdom channel to backend */ - int irq; /* Bound to evtchn */ - enum { USBIF_STATE_CONNECTED = 2, - USBIF_STATE_DISCONNECTED = 1, - USBIF_STATE_CLOSED = 0 - } state; /* State of this USB interface */ - unsigned long bandwidth; - - struct usb_bus *bus; - - /* Main list of URB's currently controlled by this HC */ - spinlock_t urb_list_lock; - struct list_head urb_list; /* P: xhci->urb_list_lock */ - - /* List of URB's awaiting completion callback */ - spinlock_t complete_list_lock; - struct list_head complete_list; /* P: xhci->complete_list_lock */ - - struct virt_root_hub rh; /* private data of the virtual root hub */ - - usbif_front_ring_t usb_ring; - - int awaiting_reset; -}; - -/* per-URB private data structure for the host controller */ -struct urb_priv { - struct urb *urb; - usbif_iso_t *schedule; - struct usb_device *dev; - - int in_progress : 1; /* QH was queued (not linked in) */ - int short_control_packet : 1; /* If we get a short packet during */ - /* a control transfer, retrigger */ - /* the status phase */ - - int status; /* Final status */ - - unsigned long inserttime; /* In jiffies */ - - struct list_head complete_list; /* P: xhci->complete_list_lock */ -}; - -/* - * Locking in xhci.c - * - * spinlocks are used extensively to protect the many lists and data - * structures we have. It's not that pretty, but it's necessary. We - * need to be done with all of the locks (except complete_list_lock) when - * we call urb->complete. I've tried to make it simple enough so I don't - * have to spend hours racking my brain trying to figure out if the - * locking is safe. - * - * Here's the safe locking order to prevent deadlocks: - * - * #1 xhci->urb_list_lock - * #2 urb->lock - * #3 xhci->urb_remove_list_lock - * #4 xhci->complete_list_lock - * - * If you're going to grab 2 or more locks at once, ALWAYS grab the lock - * at the lowest level FIRST and NEVER grab locks at the same level at the - * same time. 
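As a concrete illustration of the numbered hierarchy above, a sketch of a hypothetical walker (not part of the driver) that visits every URB while honouring the documented order: the level-1 xhci->urb_list_lock is always taken before any level-2 urb->lock, and released after it.

static void walk_urbs(struct xhci *hc)
{
	unsigned long flags;
	struct list_head *tmp;

	spin_lock_irqsave(&hc->urb_list_lock, flags);	/* level #1 first */
	list_for_each(tmp, &hc->urb_list) {
		struct urb *u = list_entry(tmp, struct urb, urb_list);

		spin_lock(&u->lock);	/* level #2 nested inside #1 */
		/* ... examine or update the URB ... */
		spin_unlock(&u->lock);
	}
	spin_unlock_irqrestore(&hc->urb_list_lock, flags);
}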
- * - * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock - */ - -/* ------------------------------------------------------------------------- - Virtual Root HUB - ------------------------------------------------------------------------- */ -/* destination of request */ -#define RH_DEVICE 0x00 -#define RH_INTERFACE 0x01 -#define RH_ENDPOINT 0x02 -#define RH_OTHER 0x03 - -#define RH_CLASS 0x20 -#define RH_VENDOR 0x40 - -/* Requests: bRequest << 8 | bmRequestType */ -#define RH_GET_STATUS 0x0080 -#define RH_CLEAR_FEATURE 0x0100 -#define RH_SET_FEATURE 0x0300 -#define RH_SET_ADDRESS 0x0500 -#define RH_GET_DESCRIPTOR 0x0680 -#define RH_SET_DESCRIPTOR 0x0700 -#define RH_GET_CONFIGURATION 0x0880 -#define RH_SET_CONFIGURATION 0x0900 -#define RH_GET_STATE 0x0280 -#define RH_GET_INTERFACE 0x0A80 -#define RH_SET_INTERFACE 0x0B00 -#define RH_SYNC_FRAME 0x0C80 -/* Our Vendor Specific Request */ -#define RH_SET_EP 0x2000 - -/* Hub port features */ -#define RH_PORT_CONNECTION 0x00 -#define RH_PORT_ENABLE 0x01 -#define RH_PORT_SUSPEND 0x02 -#define RH_PORT_OVER_CURRENT 0x03 -#define RH_PORT_RESET 0x04 -#define RH_PORT_POWER 0x08 -#define RH_PORT_LOW_SPEED 0x09 -#define RH_C_PORT_CONNECTION 0x10 -#define RH_C_PORT_ENABLE 0x11 -#define RH_C_PORT_SUSPEND 0x12 -#define RH_C_PORT_OVER_CURRENT 0x13 -#define RH_C_PORT_RESET 0x14 - -/* Hub features */ -#define RH_C_HUB_LOCAL_POWER 0x00 -#define RH_C_HUB_OVER_CURRENT 0x01 -#define RH_DEVICE_REMOTE_WAKEUP 0x00 -#define RH_ENDPOINT_STALL 0x01 - -/* Our Vendor Specific feature */ -#define RH_REMOVE_EP 0x00 - -#define RH_ACK 0x01 -#define RH_REQ_ERR -1 -#define RH_NACK 0x00 - -#endif - diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h deleted file mode 100644 index 421a81f17b..0000000000 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h +++ /dev/null @@ -1,59 +0,0 @@ -/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws - * which needs to alter them. */ - -static inline void smpboot_clear_io_apic_irqs(void) -{ -#if 1 - printk("smpboot_clear_io_apic_irqs\n"); -#else - io_apic_irqs = 0; -#endif -} - -static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) -{ -#if 1 - printk("smpboot_setup_warm_reset_vector\n"); -#else - CMOS_WRITE(0xa, 0xf); - local_flush_tlb(); - Dprintk("1.\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; - Dprintk("2.\n"); - *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; - Dprintk("3.\n"); -#endif -} - -static inline void smpboot_restore_warm_reset_vector(void) -{ - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - CMOS_WRITE(0, 0xf); - - *((volatile long *) phys_to_virt(0x467)) = 0; -} - -static inline void smpboot_setup_io_apic(void) -{ -#if 1 - printk("smpboot_setup_io_apic\n"); -#else - /* - * Here we can be sure that there is an IO-APIC in the system. 
Let's - * go and set it up: - */ - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -#endif -} - - -#define smp_found_config (HYPERVISOR_shared_info->n_vcpu > 1) diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h deleted file mode 100644 index fb8bd00753..0000000000 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h +++ /dev/null @@ -1,224 +0,0 @@ -#ifndef __ASM_SPINLOCK_H -#define __ASM_SPINLOCK_H - -#include <asm/atomic.h> -#include <asm/rwlock.h> -#include <asm/page.h> -#include <linux/config.h> -#include <linux/compiler.h> - -asmlinkage int printk(const char * fmt, ...) - __attribute__ ((format (printf, 1, 2))); - -/* - * Your basic SMP spinlocks, allowing only a single CPU anywhere - */ - -typedef struct { - volatile unsigned int lock; -#ifdef CONFIG_DEBUG_SPINLOCK - unsigned magic; -#endif -} spinlock_t; - -#define SPINLOCK_MAGIC 0xdead4ead - -#ifdef CONFIG_DEBUG_SPINLOCK -#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC -#else -#define SPINLOCK_MAGIC_INIT /* */ -#endif - -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } - -#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) - -/* - * Simple spin lock operations. There are two variants, one clears IRQ's - * on the local processor, one does not. - * - * We make no fairness assumptions. They have a cost. - */ - -#define spin_is_locked(x) (*(volatile signed char *)(&(x)->lock) <= 0) -#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) - -#define spin_lock_string \ - "\n1:\t" \ - "lock ; decb %0\n\t" \ - "jns 3f\n" \ - "2:\t" \ - "rep;nop\n\t" \ - "cmpb $0,%0\n\t" \ - "jle 2b\n\t" \ - "jmp 1b\n" \ - "3:\n\t" - -#define spin_lock_string_flags \ - "\n1:\t" \ - "lock ; decb %0\n\t" \ - "jns 4f\n\t" \ - "2:\t" \ - "testl $0x200, %1\n\t" \ - "jz 3f\n\t" \ - "#sti\n\t" \ - "3:\t" \ - "rep;nop\n\t" \ - "cmpb $0, %0\n\t" \ - "jle 3b\n\t" \ - "#cli\n\t" \ - "jmp 1b\n" \ - "4:\n\t" - -/* - * This works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE) - * (PPro errata 66, 92) - */ - -#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) - -#define spin_unlock_string \ - "movb $1,%0" \ - :"=m" (lock->lock) : : "memory" - - -static inline void _raw_spin_unlock(spinlock_t *lock) -{ -#ifdef CONFIG_DEBUG_SPINLOCK - BUG_ON(lock->magic != SPINLOCK_MAGIC); - BUG_ON(!spin_is_locked(lock)); -#endif - __asm__ __volatile__( - spin_unlock_string - ); -} - -#else - -#define spin_unlock_string \ - "xchgb %b0, %1" \ - :"=q" (oldval), "=m" (lock->lock) \ - :"0" (oldval) : "memory" - -static inline void _raw_spin_unlock(spinlock_t *lock) -{ - char oldval = 1; -#ifdef CONFIG_DEBUG_SPINLOCK - BUG_ON(lock->magic != SPINLOCK_MAGIC); - BUG_ON(!spin_is_locked(lock)); -#endif - __asm__ __volatile__( - spin_unlock_string - ); -} - -#endif - -static inline int _raw_spin_trylock(spinlock_t *lock) -{ - char oldval; - __asm__ __volatile__( - "xchgb %b0,%1" - :"=q" (oldval), "=m" (lock->lock) - :"0" (0) : "memory"); - return oldval > 0; -} - -static inline void _raw_spin_lock(spinlock_t *lock) -{ -#ifdef CONFIG_DEBUG_SPINLOCK - if (unlikely(lock->magic != SPINLOCK_MAGIC)) { - printk("eip: %p\n", __builtin_return_address(0)); - BUG(); - } -#endif - __asm__ __volatile__( - spin_lock_string - :"=m" (lock->lock) : : "memory"); -} - -static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags) -{ -#ifdef CONFIG_DEBUG_SPINLOCK - if (unlikely(lock->magic != SPINLOCK_MAGIC)) { - printk("eip: %p\n", __builtin_return_address(0)); - BUG(); - } -#endif - __asm__ __volatile__( - spin_lock_string_flags - :"=m" (lock->lock) : "r" (flags) : "memory"); -} - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - */ -typedef struct { - volatile unsigned int lock; -#ifdef CONFIG_DEBUG_SPINLOCK - unsigned magic; -#endif -} rwlock_t; - -#define RWLOCK_MAGIC 0xdeaf1eed - -#ifdef CONFIG_DEBUG_SPINLOCK -#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC -#else -#define RWLOCK_MAGIC_INIT /* */ -#endif - -#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } - -#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) - -#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS) - -/* - * On x86, we implement read-write locks as a 32-bit counter - * with the high bit (sign) being the "contended" bit. - * - * The inline assembly is non-obvious. Think about it. - * - * Changed to use the same technique as rw semaphores. See - * semaphore.h for details. 
-ben - */ -/* the spinlock helpers are in arch/i386/kernel/semaphore.c */ - -static inline void _raw_read_lock(rwlock_t *rw) -{ -#ifdef CONFIG_DEBUG_SPINLOCK - BUG_ON(rw->magic != RWLOCK_MAGIC); -#endif - __build_read_lock(rw, "__read_lock_failed"); -} - -static inline void _raw_write_lock(rwlock_t *rw) -{ -#ifdef CONFIG_DEBUG_SPINLOCK - BUG_ON(rw->magic != RWLOCK_MAGIC); -#endif - __build_write_lock(rw, "__write_lock_failed"); -} - -#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") -#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") - -static inline int _raw_write_trylock(rwlock_t *lock) -{ - atomic_t *count = (atomic_t *)lock; - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) - return 1; - atomic_add(RW_LOCK_BIAS, count); - return 0; -} - -#endif /* __ASM_SPINLOCK_H */ diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c new file mode 100644 index 0000000000..6cd16ccfdc --- /dev/null +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c @@ -0,0 +1,258 @@ +/* + * linux/arch/i386/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the lowest level x86-specific interrupt + * entry, irq-stacks and irq statistics code. All the remaining + * irq logic is done by the generic kernel/irq/ code and + * by the x86-specific irq controller code. (e.g. i8259.c and + * io_apic.c.) + */ + +#include <asm/uaccess.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> + +#ifndef CONFIG_X86_LOCAL_APIC +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); +} +#endif + +#ifdef CONFIG_4KSTACKS +/* + * per-CPU IRQ handling contexts (thread information and stack) + */ +union irq_ctx { + struct thread_info tinfo; + u32 stack[THREAD_SIZE/sizeof(u32)]; +}; + +static union irq_ctx *hardirq_ctx[NR_CPUS]; +static union irq_ctx *softirq_ctx[NR_CPUS]; +#endif + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +fastcall unsigned int do_IRQ(struct pt_regs *regs) +{ + /* high bits used in ret_from_ code */ + int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS); +#ifdef CONFIG_4KSTACKS + union irq_ctx *curctx, *irqctx; + u32 *isp; +#endif + + irq_enter(); +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 1KB free? */ + { + long esp; + + __asm__ __volatile__("andl %%esp,%0" : + "=r" (esp) : "0" (THREAD_SIZE - 1)); + if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { + printk("do_IRQ: stack overflow: %ld\n", + esp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + +#ifdef CONFIG_4KSTACKS + + curctx = (union irq_ctx *) current_thread_info(); + irqctx = hardirq_ctx[smp_processor_id()]; + + /* + * this is where we switch to the IRQ stack.
However, if we are + already using the IRQ stack (because we interrupted a hardirq + handler) we can't do that and just have to keep using the + current stack (which is the irq stack already after all) + */ + if (curctx != irqctx) { + int arg1, arg2, ebx; + + /* build the stack frame on the IRQ stack */ + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + irqctx->tinfo.task = curctx->tinfo.task; + irqctx->tinfo.previous_esp = current_stack_pointer; + + asm volatile( + " xchgl %%ebx,%%esp \n" + " call __do_IRQ \n" + " movl %%ebx,%%esp \n" + : "=a" (arg1), "=d" (arg2), "=b" (ebx) + : "0" (irq), "1" (regs), "2" (isp) + : "memory", "cc", "ecx" + ); + } else +#endif + __do_IRQ(irq, regs); + + irq_exit(); + + return 1; +} + +#ifdef CONFIG_4KSTACKS + +/* + * These should really be __section__(".bss.page_aligned") as well, but + * gcc's 3.0 and earlier don't handle that correctly. + */ +static char softirq_stack[NR_CPUS * THREAD_SIZE] + __attribute__((__aligned__(THREAD_SIZE))); + +static char hardirq_stack[NR_CPUS * THREAD_SIZE] + __attribute__((__aligned__(THREAD_SIZE))); + +/* + * allocate per-cpu stacks for hardirq and for softirq processing + */ +void irq_ctx_init(int cpu) +{ + union irq_ctx *irqctx; + + if (hardirq_ctx[cpu]) + return; + + irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + + hardirq_ctx[cpu] = irqctx; + + irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + + softirq_ctx[cpu] = irqctx; + + printk("CPU %u irqstacks, hard=%p soft=%p\n", + cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); } + +extern asmlinkage void __do_softirq(void); + +asmlinkage void do_softirq(void) +{ + unsigned long flags; + struct thread_info *curctx; + union irq_ctx *irqctx; + u32 *isp; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + curctx = current_thread_info(); + irqctx = softirq_ctx[smp_processor_id()]; + irqctx->tinfo.task = curctx->task; + irqctx->tinfo.previous_esp = current_stack_pointer; + + /* build the stack frame on the softirq stack */ + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + + asm volatile( + " xchgl %%ebx,%%esp \n" + " call __do_softirq \n" + " movl %%ebx,%%esp \n" + : "=b"(isp) + : "0"(isp) + : "memory", "cc", "edx", "ecx", "eax" + ); + } + + local_irq_restore(flags); } + +EXPORT_SYMBOL(do_softirq); +#endif + +/* + * Interrupt statistics: + */ + +atomic_t irq_err_count; + +/* + * /proc/interrupts printing: + */ + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v, j; + struct irqaction * action; + unsigned long flags; + + if (i == 0) { + seq_printf(p, " "); + for (j=0; j<NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "CPU%d ", j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto skip; + seq_printf(p, "%3d: ", i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#endif + seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) { + seq_printf(p, "NMI: "); + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", nmi_count(j)); + seq_putc(p, '\n'); +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "LOC: "); + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", +
irq_stat[j].apic_timer_irqs); + seq_putc(p, '\n'); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#if defined(CONFIG_X86_IO_APIC) + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif + } + return 0; +} diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c new file mode 100644 index 0000000000..7b7763848b --- /dev/null +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c @@ -0,0 +1,599 @@ +/* + * Intel SMP support routines. + * + * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> + * + * This code is released under the GNU General Public License version 2 or + * later. + */ + +#include <linux/init.h> + +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/smp_lock.h> +#include <linux/kernel_stat.h> +#include <linux/mc146818rtc.h> +#include <linux/cache.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> + +#include <asm/mtrr.h> +#include <asm/tlbflush.h> +#if 0 +#include <mach_apic.h> +#endif +#include <asm-xen/evtchn.h> + +#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg) + +/* + * Some notes on x86 processor bugs affecting SMP operation: + * + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. + * The Linux implications for SMP are handled as follows: + * + * Pentium III / [Xeon] + * None of the E1AP-E3AP errata are visible to the user. + * + * E1AP. see PII A1AP + * E2AP. see PII A2AP + * E3AP. see PII A3AP + * + * Pentium II / [Xeon] + * None of the A1AP-A3AP errata are visible to the user. + * + * A1AP. see PPro 1AP + * A2AP. see PPro 2AP + * A3AP. see PPro 7AP + * + * Pentium Pro + * None of 1AP-9AP errata are visible to the normal user, + * except occasional delivery of 'spurious interrupt' as trap #15. + * This is very rare and a non-problem. + * + * 1AP. Linux maps APIC as non-cacheable + * 2AP. worked around in hardware + * 3AP. fixed in C0 and above steppings microcode update. + * Linux does not use excessive STARTUP_IPIs. + * 4AP. worked around in hardware + * 5AP. symmetric IO mode (normal Linux operation) not affected. + * 'noapic' mode has vector 0xf filled out properly. + * 6AP. 'noapic' mode might be affected - fixed in later steppings + * 7AP. We do not assume writes to the LVT deassering IRQs + * 8AP. We do not enable low power mode (deep sleep) during MP bootup + * 9AP. We do not use mixed mode + * + * Pentium + * There is a marginal case where REP MOVS on 100MHz SMP + * machines with B stepping processors can fail. XXX should provide + * an L1cache=Writethrough or L1cache=off option. + * + * B stepping CPUs may hang. There are hardware work arounds + * for this. We warn about it in case your board doesn't have the work + * arounds. Basically thats so I can tell anyone with a B stepping + * CPU and SMP problems "tough". + * + * Specific items [From Pentium Processor Specification Update] + * + * 1AP. Linux doesn't use remote read + * 2AP. Linux doesn't trust APIC errors + * 3AP. We work around this + * 4AP. Linux never generated 3 interrupts of the same priority + * to cause a lost local interrupt. + * 5AP. Remote read is never used + * 6AP. not affected - worked around in hardware + * 7AP. not affected - worked around in hardware + * 8AP. worked around in hardware - we get explicit CS errors if not + * 9AP. only 'noapic' mode affected. Might generate spurious + * interrupts, we log only the first one and count the + * rest silently. + * 10AP. not affected - worked around in hardware + * 11AP. Linux reads the APIC between writes to avoid this, as per + * the documentation. Make sure you preserve this as it affects + * the C stepping chips too. + * 12AP. not affected - worked around in hardware + * 13AP.
not affected - worked around in hardware + * 14AP. we always deassert INIT during bootup + * 15AP. not affected - worked around in hardware + * 16AP. not affected - worked around in hardware + * 17AP. not affected - worked around in hardware + * 18AP. not affected - worked around in hardware + * 19AP. not affected - worked around in BIOS + * + * If this sounds worrying believe me these bugs are either ___RARE___, + * or are signal timing bugs worked around in hardware and there's + * about nothing of note with C stepping upwards. + */ + +DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; + +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline int __prepare_ICR (unsigned int shortcut, int vector) +{ + return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; +} + +static inline int __prepare_ICR2 (unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]); + +static inline void __send_IPI_one(unsigned int cpu, int vector) +{ + unsigned int evtchn; + + evtchn = per_cpu(ipi_to_evtchn, cpu)[vector]; + // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn); + if (evtchn) { +#if 0 + shared_info_t *s = HYPERVISOR_shared_info; + while (synch_test_bit(evtchn, &s->evtchn_pending[0]) || + synch_test_bit(evtchn, &s->evtchn_mask[0])) + ; +#endif + notify_via_evtchn(evtchn); + } else + printk("send_IPI to unbound port %d/%d", + cpu, vector); +} + +void __send_IPI_shortcut(unsigned int shortcut, int vector) +{ + int cpu; + + switch (shortcut) { + case APIC_DEST_SELF: + __send_IPI_one(smp_processor_id(), vector); + break; + case APIC_DEST_ALLBUT: + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (cpu == smp_processor_id()) + continue; + if (cpu_isset(cpu, cpu_online_map)) { + __send_IPI_one(cpu, vector); + } + } + break; + default: + printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut, + vector); + break; + } +} + +void fastcall send_IPI_self(int vector) +{ + __send_IPI_shortcut(APIC_DEST_SELF, vector); +} + +/* + * This is only used on smaller machines. + */ +void send_IPI_mask_bitmask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned int cpu; + + local_irq_save(flags); + + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + if (cpu_isset(cpu, mask)) { + __send_IPI_one(cpu, vector); + } + } + + local_irq_restore(flags); +} + +inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +{ + + send_IPI_mask_bitmask(mask, vector); +} + +#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */ + +/* + * Smarter SMP flushing macros. + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about + * writing to user space from interrupts. (Its not allowed anyway). + * + * Optimizations Manfred Spraul <manfred@colorfullife.com> + */ + +static cpumask_t flush_cpumask; +static struct mm_struct * flush_mm; +static unsigned long flush_va; +static DEFINE_SPINLOCK(tlbstate_lock); +#define FLUSH_ALL 0xffffffff + +/* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm->cpu_vm_mask. + * + * We need to reload %cr3 since the page tables may be going + * away from under us..
+ */ +static inline void leave_mm (unsigned long cpu) +{ + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) + BUG(); + cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); + load_cr3(swapper_pg_dir); +} + +/* + * + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] + * 1) switch_mm() either 1a) or 1b) + * 1a) thread switch to a different mm + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); + * Stop ipi delivery for the old mm. This is not synchronized with + * the other cpus, but smp_invalidate_interrupt ignore flush ipis + * for the wrong mm, and in the worst case we perform a superflous + * tlb flush. + * 1a2) set cpu_tlbstate to TLBSTATE_OK + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 + * was in lazy tlb mode. + * 1a3) update cpu_tlbstate[].active_mm + * Now cpu0 accepts tlb flushes for the new mm. + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); + * Now the other cpus will send tlb flush ipis. + * 1a4) change cr3. + * 1b) thread switch without mm change + * cpu_tlbstate[].active_mm is correct, cpu0 already handles + * flush ipis. + * 1b1) set cpu_tlbstate to TLBSTATE_OK + * 1b2) test_and_set the cpu bit in cpu_vm_mask. + * Atomically set the bit [other cpus will start sending flush ipis], + * and test the bit. + * 1b3) if the bit was 0: leave_mm was called, flush the tlb. + * 2) switch %%esp, ie current + * + * The interrupt must handle 2 special cases: + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. + * - the cpu performs speculative tlb reads, i.e. even if the cpu only + * runs in kernel space, the cpu could load tlb entries for user space + * pages. + * + * The good news is that cpu_tlbstate is local to each cpu, no + * write/read ordering problems. + */ + +/* + * TLB flush IPI: + * + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. + * 2) Leave the mm if we are in the lazy tlb mode. + */ + +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + unsigned long cpu; + + cpu = get_cpu(); + + if (!cpu_isset(cpu, flush_cpumask)) + goto out; + /* + * This was a BUG() but until someone can quote me the + * line from the intel manual that guarantees an IPI to + * multiple CPUs is retried _only_ on the erroring CPUs + * its staying as a return + * + * BUG(); + */ + + if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { + if (flush_va == FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(flush_va); + } else + leave_mm(cpu); + } + smp_mb__before_clear_bit(); + cpu_clear(cpu, flush_cpumask); + smp_mb__after_clear_bit(); +out: + put_cpu_no_resched(); + + return IRQ_HANDLED; +} + +static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, + unsigned long va) +{ + cpumask_t tmp; + /* + * A couple of (to be removed) sanity checks: + * + * - we do not send IPIs to not-yet booted CPUs. + * - current CPU must not be in mask + * - mask must exist :) + */ + BUG_ON(cpus_empty(cpumask)); + + cpus_and(tmp, cpumask, cpu_online_map); + BUG_ON(!cpus_equal(cpumask, tmp)); + BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(!mm); + + /* + * i'm not happy about this global shared spinlock in the + * MM hot path, but we'll see how contended it is. + * Temporarily this turns IRQs off, so that lockups are + * detected by the NMI watchdog. 
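For orientation, the exported flush_tlb_* helpers defined below are what the rest of the kernel calls into this machinery; a sketch of a hypothetical caller that has just rewritten one user PTE and must shoot the stale translation down everywhere the mm is live:

static void update_one_pte(struct vm_area_struct *vma, unsigned long addr,
                           pte_t *ptep, pte_t newval)
{
        set_pte(ptep, newval);          /* publish the new translation */
        flush_tlb_page(vma, addr);      /* local flush, then IPIs via
                                         * flush_tlb_others() for every
                                         * other CPU in cpu_vm_mask */
}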
+ */ + spin_lock(&tlbstate_lock); + + flush_mm = mm; + flush_va = va; +#if NR_CPUS <= BITS_PER_LONG + atomic_set_mask(cpumask, &flush_cpumask); +#else + { + int k; + unsigned long *flush_mask = (unsigned long *)&flush_cpumask; + unsigned long *cpu_mask = (unsigned long *)&cpumask; + for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) + atomic_set_mask(cpu_mask[k], &flush_mask[k]); + } +#endif + /* + * We have to send the IPI only to + * CPUs affected. + */ + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + + while (!cpus_empty(flush_cpumask)) + /* nothing. lockup detection does not belong here */ + mb(); + + flush_mm = NULL; + flush_va = 0; + spin_unlock(&tlbstate_lock); +} + +void flush_tlb_current_task(void) +{ + struct mm_struct *mm = current->mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + local_flush_tlb(); + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + preempt_enable(); +} + +void flush_tlb_mm (struct mm_struct * mm) +{ + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if (current->mm) + local_flush_tlb(); + else + leave_mm(smp_processor_id()); + } + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + + preempt_enable(); +} + +void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) +{ + struct mm_struct *mm = vma->vm_mm; + cpumask_t cpu_mask; + + preempt_disable(); + cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + + if (current->active_mm == mm) { + if(current->mm) + __flush_tlb_one(va); + else + leave_mm(smp_processor_id()); + } + + if (!cpus_empty(cpu_mask)) + flush_tlb_others(cpu_mask, mm, va); + + preempt_enable(); } + +static void do_flush_tlb_all(void* info) +{ + unsigned long cpu = smp_processor_id(); + + __flush_tlb_all(); + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) + leave_mm(cpu); +} + +void flush_tlb_all(void) +{ + on_each_cpu(do_flush_tlb_all, NULL, 1, 1); +} + +/* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +void smp_send_reschedule(int cpu) +{ + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + */ +static DEFINE_SPINLOCK(call_lock); + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static struct call_data_struct * call_data; + +/* + * this function sends a 'generic call function' IPI to all other CPUs + * in the system. + */ + +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) +/* + * [SUMMARY] Run a function on all other CPUs. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <nonatomic> currently unused. + * <wait> If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <<func>> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler.
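The comment above is effectively the contract for smp_call_function(); a minimal usage sketch (hypothetical caller) that runs a fast, non-blocking callback on every other online CPU and waits for all of them to finish:

static atomic_t pings = ATOMIC_INIT(0);

static void ping(void *info)
{
        atomic_inc((atomic_t *)info);   /* fast, non-blocking */
}

static void ping_other_cpus(void)
{
        /* nonatomic is currently unused; wait=1 blocks until done */
        if (smp_call_function(ping, &pings, 0, 1) == 0)
                printk("%d other CPUs responded\n", atomic_read(&pings));
}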
+ */ +{ + struct call_data_struct data; + int cpus = num_online_cpus()-1; + + if (!cpus) + return 0; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock(&call_lock); + call_data = &data; + mb(); + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + barrier(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + barrier(); + spin_unlock(&call_lock); + + return 0; +} + +static void stop_this_cpu (void * dummy) +{ + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); + local_irq_disable(); +#if 1 + xxprint("stop_this_cpu disable_local_APIC\n"); +#else + disable_local_APIC(); +#endif + if (cpu_data[smp_processor_id()].hlt_works_ok) + for(;;) __asm__("hlt"); + for (;;); +} + +/* + * this function calls the 'stop' function on all other CPUs in the system. + */ + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 1, 0); + + local_irq_disable(); +#if 1 + xxprint("smp_send_stop disable_local_APIC\n"); +#else + disable_local_APIC(); +#endif + local_irq_enable(); +} + +/* + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. + */ +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + + return IRQ_HANDLED; +} + +#include +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + irq_enter(); + (*func)(info); + irq_exit(); + + if (wait) { + mb(); + atomic_inc(&call_data->finished); + } + + return IRQ_HANDLED; +} + diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c new file mode 100644 index 0000000000..9f39dde976 --- /dev/null +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c @@ -0,0 +1,1365 @@ +/* + * x86 SMP booting functions + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * + * Much of the core SMP work is based on previous work by Thomas Radke, to + * whom a great many thanks are extended. + * + * Thanks to Intel for making available several different Pentium, + * Pentium Pro and Pentium-II/Xeon MP machines. + * Original development of Linux SMP code supported by Caldera. + * + * This code is released under the GNU General Public License version 2 or + * later. + * + * Fixes + * Felix Koop : NR_CPUS used properly + * Jose Renau : Handle single CPU case. + * Alan Cox : By repeated request 8) - Total BogoMIPS report. + * Greg Wright : Fix for kernel stacks panic. + * Erich Boleyn : MP v1.4 and additional changes. + * Matthias Sattler : Changes for 2.1 kernel map. + * Michel Lespinasse : Changes for 2.1 kernel map. + * Michael Chastain : Change trampoline.S to gnu as. 
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine + * Ingo Molnar : Added APIC timers, based on code + * from Jose Renau + * Ingo Molnar : various cleanups and rewrites + * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. + * Maciej W. Rozycki : Bits for genuine 82489DX APICs + * Martin J. Bligh : Added support for multi-quad systems + * Dave Jones : Report invalid combinations of Athlon CPUs. +* Rusty Russell : Hacked into shape for new "hotplug" boot process. */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if 1 +#define Dprintk(args...) +#else +#include +#endif +#include +#include + +/* Set if we find a B stepping CPU */ +static int __initdata smp_b_stepping; + +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; +int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ +EXPORT_SYMBOL(phys_proc_id); + +/* bitmap of online cpus */ +cpumask_t cpu_online_map; + +cpumask_t cpu_callin_map; +cpumask_t cpu_callout_map; +static cpumask_t smp_commenced_mask; + +/* Per CPU bogomips and other parameters */ +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; + +u8 x86_cpu_to_apicid[NR_CPUS] = + { [0 ... NR_CPUS-1] = 0xff }; +EXPORT_SYMBOL(x86_cpu_to_apicid); + +/* Set when the idlers are all forked */ +int smp_threads_ready; + +#if 0 +/* + * Trampoline 80x86 program as an array. + */ + +extern unsigned char trampoline_data []; +extern unsigned char trampoline_end []; +static unsigned char *trampoline_base; +static int trampoline_exec; + +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure it's suitably aligned. + */ + +static unsigned long __init setup_trampoline(void) +{ + memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); +} +#endif + +/* + * We are called very early to get the low memory for the + * SMP bootup trampoline page. + */ +void __init smp_alloc_memory(void) +{ +#if 1 + int cpu; + + for (cpu = 1; cpu < NR_CPUS; cpu++) { + cpu_gdt_descr[cpu].address = (unsigned long) + alloc_bootmem_low_pages(PAGE_SIZE); + /* XXX free unused pages later */ + } +#else + trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + if (__pa(trampoline_base) >= 0x9F000) + BUG(); + /* + * Make the SMP trampoline executable: + */ + trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); +#endif +} + +/* + * The bootstrap kernel entry code has set these up. Save them for + * a given CPU + */ + +static void __init smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c = cpu_data + id; + + *c = boot_cpu_data; + if (id!=0) + identify_cpu(c); + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs + */ + smp_b_stepping = 1; + + /* + * Certain Athlons might work (for various values of 'work') in SMP + * but they are not certified as MP capable. + */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + + /* Athlon 660/661 is valid. 
*/ + if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) + goto valid_k7; + + /* Duron 670 is valid */ + if ((c->x86_model==7) && (c->x86_mask==0)) + goto valid_k7; + + /* + * Athlon 662, Duron 671, and Athlon >model 7 have capability bit. + * It's worth noting that the A5 stepping (662) of some Athlon XP's + * have the MP bit set. + * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more. + */ + if (((c->x86_model==6) && (c->x86_mask>=2)) || + ((c->x86_model==7) && (c->x86_mask>=1)) || + (c->x86_model> 7)) + if (cpu_has_mp) + goto valid_k7; + + /* If we get here, it's not a certified SMP capable AMD system. */ + tainted |= TAINT_UNSAFE_SMP; + } + +valid_k7: + ; +} + +#if 0 +/* + * TSC synchronization. + * + * We first check whether all CPUs have their TSC's synchronized, + * then we print a warning if not, and always resync. + */ + +static atomic_t tsc_start_flag = ATOMIC_INIT(0); +static atomic_t tsc_count_start = ATOMIC_INIT(0); +static atomic_t tsc_count_stop = ATOMIC_INIT(0); +static unsigned long long tsc_values[NR_CPUS]; + +#define NR_LOOPS 5 + +static void __init synchronize_tsc_bp (void) +{ + int i; + unsigned long long t0; + unsigned long long sum, avg; + long long delta; + unsigned long one_usec; + int buggy = 0; + + printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); + + /* convert from kcyc/sec to cyc/usec */ + one_usec = cpu_khz / 1000; + + atomic_set(&tsc_start_flag, 1); + wmb(); + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronized and + * the BP and APs set their cycle counters to zero all at + * once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. + */ + for (i = 0; i < NR_LOOPS; i++) { + /* + * all APs synchronize but they loop on '== num_cpus' + */ + while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) + mb(); + atomic_set(&tsc_count_stop, 0); + wmb(); + /* + * this lets the APs save their current TSC: + */ + atomic_inc(&tsc_count_start); + + rdtscll(tsc_values[smp_processor_id()]); + /* + * We clear the TSC in the last loop: + */ + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + /* + * Wait for all APs to leave the synchronization point: + */ + while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) + mb(); + atomic_set(&tsc_count_start, 0); + wmb(); + atomic_inc(&tsc_count_stop); + } + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (cpu_isset(i, cpu_callout_map)) { + t0 = tsc_values[i]; + sum += t0; + } + } + avg = sum; + do_div(avg, num_booting_cpus()); + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + delta = tsc_values[i] - avg; + if (delta < 0) + delta = -delta; + /* + * We report bigger than 2 microseconds clock differences. 
+ */ + if (delta > 2*one_usec) { + long realdelta; + if (!buggy) { + buggy = 1; + printk("\n"); + } + realdelta = delta; + do_div(realdelta, one_usec); + if (tsc_values[i] < avg) + realdelta = -realdelta; + + printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); + } + + sum += delta; + } + if (!buggy) + printk("passed.\n"); +} + +static void __init synchronize_tsc_ap (void) +{ + int i; + + /* + * Not every cpu is online at the time + * this gets called, so we first wait for the BP to + * finish SMP initialization: + */ + while (!atomic_read(&tsc_start_flag)) mb(); + + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&tsc_count_start); + while (atomic_read(&tsc_count_start) != num_booting_cpus()) + mb(); + + rdtscll(tsc_values[smp_processor_id()]); + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); + } +} +#undef NR_LOOPS +#endif + +extern void calibrate_delay(void); + +static atomic_t init_deasserted; + +void __init smp_callin(void) +{ + int cpuid, phys_id; + unsigned long timeout; + +#if 0 + /* + * If waken up by an INIT in an 82489DX configuration + * we may get here before an INIT-deassert IPI reaches + * our local APIC. We have to wait for the IPI or we'll + * lock up on an APIC access. + */ + wait_for_init_deassert(&init_deasserted); +#endif + + /* + * (This works even if the APIC is not enabled.) + */ + phys_id = smp_processor_id(); + cpuid = smp_processor_id(); + if (cpu_isset(cpuid, cpu_callin_map)) { + printk("huh, phys CPU#%d, CPU#%d already present??\n", + phys_id, cpuid); + BUG(); + } + Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); + + /* + * STARTUP IPIs are fragile beasts as they might sometimes + * trigger some glue motherboard logic. Complete APIC bus + * silence for 1 second, this overestimates the time the + * boot CPU is spending to send the up to 2 STARTUP IPIs + * by a factor of two. This should be enough. + */ + + /* + * Waiting 2s total for startup (udelay is not yet working) + */ + timeout = jiffies + 2*HZ; + while (time_before(jiffies, timeout)) { + /* + * Has the boot CPU finished it's STARTUP sequence? + */ + if (cpu_isset(cpuid, cpu_callout_map)) + break; + rep_nop(); + } + + if (!time_before(jiffies, timeout)) { + printk("BUG: CPU%d started up but did not get a callout!\n", + cpuid); + BUG(); + } + +#if 0 + /* + * the boot CPU has finished the init stage and is spinning + * on callin_map until we finish. We are free to set up this + * CPU, first the APIC. (this is probably redundant on most + * boards) + */ + + Dprintk("CALLIN, before setup_local_APIC().\n"); + smp_callin_clear_local_apic(); + setup_local_APIC(); +#endif + map_cpu_to_logical_apicid(); + + /* + * Get our bogomips. + */ + calibrate_delay(); + Dprintk("Stack at about %p\n",&cpuid); + + /* + * Save our processor parameters + */ + smp_store_cpu_info(cpuid); + +#if 0 + disable_APIC_timer(); +#endif + + /* + * Allow the master to continue. 
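+ *
+ * (This is the AP's half of the handshake: do_boot_cpu() on the BSP
+ * set our bit in cpu_callout_map and is now polling cpu_callin_map,
+ * in essence
+ *
+ *     cpu_set(cpu, cpu_callout_map);           -- BSP: "go ahead"
+ *     while (!cpu_isset(cpu, cpu_callin_map))  -- BSP waits here
+ *         udelay(100);
+ *
+ * and the cpu_set() just below is what satisfies that wait.)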
+ */ + cpu_set(cpuid, cpu_callin_map); + +#if 0 + /* + * Synchronize the TSC with the BP + */ + if (cpu_has_tsc && cpu_khz) + synchronize_tsc_ap(); +#endif +} + +int cpucount; + + +static irqreturn_t local_debug_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + + return IRQ_HANDLED; +} + +static struct irqaction local_irq_debug = { + local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug", + NULL, NULL +}; + +void local_setup_debug(void) +{ + (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug); +} + + +extern void local_setup_timer(void); + +/* + * Activate a secondary processor. + */ +static int __init start_secondary(void *unused) +{ + /* + * Dont put anything before smp_callin(), SMP + * booting is too fragile that we want to limit the + * things done here to the most necessary things. + */ + cpu_init(); + smp_callin(); + while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) + rep_nop(); + local_setup_timer(); + local_setup_debug(); /* XXX */ + smp_intr_init(); + local_irq_enable(); + /* + * low-memory mappings have been cleared, flush them from + * the local TLBs too. + */ + local_flush_tlb(); + cpu_set(smp_processor_id(), cpu_online_map); + + /* We can take interrupts now: we're officially "up". */ + local_irq_enable(); + + wmb(); + if (0) { + char *msg2 = "delay2\n"; + int timeout; + for (timeout = 0; timeout < 50000; timeout++) { + udelay(1000); + if (timeout == 2000) { + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2); + timeout = 0; + } + } + } + cpu_idle(); +} + +/* + * Everything has been set up for the secondary + * CPUs - they just need to reload everything + * from the task structure + * This function must not return. + */ +void __init initialize_secondary(void) +{ + /* + * We don't actually need to load the full TSS, + * basically just the stack pointer and the eip. + */ + + asm volatile( + "movl %0,%%esp\n\t" + "jmp *%1" + : + :"r" (current->thread.esp),"r" (current->thread.eip)); +} + +extern struct { + void * esp; + unsigned short ss; +} stack_start; + +#ifdef CONFIG_NUMA + +/* which logical CPUs are on which nodes */ +cpumask_t node_2_cpu_mask[MAX_NUMNODES] = + { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; +/* which node each logical CPU is on */ +int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; +EXPORT_SYMBOL(cpu_2_node); + +/* set up a mapping between cpu and node. */ +static inline void map_cpu_to_node(int cpu, int node) +{ + printk("Mapping cpu %d to node %d\n", cpu, node); + cpu_set(cpu, node_2_cpu_mask[node]); + cpu_2_node[cpu] = node; +} + +/* undo a mapping between cpu and node. */ +static inline void unmap_cpu_to_node(int cpu) +{ + int node; + + printk("Unmapping cpu %d from all nodes\n", cpu); + for (node = 0; node < MAX_NUMNODES; node ++) + cpu_clear(cpu, node_2_cpu_mask[node]); + cpu_2_node[cpu] = 0; +} +#else /* !CONFIG_NUMA */ + +#define map_cpu_to_node(cpu, node) ({}) +#define unmap_cpu_to_node(cpu) ({}) + +#endif /* CONFIG_NUMA */ + +u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = BAD_APICID }; + +void map_cpu_to_logical_apicid(void) +{ + int cpu = smp_processor_id(); + int apicid = smp_processor_id(); + + cpu_2_logical_apicid[cpu] = apicid; + map_cpu_to_node(cpu, apicid_to_node(apicid)); +} + +void unmap_cpu_to_logical_apicid(int cpu) +{ + cpu_2_logical_apicid[cpu] = BAD_APICID; + unmap_cpu_to_node(cpu); +} + +#if APIC_DEBUG +static inline void __inquire_remote_apic(int apicid) +{ + int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; + char *names[] = { "ID", "VERSION", "SPIV" }; + int timeout, status; + + printk("Inquiring remote APIC #%d...\n", apicid); + + for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { + printk("... APIC #%d %s: ", apicid, names[i]); + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + + timeout = 0; + do { + udelay(100); + status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; + } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); + + switch (status) { + case APIC_ICR_RR_VALID: + status = apic_read(APIC_RRR); + printk("%08x\n", status); + break; + default: + printk("failed\n"); + } + } +} +#endif + +#if 0 +#ifdef WAKE_SECONDARY_VIA_NMI +/* + * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal + * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this + * won't ... remember to clear down the APIC, etc later. + */ +static int __init +wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) +{ + unsigned long send_status = 0, accept_status = 0; + int timeout, maxlvt; + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + maxlvt = get_maxlvt(); + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + Dprintk("NMI sent.\n"); + + if (send_status) + printk("APIC never delivered???\n"); + if (accept_status) + printk("APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +} +#endif /* WAKE_SECONDARY_VIA_NMI */ + +#ifdef WAKE_SECONDARY_VIA_INIT +static int __init +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) +{ + unsigned long send_status = 0, accept_status = 0; + int maxlvt, timeout, num_starts, j; + + /* + * Be paranoid about clearing APIC errors. 
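+ *
+ * (Context, illustrative: this function implements the classic
+ * INIT, INIT-deassert, then up-to-two STARTUP IPIs wakeup.  The
+ * STARTUP IPI encodes the target page number in its vector field,
+ *
+ *     vector = (start_eip >> 12) & 0xff;   -- AP starts at 000VV000h
+ *
+ * which is why start_eip must be page-aligned and below 1MB.)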
+ */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } + + Dprintk("Asserting INIT.\n"); + + /* + * Turn INIT on target chip + */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* + * Send IPI + */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + mdelay(10); + + Dprintk("Deasserting INIT.\n"); + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Send IPI */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + atomic_set(&init_deasserted, 1); + + /* + * Should we send STARTUP IPIs ? + * + * Determine this based on the APIC version. + * If we don't have an integrated APIC, don't send the STARTUP IPIs. + */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) + num_starts = 2; + else + num_starts = 0; + + /* + * Run STARTUP IPI loop. + */ + Dprintk("#startup loops: %d.\n", num_starts); + + maxlvt = get_maxlvt(); + + for (j = 1; j <= num_starts; j++) { + Dprintk("Sending STARTUP #%d.\n",j); + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + Dprintk("After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_STARTUP + | (start_eip >> 12)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(300); + + Dprintk("Startup point 1.\n"); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + Dprintk("After Startup.\n"); + + if (send_status) + printk("APIC never delivered???\n"); + if (accept_status) + printk("APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +} +#endif /* WAKE_SECONDARY_VIA_INIT */ +#endif + +extern cpumask_t cpu_initialized; + +static int __init do_boot_cpu(int apicid) +/* + * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad + * (ie clustered apic addressing mode), this is a LOGICAL apic ID. + * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. + */ +{ + struct task_struct *idle; + unsigned long boot_error; + int timeout, cpu; + unsigned long start_eip; +#if 0 + unsigned short nmi_high = 0, nmi_low = 0; +#endif + full_execution_context_t ctxt; + extern void startup_32_smp(void); + extern void hypervisor_callback(void); + extern void failsafe_callback(void); + extern int smp_trap_init(trap_info_t *); + int i; + + cpu = ++cpucount; + /* + * We can't use kernel_thread since we must avoid to + * reschedule the child. 
+ */ + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + idle->thread.eip = (unsigned long) start_secondary; + /* start_eip had better be page-aligned! */ + start_eip = (unsigned long)startup_32_smp; + + /* So we see what's up */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); + /* Stack for startup_32 can be just as for start_secondary onwards */ + stack_start.esp = (void *) idle->thread.esp; + + irq_ctx_init(cpu); + + /* + * This grunge runs the startup process for + * the targeted processor. + */ + + atomic_set(&init_deasserted, 0); + +#if 1 + if (cpu_gdt_descr[0].size > PAGE_SIZE) + BUG(); + cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; + memcpy((void *)cpu_gdt_descr[cpu].address, + (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size); + memset((char *)cpu_gdt_descr[cpu].address + + FIRST_RESERVED_GDT_ENTRY * 8, 0, + NR_RESERVED_GDT_ENTRIES * 8); + + memset(&ctxt, 0, sizeof(ctxt)); + + ctxt.cpu_ctxt.ds = __USER_DS; + ctxt.cpu_ctxt.es = __USER_DS; + ctxt.cpu_ctxt.fs = 0; + ctxt.cpu_ctxt.gs = 0; + ctxt.cpu_ctxt.ss = __KERNEL_DS; + ctxt.cpu_ctxt.cs = __KERNEL_CS; + ctxt.cpu_ctxt.eip = start_eip; + ctxt.cpu_ctxt.esp = idle->thread.esp; + ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12); + + /* FPU is set up to default initial state. */ + memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); + + /* Virtual IDT is empty at start-of-day. */ + for ( i = 0; i < 256; i++ ) + { + ctxt.trap_ctxt[i].vector = i; + ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS; + } + ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt); + + /* No LDT. */ + ctxt.ldt_ents = 0; + + { + unsigned long va; + int f; + + for (va = cpu_gdt_descr[cpu].address, f = 0; + va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size; + va += PAGE_SIZE, f++) { + ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT; + make_page_readonly((void *)va); + } + ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8; + flush_page_update_queue(); + } + + /* Ring 1 stack is the initial stack. */ + ctxt.kernel_ss = __KERNEL_DS; + ctxt.kernel_esp = idle->thread.esp; + + /* Callback handlers. */ + ctxt.event_callback_cs = __KERNEL_CS; + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; + ctxt.failsafe_callback_cs = __KERNEL_CS; + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + + ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir); + + boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt); + + if (!boot_error) { + /* + * allow APs to start initializing. + */ + Dprintk("Before Callout %d.\n", cpu); + cpu_set(cpu, cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (cpu_isset(cpu, cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (cpu_isset(cpu, cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + Dprintk("OK.\n"); + printk("CPU%d: ", cpu); + print_cpu_info(&cpu_data[cpu]); + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; + } + } + x86_cpu_to_apicid[cpu] = apicid; + if (boot_error) { + /* Try to put things back the way they were before ... 
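+ *
+ * (Aside: the callin wait above polls 50000 times with udelay(100),
+ * i.e. 50000 * 100us == 5s, matching the "Wait 5s total" comment.
+ * The failure path below then unwinds, in reverse order, exactly
+ * the state this boot attempt published.)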
*/ + unmap_cpu_to_logical_apicid(cpu); + cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ + cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ + cpucount--; + } + +#else + Dprintk("Setting warm reset code and vector.\n"); + + store_NMI_vector(&nmi_high, &nmi_low); + + smpboot_setup_warm_reset_vector(start_eip); + + /* + * Starting actual IPI sequence... + */ + boot_error = wakeup_secondary_cpu(apicid, start_eip); + + if (!boot_error) { + /* + * allow APs to start initializing. + */ + Dprintk("Before Callout %d.\n", cpu); + cpu_set(cpu, cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (cpu_isset(cpu, cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (cpu_isset(cpu, cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + Dprintk("OK.\n"); + printk("CPU%d: ", cpu); + print_cpu_info(&cpu_data[cpu]); + Dprintk("CPU has booted.\n"); + } else { + boot_error= 1; + if (*((volatile unsigned char *)trampoline_base) + == 0xA5) + /* trampoline started but...? */ + printk("Stuck ??\n"); + else + /* trampoline code not run */ + printk("Not responding.\n"); + inquire_remote_apic(apicid); + } + } + x86_cpu_to_apicid[cpu] = apicid; + if (boot_error) { + /* Try to put things back the way they were before ... */ + unmap_cpu_to_logical_apicid(cpu); + cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ + cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ + cpucount--; + } + + /* mark "stuck" area as not stuck */ + *((volatile unsigned long *)trampoline_base) = 0; +#endif + + return boot_error; +} + +cycles_t cacheflush_time; +unsigned long cache_decay_ticks; + +static void smp_tune_scheduling (void) +{ + unsigned long cachesize; /* kB */ + unsigned long bandwidth = 350; /* MB/s */ + /* + * Rough estimation for SMP scheduling, this is the number of + * cycles it takes for a fully memory-limited process to flush + * the SMP-local cache. + * + * (For a P5 this pretty much means we will choose another idle + * CPU almost always at wakeup time (this is due to the small + * L1 cache), on PIIs it's around 50-100 usecs, depending on + * the cache size) + */ + + if (!cpu_khz) { + /* + * this basically disables processor-affinity + * scheduling on SMP without a TSC. + */ + cacheflush_time = 0; + return; + } else { + cachesize = boot_cpu_data.x86_cache_size; + if (cachesize == -1) { + cachesize = 16; /* Pentiums, 2x8kB cache */ + bandwidth = 100; + } + + cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth; + } + + cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1; + + printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", + (long)cacheflush_time/(cpu_khz/1000), + ((long)cacheflush_time*100/(cpu_khz/1000)) % 100); + printk("task migration cache decay timeout: %ld msecs.\n", + cache_decay_ticks); +} + +/* + * Cycle through the processors sending APIC IPIs to boot each. 
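+ */
+
+/*
+ * Worked example of the smp_tune_scheduling() heuristic above (a
+ * sketch, not part of the original patch): (cpu_khz >> 10) is roughly
+ * the clock in MHz and (cachesize << 10) the cache size in bytes, so
+ * a hypothetical 1 GHz CPU (cpu_khz == 1024000) with a 512kB cache
+ * and the default 350 MB/s figure gives 1000 * 524288 / 350, about
+ * 1.5M cycles, hence cache_decay_ticks of 1500000/1024000 + 1 == 2
+ * msecs.
+ */
+static inline unsigned long est_cacheflush_cycles(unsigned long khz,
+                                                  unsigned long cache_kb,
+                                                  unsigned long mbps)
+{
+    /* (~MHz) * bytes / (MB/s) ~= CPU cycles to stream the cache */
+    return (khz >> 10) * (cache_kb << 10) / mbps;
+}
+
+/* smp_boot_cpus() below does that cycling: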
+ */ + +#if 0 +static int boot_cpu_logical_apicid; +#endif +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio; + +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; + +static void __init smp_boot_cpus(unsigned int max_cpus) +{ + int cpu, kicked; + unsigned long bogosum = 0; +#if 0 + int apicid, bit; +#endif + + /* + * Setup boot CPU information + */ + smp_store_cpu_info(0); /* Final full version of the data */ + printk("CPU%d: ", 0); + print_cpu_info(&cpu_data[0]); + +#if 0 + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_logical_apicid = logical_smp_processor_id(); + x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; +#else + // boot_cpu_physical_apicid = 0; + // boot_cpu_logical_apicid = 0; + x86_cpu_to_apicid[0] = 0; +#endif + + current_thread_info()->cpu = 0; + smp_tune_scheduling(); + cpus_clear(cpu_sibling_map[0]); + cpu_set(0, cpu_sibling_map[0]); + + /* + * If we couldn't find an SMP configuration at boot time, + * get out of here now! + */ + if (!smp_found_config /* && !acpi_lapic) */) { + printk(KERN_NOTICE "SMP motherboard not detected.\n"); + smpboot_clear_io_apic_irqs(); +#if 0 + phys_cpu_present_map = physid_mask_of_physid(0); + if (APIC_init_uniprocessor()) + printk(KERN_NOTICE "Local APIC not detected." + " Using dummy APIC emulation.\n"); +#endif + map_cpu_to_logical_apicid(); + return; + } + +#if 0 + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + * Makes no sense to do this check in clustered apic mode, so skip it + */ + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + return; + } + + verify_local_APIC(); +#endif + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus) { + HYPERVISOR_shared_info->n_vcpu = 1; + printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + smpboot_clear_io_apic_irqs(); +#if 0 + phys_cpu_present_map = physid_mask_of_physid(0); +#endif + return; + } + + smp_intr_init(); + +#if 0 + connect_bsp_APIC(); + setup_local_APIC(); +#endif + map_cpu_to_logical_apicid(); +#if 0 + + + setup_portio_remap(); + + /* + * Scan the CPU present map and fire up the other CPUs via do_boot_cpu + * + * In clustered apic mode, phys_cpu_present_map is a constructed thus: + * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the + * clustered apic ID. + */ + Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map)); +#endif + Dprintk("CPU present map: %lx\n", + (1UL << HYPERVISOR_shared_info->n_vcpu) - 1); + + kicked = 1; + for (cpu = 1; kicked < NR_CPUS && + cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) { + if (max_cpus <= cpucount+1) + continue; + + if (do_boot_cpu(cpu)) + printk("CPU #%d not responding - cannot use it.\n", + cpu); + else + ++kicked; + } + +#if 0 + /* + * Cleanup possible dangling ends... 
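+ *
+ * (Aside on the boot loop above: with no MP tables under Xen, the
+ * present map is synthesized from the hypervisor's vcpu count as
+ * n_vcpu contiguous low bits,
+ *
+ *     mask = (1UL << HYPERVISOR_shared_info->n_vcpu) - 1;  -- 4 -> 0xf
+ *
+ * valid while n_vcpu < BITS_PER_LONG, and do_boot_cpu() is simply
+ * invoked once per remaining vcpu index.)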
+ */ + smpboot_restore_warm_reset_vector(); +#endif + + /* + * Allow the user to impress friends. + */ + Dprintk("Before bogomips.\n"); + for (cpu = 0; cpu < NR_CPUS; cpu++) + if (cpu_isset(cpu, cpu_callout_map)) + bogosum += cpu_data[cpu].loops_per_jiffy; + printk(KERN_INFO + "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + cpucount+1, + bogosum/(500000/HZ), + (bogosum/(5000/HZ))%100); + + Dprintk("Before bogocount - setting activated=1.\n"); + + if (smp_b_stepping) + printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); + + /* + * Don't taint if we are running SMP kernel on a single non-MP + * approved Athlon + */ + if (tainted & TAINT_UNSAFE_SMP) { + if (cpucount) + printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n"); + else + tainted &= ~TAINT_UNSAFE_SMP; + } + + Dprintk("Boot done.\n"); + + /* + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpus_clear(cpu_sibling_map[cpu]); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + + if (smp_num_siblings > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (phys_proc_id[cpu] == phys_proc_id[i]) { + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); + } + } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); + } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); + } + +#if 0 + if (nmi_watchdog == NMI_LOCAL_APIC) + check_nmi_watchdog(); + + smpboot_setup_io_apic(); + + setup_boot_APIC_clock(); + + /* + * Synchronize the TSC with the AP + */ + if (cpu_has_tsc && cpucount && cpu_khz) + synchronize_tsc_bp(); +#endif +} + +/* These are wrappers to interface to the new boot process. Someone + who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + smp_boot_cpus(max_cpus); +} + +void __devinit smp_prepare_boot_cpu(void) +{ + cpu_set(smp_processor_id(), cpu_online_map); + cpu_set(smp_processor_id(), cpu_callout_map); +} + +int __devinit __cpu_up(unsigned int cpu) +{ + /* This only works at boot for x86. See "rewrite" above. */ + if (cpu_isset(cpu, smp_commenced_mask)) { + local_irq_enable(); + return -ENOSYS; + } + + /* In case one didn't come up */ + if (!cpu_isset(cpu, cpu_callin_map)) { + local_irq_enable(); + return -EIO; + } + + local_irq_enable(); + /* Unleash the CPU! 
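+ *
+ * (Final leg of the handshake: since callin, start_secondary() has
+ * been parked in
+ *
+ *     while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+ *         rep_nop();
+ *
+ * so the cpu_set() below releases it, and the wait that follows
+ * watches for the AP flipping its cpu_online_map bit.)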
*/ + cpu_set(cpu, smp_commenced_mask); + while (!cpu_isset(cpu, cpu_online_map)) + mb(); + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +#if 1 +#else +#ifdef CONFIG_X86_IO_APIC + setup_ioapic_dest(); +#endif + zap_low_mappings(); + /* + * Disable executability of the SMP trampoline: + */ + set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); +#endif +} + +extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); + +static struct irqaction reschedule_irq = { + smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule", + NULL, NULL +}; + +extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *); + +static struct irqaction invalidate_irq = { + smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate", + NULL, NULL +}; + +extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); + +static struct irqaction call_function_irq = { + smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE, + "call_function", NULL, NULL +}; + +void __init smp_intr_init(void) +{ + + (void)setup_irq( + bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR), + &reschedule_irq); + (void)setup_irq( + bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR), + &invalidate_irq); + (void)setup_irq( + bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR), + &call_function_irq); +} diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c new file mode 100644 index 0000000000..51addc6c76 --- /dev/null +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c @@ -0,0 +1,19 @@ +/* Copyright (C) 2004, Christian Limpach */ + +#include +#include +#include + +unsigned int __initdata maxcpus = NR_CPUS; + + +/* + * the frequency of the profiling timer can be changed + * by writing a multiplier value into /proc/profile. + */ +int setup_profiling_timer(unsigned int multiplier) +{ + printk("setup_profiling_timer\n"); + + return 0; +} diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile new file mode 100644 index 0000000000..80b7ca0627 --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile @@ -0,0 +1,3 @@ + +obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o + diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c new file mode 100644 index 0000000000..a9a00677bc --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c @@ -0,0 +1,87 @@ +/****************************************************************************** + * blktap.c + * + * XenLinux virtual block-device tap. + * + * Copyright (c) 2004, Andrew Warfield + * + * Based on the original split block driver: + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + * Copyright (c) 2004, Christian Limpach + * + * Note that unlike the split block driver code, this driver has been developed + * strictly for Linux 2.6 + */ + +#include "blktap.h" + +int __init xlblktap_init(void) +{ + ctrl_msg_t cmsg; + blkif_fe_driver_status_t fe_st; + blkif_be_driver_status_t be_st; + + printk(KERN_INFO "Initialising Xen block tap device\n"); + + DPRINTK(" tap - Backend connection init:\n"); + + + (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + /* Send a driver-UP notification to the domain controller. 
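+ *
+ * (Pattern note: this and the backend notification further down are
+ * the stock control-interface send, i.e.
+ *
+ *     cmsg.type/.subtype/.length = class, op, sizeof(payload);
+ *     memcpy(cmsg.msg, &payload, sizeof(payload));
+ *     ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ *
+ * a typed payload marshalled into the fixed-size msg[] buffer.)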
*/ + cmsg.type = CMSG_BLKIF_FE; + cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; + cmsg.length = sizeof(blkif_fe_driver_status_t); + fe_st.status = BLKIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &fe_st, sizeof(fe_st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + + DPRINTK(" tap - Frontend connection init:\n"); + + active_reqs_init(); + blkif_interface_init(); + blkdev_schedule_init(); + + (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + /* Send a driver-UP notification to the domain controller. */ + cmsg.type = CMSG_BLKIF_BE; + cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS; + cmsg.length = sizeof(blkif_be_driver_status_t); + be_st.status = BLKIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &be_st, sizeof(be_st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + + DPRINTK(" tap - Userland channel init:\n"); + + blktap_init(); + + DPRINTK("Blkif tap device initialized.\n"); + + return 0; +} + +#if 0 /* tap doesn't handle suspend/resume */ +void blkdev_suspend(void) +{ +} + +void blkdev_resume(void) +{ + ctrl_msg_t cmsg; + blkif_fe_driver_status_t st; + + /* Send a driver-UP notification to the domain controller. */ + cmsg.type = CMSG_BLKIF_FE; + cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; + cmsg.length = sizeof(blkif_fe_driver_status_t); + st.status = BLKIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &st, sizeof(st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} +#endif + +__initcall(xlblktap_init); diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h new file mode 100644 index 0000000000..eb084e8f06 --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h @@ -0,0 +1,250 @@ +/* + * blktap.h + * + * Interfaces for the Xen block tap driver. + * + * (c) 2004, Andrew Warfield, University of Cambridge + * + */ + +#ifndef __BLKTAP_H__ +#define __BLKTAP_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Used to signal to the backend that this is a tap domain. */ +#define BLKTAP_COOKIE 0xbeadfeed + +/* -------[ debug / pretty printing ]--------------------------------- */ + +#if 0 +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) + + +/* -------[ state descriptors ]--------------------------------------- */ + +#define BLKIF_STATE_CLOSED 0 +#define BLKIF_STATE_DISCONNECTED 1 +#define BLKIF_STATE_CONNECTED 2 + +/* -------[ connection tracking ]------------------------------------- */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) +#endif + +extern spinlock_t blkif_io_lock; + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned long shmem_frame; + unsigned int evtchn; + int irq; + /* Comms information. */ + blkif_back_ring_t blk_ring; + + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + /* + * DISCONNECT response is deferred until pending requests are ack'ed. 
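+ *
+ * (Teardown is reference-counted: the last blkif_put(), defined
+ * below, drops refcnt to zero and schedules
+ * blkif_disconnect_complete(), which finally emits the deferred
+ * response.)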
+ * We therefore need to store the id from the original request. + */ + u8 disconnect_rspid; + struct blkif_st *hash_next; + struct list_head blkdev_list; + spinlock_t blk_ring_lock; + atomic_t refcnt; + struct work_struct work; +} blkif_t; + +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); +void blkif_disconnect_complete(blkif_t *blkif); +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ + blkif_disconnect_complete(_b); \ + } while (0) + + +/* -------[ active request tracking ]--------------------------------- */ + +typedef struct { + blkif_t *blkif; + unsigned long id; + int nr_pages; + unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int next_free; +} active_req_t; + +typedef unsigned int ACTIVE_RING_IDX; + +active_req_t *lookup_active_req(ACTIVE_RING_IDX idx); + +extern inline unsigned int ID_TO_IDX(unsigned long id) +{ + return ( id & 0x0000ffff ); +} + +extern inline domid_t ID_TO_DOM(unsigned long id) +{ + return (id >> 16); +} + +void active_reqs_init(void); + +/* -------[ interposition -> character device interface ]------------- */ + +/* /dev/xen/blktap resides at device number major=10, minor=200 */ +#define BLKTAP_MINOR 202 + +/* size of the extra VMA area to map in attached pages. */ +#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE + +/* blktap IOCTLs: */ +#define BLKTAP_IOCTL_KICK_FE 1 +#define BLKTAP_IOCTL_KICK_BE 2 +#define BLKTAP_IOCTL_SETMODE 3 +#define BLKTAP_IOCTL_PRINT_IDXS 100 + +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 +#define BLKTAP_MODE_COPY_FE 0x00000004 +#define BLKTAP_MODE_COPY_BE 0x00000008 +#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 +#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 + +#define BLKTAP_MODE_INTERPOSE \ + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) + +#define BLKTAP_MODE_COPY_BOTH \ + (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) + +#define BLKTAP_MODE_COPY_BOTH_PAGES \ + (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) + +static inline int BLKTAP_MODE_VALID(unsigned long arg) +{ + return ( + ( arg == BLKTAP_MODE_PASSTHROUGH ) || + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || + ( arg == BLKTAP_MODE_INTERCEPT_BE ) || + ( arg == BLKTAP_MODE_INTERPOSE ) || + ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || + ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || + ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) + ); +} + + + +/* -------[ Mappings to User VMA ]------------------------------------ */ +#define MAX_PENDING_REQS 64 +#define BATCH_PER_DOMAIN 16 +extern struct vm_area_struct *blktap_vma; + +/* The following are from blkback.c and should probably be put in a + * header and included from there. + * The mmap area described here is where attached data pages eill be mapped. + */ + +extern unsigned long mmap_vstart; +#define MMAP_PAGES_PER_REQUEST \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) +#define MMAP_PAGES \ + (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) +#define MMAP_VADDR(_req,_seg) \ + (mmap_vstart + \ + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) + +/* immediately before the mmap area, we have a bunch of pages reserved + * for shared memory rings. 
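+ */
+
+/*
+ * Worked example of the data-page layout above (a sketch, not part of
+ * the original patch): with 4kB pages and, say,
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST == 11, each request slot spans 12
+ * pages, so request 2, segment 3 sits at
+ * mmap_vstart + (2*12 + 3) * 4096.
+ */
+static inline unsigned long blktap_example_data_vaddr(int req, int seg)
+{
+    /* identical to MMAP_VADDR(req, seg) above */
+    return mmap_vstart
+        + ((unsigned long)req * MMAP_PAGES_PER_REQUEST * PAGE_SIZE)
+        + ((unsigned long)seg * PAGE_SIZE);
+}
+
+/* ...and as noted, just before that data area come the ring pages: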
+ */ + +#define RING_PAGES 3 /* Ctrl, Front, and Back */ +extern unsigned long rings_vstart; + + +/* -------[ Here be globals ]----------------------------------------- */ +extern unsigned long blktap_mode; + +/* Connection to a single backend domain. */ +extern blkif_front_ring_t blktap_be_ring; +extern unsigned int blktap_be_evtchn; +extern unsigned int blktap_be_state; + +/* User ring status. */ +extern unsigned long blktap_ring_ok; + +/* -------[ ...and function prototypes. ]----------------------------- */ + +/* init function for character device interface. */ +int blktap_init(void); + +/* init function for the blkif cache. */ +void __init blkif_interface_init(void); +void __init blkdev_schedule_init(void); +void blkif_deschedule(blkif_t *blkif); + +/* interfaces to the char driver, passing messages to and from apps. */ +void blktap_kick_user(void); + +/* user ring access functions: */ +int blktap_write_fe_ring(blkif_request_t *req); +int blktap_write_be_ring(blkif_response_t *rsp); +int blktap_write_ctrl_ring(ctrl_msg_t *msg); + +/* fe/be ring access functions: */ +int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp); +int write_req_to_be_ring(blkif_request_t *req); + +/* event notification functions */ +void kick_fe_domain(blkif_t *blkif); +void kick_be_domain(void); + +/* Interrupt handlers. */ +irqreturn_t blkif_ptbe_int(int irq, void *dev_id, + struct pt_regs *ptregs); +irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs); + +/* Control message receiver. */ +extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id); + +/* debug */ +void print_vm_ring_idxs(void); + +#define __BLKINT_H__ +#endif diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c new file mode 100644 index 0000000000..9ce74c7442 --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c @@ -0,0 +1,517 @@ +/****************************************************************************** + * blktap_controlmsg.c + * + * XenLinux virtual block-device tap. + * Control interfaces to the frontend and backend drivers. 
+ * + * Copyright (c) 2004, Andrew Warfield + * + */ + +#include "blktap.h" + +static char *blkif_state_name[] = { + [BLKIF_STATE_CLOSED] = "closed", + [BLKIF_STATE_DISCONNECTED] = "disconnected", + [BLKIF_STATE_CONNECTED] = "connected", +}; + +static char * blkif_status_name[] = { + [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", + [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", + [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", + [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", +}; + +static unsigned blktap_be_irq; +unsigned int blktap_be_state = BLKIF_STATE_CLOSED; +unsigned int blktap_be_evtchn; + +/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/ + +#define BLKIF_HASHSZ 1024 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) + +static kmem_cache_t *blkif_cachep; +static blkif_t *blkif_hash[BLKIF_HASHSZ]; + +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) +{ + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif != NULL) && + ((blkif->domid != domid) || (blkif->handle != handle)) ) + blkif = blkif->hash_next; + return blkif; +} + +static void __blkif_disconnect_complete(void *arg) +{ + blkif_t *blkif = (blkif_t *)arg; + ctrl_msg_t cmsg; + blkif_be_disconnect_t disc; + + /* + * These can't be done in blkif_disconnect() because at that point there + * may be outstanding requests at the disc whose asynchronous responses + * must still be notified to the remote driver. + */ + unbind_evtchn_from_irq(blkif->evtchn); + vfree(blkif->blk_ring.sring); + + /* Construct the deferred response message. */ + cmsg.type = CMSG_BLKIF_BE; + cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; + cmsg.id = blkif->disconnect_rspid; + cmsg.length = sizeof(blkif_be_disconnect_t); + disc.domid = blkif->domid; + disc.blkif_handle = blkif->handle; + disc.status = BLKIF_BE_STATUS_OKAY; + memcpy(cmsg.msg, &disc, sizeof(disc)); + + /* + * Make sure message is constructed /before/ status change, because + * after the status change the 'blkif' structure could be deallocated at + * any time. Also make sure we send the response /after/ status change, + * as otherwise a subsequent CONNECT request could spuriously fail if + * another CPU doesn't see the status change yet. + */ + mb(); + if ( blkif->status != DISCONNECTING ) + BUG(); + blkif->status = DISCONNECTED; + mb(); + + /* Send the successful response. */ + ctrl_if_send_response(&cmsg); +} + +void blkif_disconnect_complete(blkif_t *blkif) +{ + INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif); + schedule_work(&blkif->work); +} + +void blkif_ptfe_create(blkif_be_create_t *create) +{ + blkif_t *blkif, **pblkif; + domid_t domid = create->domid; + unsigned int handle = create->blkif_handle; + + + /* May want to store info on the connecting domain here. 
*/ + + DPRINTK("PT got BE_CREATE\n"); + + if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) + { + DPRINTK("Could not create blkif: out of memory\n"); + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + /* blkif struct init code from blkback.c */ + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + blkif->handle = handle; + blkif->status = DISCONNECTED; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 0); + + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( *pblkif != NULL ) + { + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) + { + DPRINTK("Could not create blkif: already exists\n"); + create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; + kmem_cache_free(blkif_cachep, blkif); + return; + } + pblkif = &(*pblkif)->hash_next; + } + + blkif->hash_next = *pblkif; + *pblkif = blkif; + + create->status = BLKIF_BE_STATUS_OKAY; +} + + +void blkif_ptfe_destroy(blkif_be_destroy_t *destroy) +{ + /* Clear anything that we initialized above. */ + + domid_t domid = destroy->domid; + unsigned int handle = destroy->blkif_handle; + blkif_t **pblkif, *blkif; + + DPRINTK("PT got BE_DESTROY\n"); + + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif = *pblkif) != NULL ) + { + if ( (blkif->domid == domid) && (blkif->handle == handle) ) + { + if ( blkif->status != DISCONNECTED ) + goto still_connected; + goto destroy; + } + pblkif = &blkif->hash_next; + } + + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + + still_connected: + destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; + return; + + destroy: + *pblkif = blkif->hash_next; + kmem_cache_free(blkif_cachep, blkif); + destroy->status = BLKIF_BE_STATUS_OKAY; +} + +void blkif_ptfe_connect(blkif_be_connect_t *connect) +{ + domid_t domid = connect->domid; + unsigned int handle = connect->blkif_handle; + unsigned int evtchn = connect->evtchn; + unsigned long shmem_frame = connect->shmem_frame; + struct vm_struct *vma; + pgprot_t prot; + int error; + blkif_t *blkif; + blkif_sring_t *sring; + + DPRINTK("PT got BE_CONNECT\n"); + + blkif = blkif_find_by_handle(domid, handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", + connect->domid, connect->blkif_handle); + connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + } + + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) + { + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); + error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), + shmem_frame<status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + else if ( error == -EFAULT ) { + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; + WPRINTK("BE_CONNECT: MAPPING error!\n"); + } + else + connect->status = BLKIF_BE_STATUS_ERROR; + vfree(vma->addr); + return; + } + + if ( blkif->status != DISCONNECTED ) + { + connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; + vfree(vma->addr); + return; + } + + sring = (blkif_sring_t *)vma->addr; + SHARED_RING_INIT(sring); + BACK_RING_INIT(&blkif->blk_ring, sring); + + blkif->evtchn = evtchn; + blkif->irq = bind_evtchn_to_irq(evtchn); + blkif->shmem_frame = shmem_frame; + blkif->status = CONNECTED; + blkif_get(blkif); + + request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif); + + connect->status = BLKIF_BE_STATUS_OKAY; +} + +int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) +{ + domid_t domid = 
disconnect->domid; + unsigned int handle = disconnect->blkif_handle; + blkif_t *blkif; + + DPRINTK("PT got BE_DISCONNECT\n"); + + blkif = blkif_find_by_handle(domid, handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("blkif_disconnect attempted for non-existent blkif" + " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); + disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return 1; /* Caller will send response error message. */ + } + + if ( blkif->status == CONNECTED ) + { + blkif->status = DISCONNECTING; + blkif->disconnect_rspid = rsp_id; + wmb(); /* Let other CPUs see the status change. */ + free_irq(blkif->irq, blkif); + blkif_deschedule(blkif); + blkif_put(blkif); + return 0; /* Caller should not send response message. */ + } + + disconnect->status = BLKIF_BE_STATUS_OKAY; + return 1; +} + +/*-----[ Control Messages to/from Backend VM ]----------------------------*/ + +/* Tell the controller to bring up the interface. */ +static void blkif_ptbe_send_interface_connect(void) +{ + ctrl_msg_t cmsg = { + .type = CMSG_BLKIF_FE, + .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, + .length = sizeof(blkif_fe_interface_connect_t), + }; + blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; + msg->handle = 0; + msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT; + + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} + +static void blkif_ptbe_close(void) +{ +} + +/* Move from CLOSED to DISCONNECTED state. */ +static void blkif_ptbe_disconnect(void) +{ + blkif_sring_t *sring; + + sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&blktap_be_ring, sring); + blktap_be_state = BLKIF_STATE_DISCONNECTED; + DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n"); + blkif_ptbe_send_interface_connect(); +} + +static void blkif_ptbe_connect(blkif_fe_interface_status_t *status) +{ + int err = 0; + + blktap_be_evtchn = status->evtchn; + blktap_be_irq = bind_evtchn_to_irq(blktap_be_evtchn); + + err = request_irq(blktap_be_irq, blkif_ptbe_int, + SA_SAMPLE_RANDOM, "blkif", NULL); + if ( err ) { + WPRINTK("blkfront request_irq failed (%d)\n", err); + return; + } else { + /* transtion to connected in case we need to do a + a partion probe on a whole disk */ + blktap_be_state = BLKIF_STATE_CONNECTED; + } +} + +static void unexpected(blkif_fe_interface_status_t *status) +{ + WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", + blkif_status_name[status->status], + blkif_state_name[blktap_be_state]); +} + +static void blkif_ptbe_status( + blkif_fe_interface_status_t *status) +{ + if ( status->handle != 0 ) + { + DPRINTK("Status change on unsupported blkif %d\n", + status->handle); + return; + } + + DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]); + + switch ( status->status ) + { + case BLKIF_INTERFACE_STATUS_CLOSED: + switch ( blktap_be_state ) + { + case BLKIF_STATE_CLOSED: + unexpected(status); + break; + case BLKIF_STATE_DISCONNECTED: + case BLKIF_STATE_CONNECTED: + unexpected(status); + blkif_ptbe_close(); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_DISCONNECTED: + switch ( blktap_be_state ) + { + case BLKIF_STATE_CLOSED: + blkif_ptbe_disconnect(); + break; + case BLKIF_STATE_DISCONNECTED: + case BLKIF_STATE_CONNECTED: + printk(KERN_ALERT "*** add recovery code to the tap driver. 
***\n"); + unexpected(status); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_CONNECTED: + switch ( blktap_be_state ) + { + case BLKIF_STATE_CLOSED: + unexpected(status); + blkif_ptbe_disconnect(); + blkif_ptbe_connect(status); + break; + case BLKIF_STATE_DISCONNECTED: + blkif_ptbe_connect(status); + break; + case BLKIF_STATE_CONNECTED: + unexpected(status); + blkif_ptbe_connect(status); + break; + } + break; + + case BLKIF_INTERFACE_STATUS_CHANGED: + switch ( blktap_be_state ) + { + case BLKIF_STATE_CLOSED: + case BLKIF_STATE_DISCONNECTED: + unexpected(status); + break; + case BLKIF_STATE_CONNECTED: + /* vbd_update(); */ + /* tap doesn't really get state changes... */ + unexpected(status); + break; + } + break; + + default: + DPRINTK("Status change to unknown value %d\n", status->status); + break; + } +} + +/*-----[ All control messages enter here: ]-------------------------------*/ + +void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->type ) + { + case CMSG_BLKIF_FE: + + switch ( msg->subtype ) + { + case CMSG_BLKIF_FE_INTERFACE_STATUS: + if ( msg->length != sizeof(blkif_fe_interface_status_t) ) + goto parse_error; + blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]); + break; + + default: + goto parse_error; + } + + case CMSG_BLKIF_BE: + + /* send a copy of the message to user if wanted */ + + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & BLKTAP_MODE_COPY_FE) ) { + + blktap_write_ctrl_ring(msg); + } + + switch ( msg->subtype ) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_CONNECT: + if ( msg->length != sizeof(blkif_be_connect_t) ) + goto parse_error; + blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]); + break; + case CMSG_BLKIF_BE_DISCONNECT: + if ( msg->length != sizeof(blkif_be_disconnect_t) ) + goto parse_error; + if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0], + msg->id) ) + return; + break; + + /* We just ignore anything to do with vbds for now. 
*/ + + case CMSG_BLKIF_BE_VBD_CREATE: + DPRINTK("PT got VBD_CREATE\n"); + ((blkif_be_vbd_create_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + case CMSG_BLKIF_BE_VBD_DESTROY: + DPRINTK("PT got VBD_DESTROY\n"); + ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + case CMSG_BLKIF_BE_VBD_GROW: + DPRINTK("PT got VBD_GROW\n"); + ((blkif_be_vbd_grow_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + case CMSG_BLKIF_BE_VBD_SHRINK: + DPRINTK("PT got VBD_SHRINK\n"); + ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status + = BLKIF_BE_STATUS_OKAY; + break; + default: + goto parse_error; + } + } + + ctrl_if_send_response(msg); + return; + + parse_error: + msg->length = 0; + ctrl_if_send_response(msg); +} + +/*-----[ Initialization ]-------------------------------------------------*/ + +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL, NULL); + memset(blkif_hash, 0, sizeof(blkif_hash)); + + blktap_be_ring.sring = NULL; +} diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c new file mode 100644 index 0000000000..e88c5629a6 --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c @@ -0,0 +1,472 @@ +/****************************************************************************** + * blktap_datapath.c + * + * XenLinux virtual block-device tap. + * Block request routing data path. + * + * Copyright (c) 2004, Andrew Warfield + * -- see full header in blktap.c + */ + +#include "blktap.h" +#include + +/*-----[ The data paths ]-------------------------------------------------*/ + +/* Connection to a single backend domain. */ +blkif_front_ring_t blktap_be_ring; + +/*-----[ Tracking active requests ]---------------------------------------*/ + +/* this must be the same as MAX_PENDING_REQS in blkback.c */ +#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U) + +active_req_t active_reqs[MAX_ACTIVE_REQS]; +ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS]; +spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED; +ACTIVE_RING_IDX active_prod, active_cons; +#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1)) +#define ACTIVE_IDX(_ar) (_ar - active_reqs) +#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons) + +inline active_req_t *get_active_req(void) +{ + ACTIVE_RING_IDX idx; + active_req_t *ar; + unsigned long flags; + + ASSERT(active_cons != active_prod); + + spin_lock_irqsave(&active_req_lock, flags); + idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)]; + ar = &active_reqs[idx]; + spin_unlock_irqrestore(&active_req_lock, flags); + + return ar; +} + +inline void free_active_req(active_req_t *ar) +{ + unsigned long flags; + + spin_lock_irqsave(&active_req_lock, flags); + active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar); + spin_unlock_irqrestore(&active_req_lock, flags); +} + +active_req_t *lookup_active_req(ACTIVE_RING_IDX idx) +{ + return &active_reqs[idx]; +} + +void active_reqs_init(void) +{ + ACTIVE_RING_IDX i; + + active_cons = 0; + active_prod = MAX_ACTIVE_REQS; + memset(active_reqs, 0, sizeof(active_reqs)); + for ( i = 0; i < MAX_ACTIVE_REQS; i++ ) + active_req_ring[i] = i; +} + +/* Requests passing through the tap to the backend hijack the id field + * in the request message. In it we put the AR index _AND_ the fe domid. + * the domid is used by the backend to map the pages properly. 
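+ */
+
+/*
+ * Round-trip sketch of that encoding (hypothetical helper, not part
+ * of the original patch; assumes the active-ring index fits in the
+ * low 16 bits):
+ */
+static inline int blktap_id_roundtrip_ok(domid_t dom, ACTIVE_RING_IDX idx)
+{
+    unsigned long id = ((unsigned long)dom << 16) | idx;
+
+    /* ID_TO_DOM()/ID_TO_IDX() from blktap.h invert the packing */
+    return (ID_TO_DOM(id) == dom) && (ID_TO_IDX(id) == idx);
+}
+
+/* MAKE_ID() below builds that encoding: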
+ */ + +static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) +{ + return ( (fe_dom << 16) | idx ); +} + +/*-----[ Ring helpers ]---------------------------------------------------*/ + +inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp) +{ + blkif_response_t *resp_d; + active_req_t *ar; + + ar = &active_reqs[ID_TO_IDX(rsp->id)]; + rsp->id = ar->id; + + resp_d = RING_GET_RESPONSE(&blkif->blk_ring, + blkif->blk_ring.rsp_prod_pvt); + memcpy(resp_d, rsp, sizeof(blkif_response_t)); + wmb(); + blkif->blk_ring.rsp_prod_pvt++; + + blkif_put(ar->blkif); + free_active_req(ar); + + return 0; +} + +inline int write_req_to_be_ring(blkif_request_t *req) +{ + blkif_request_t *req_d; + + if ( blktap_be_state != BLKIF_STATE_CONNECTED ) { + WPRINTK("Tap trying to access an unconnected backend!\n"); + return 0; + } + + req_d = RING_GET_REQUEST(&blktap_be_ring, + blktap_be_ring.req_prod_pvt); + memcpy(req_d, req, sizeof(blkif_request_t)); + wmb(); + blktap_be_ring.req_prod_pvt++; + + return 0; +} + +void kick_fe_domain(blkif_t *blkif) +{ + RING_PUSH_RESPONSES(&blkif->blk_ring); + notify_via_evtchn(blkif->evtchn); + DPRINTK("notified FE(dom %u)\n", blkif->domid); + +} + +void kick_be_domain(void) +{ + if ( blktap_be_state != BLKIF_STATE_CONNECTED ) + return; + + wmb(); /* Ensure that the frontend can see the requests. */ + RING_PUSH_REQUESTS(&blktap_be_ring); + notify_via_evtchn(blktap_be_evtchn); + DPRINTK("notified BE\n"); +} + +/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/ + +/*-----[ Scheduler list maint -from blkback ]--- */ + +static struct list_head blkio_schedule_list; +static spinlock_t blkio_schedule_list_lock; + +static int __on_blkdev_list(blkif_t *blkif) +{ + return blkif->blkdev_list.next != NULL; +} + +static void remove_from_blkdev_list(blkif_t *blkif) +{ + unsigned long flags; + if ( !__on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&blkio_schedule_list_lock, flags); + if ( __on_blkdev_list(blkif) ) + { + list_del(&blkif->blkdev_list); + blkif->blkdev_list.next = NULL; + blkif_put(blkif); + } + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); +} + +static void add_to_blkdev_list_tail(blkif_t *blkif) +{ + unsigned long flags; + if ( __on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&blkio_schedule_list_lock, flags); + if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) + { + list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); + blkif_get(blkif); + } + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); +} + + +/*-----[ Scheduler functions - from blkback ]--- */ + +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); + +static int do_block_io_op(blkif_t *blkif, int max_to_do); + +static int blkio_schedule(void *arg) +{ + DECLARE_WAITQUEUE(wq, current); + + blkif_t *blkif; + struct list_head *ent; + + daemonize( + "xentapd" + ); + + for ( ; ; ) + { + /* Wait for work to do. */ + add_wait_queue(&blkio_schedule_wait, &wq); + set_current_state(TASK_INTERRUPTIBLE); + if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || + list_empty(&blkio_schedule_list) ) + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&blkio_schedule_wait, &wq); + + /* Queue up a batch of requests. 
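+ *
+ * (Accounting aside: the ring of free slot indices above is consumed
+ * by get_active_req() (active_cons++) and refilled by
+ * free_active_req() (active_prod++), so
+ *
+ *     NR_ACTIVE_REQS == MAX_ACTIVE_REQS - (active_prod - active_cons)
+ *
+ * is the in-flight count: 0 at init, since active_prod starts at
+ * MAX_ACTIVE_REQS.  The sleep and batching tests here key off it.)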
*/ + while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) && + !list_empty(&blkio_schedule_list) ) + { + ent = blkio_schedule_list.next; + blkif = list_entry(ent, blkif_t, blkdev_list); + blkif_get(blkif); + remove_from_blkdev_list(blkif); + if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) + add_to_blkdev_list_tail(blkif); + blkif_put(blkif); + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + /* Push the batch through to disc. */ + run_task_queue(&tq_disk); +#endif + } +} + +static void maybe_trigger_blkio_schedule(void) +{ + /* + * Needed so that two processes, who together make the following predicate + * true, don't both read stale values and evaluate the predicate + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... + */ + smp_mb(); + + if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/ + !list_empty(&blkio_schedule_list) ) + wake_up(&blkio_schedule_wait); +} + +void blkif_deschedule(blkif_t *blkif) +{ + remove_from_blkdev_list(blkif); +} + +void __init blkdev_schedule_init(void) +{ + spin_lock_init(&blkio_schedule_list_lock); + INIT_LIST_HEAD(&blkio_schedule_list); + + if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) + BUG(); +} + +/*-----[ Interrupt entry from a frontend ]------ */ + +irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_t *blkif = dev_id; + + add_to_blkdev_list_tail(blkif); + maybe_trigger_blkio_schedule(); + return IRQ_HANDLED; +} + +/*-----[ Other Frontend Ring functions ]-------- */ + +/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/ +static int do_block_io_op(blkif_t *blkif, int max_to_do) +{ + /* we have pending messages from the real frontend. */ + + blkif_request_t *req_s; + RING_IDX i, rp; + unsigned long flags; + active_req_t *ar; + int more_to_do = 0; + int notify_be = 0, notify_user = 0; + + DPRINTK("PT got FE interrupt.\n"); + + if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1; + + /* lock both rings */ + spin_lock_irqsave(&blkif_io_lock, flags); + + rp = blkif->blk_ring.sring->req_prod; + rmb(); + + for ( i = blkif->blk_ring.req_cons; + (i != rp) && + !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i); + i++ ) + { + + if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) + { + more_to_do = 1; + break; + } + + req_s = RING_GET_REQUEST(&blkif->blk_ring, i); + /* This is a new request: + * Assign an active request record, and remap the id. + */ + ar = get_active_req(); + ar->id = req_s->id; + ar->nr_pages = req_s->nr_segments; + blkif_get(blkif); + ar->blkif = blkif; + req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar)); + /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */ + + /* FE -> BE interposition point is here. */ + + /* ------------------------------------------------------------- */ + /* BLKIF_OP_PROBE_HACK: */ + /* Signal to the backend that we are a tap domain. 
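+ *
+ * (The marker is BLKTAP_COOKIE, 0xbeadfeed from blktap.h, stuffed
+ * into an otherwise unused slot of the PROBE request.  A backend
+ * could then spot tap domains roughly as
+ *
+ *     if (req->operation == BLKIF_OP_PROBE &&
+ *         req->frame_and_sects[1] == BLKTAP_COOKIE)
+ *         ...request arrived via a tap domain...
+ *
+ * illustrative only; where the real backend performs this check is
+ * not shown in this patch.)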
*/ + + if (req_s->operation == BLKIF_OP_PROBE) { + DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n"); + req_s->frame_and_sects[1] = BLKTAP_COOKIE; + } + + /* ------------------------------------------------------------- */ + + /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */ + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & BLKTAP_MODE_COPY_FE) ) { + + /* Copy the response message to UFERing */ + /* In MODE_INTERCEPT_FE, map attached pages into the app vma */ + /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */ + + DPRINTK("req->UFERing\n"); + blktap_write_fe_ring(req_s); + notify_user = 1; + } + + /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */ + if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || + (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) { + + /* be included to prevent noise from the fe when its off */ + /* copy the request message to the BERing */ + + DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", + (unsigned)i & (RING_SIZE(&blktap_be_ring)-1), + (unsigned)blktap_be_ring.req_prod_pvt & + (RING_SIZE((&blktap_be_ring)-1))); + + write_req_to_be_ring(req_s); + notify_be = 1; + } + } + + blkif->blk_ring.req_cons = i; + + /* unlock rings */ + spin_unlock_irqrestore(&blkif_io_lock, flags); + + if (notify_user) + blktap_kick_user(); + if (notify_be) + kick_be_domain(); + + return more_to_do; +} + +/*-----[ Data to/from Backend (server) VM ]------------------------------*/ + + +irqreturn_t blkif_ptbe_int(int irq, void *dev_id, + struct pt_regs *ptregs) +{ + blkif_response_t *resp_s; + blkif_t *blkif; + RING_IDX rp, i; + unsigned long flags; + + DPRINTK("PT got BE interrupt.\n"); + + /* lock both rings */ + spin_lock_irqsave(&blkif_io_lock, flags); + + rp = blktap_be_ring.sring->rsp_prod; + rmb(); + + for ( i = blktap_be_ring.rsp_cons; i != rp; i++) + { + resp_s = RING_GET_RESPONSE(&blktap_be_ring, i); + + /* BE -> FE interposition point is here. */ + + blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif; + + /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */ + if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || + (blktap_mode & BLKTAP_MODE_COPY_BE) ) { + + /* Copy the response message to UBERing */ + /* In MODE_INTERCEPT_BE, map attached pages into the app vma */ + /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */ + + DPRINTK("rsp->UBERing\n"); + blktap_write_be_ring(resp_s); + blktap_kick_user(); + + } + + /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */ + if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || + (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) { + + /* (fe included to prevent random interference from the BE) */ + /* Copy the response message to FERing */ + + DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", + (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1), + (unsigned)blkif->blk_ring.rsp_prod_pvt & + (RING_SIZE((&blkif->blk_ring)-1))); + + write_resp_to_fe_ring(blkif, resp_s); + kick_fe_domain(blkif); + + } + } + + blktap_be_ring.rsp_cons = i; + + + spin_unlock_irqrestore(&blkif_io_lock, flags); + + return IRQ_HANDLED; +} + +/* Debug : print the current ring indices. 
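 */

The two mode tests in do_block_io_op() above, and their mirror images in
blkif_ptbe_int(), implement a small routing table: a message goes up to the
tap application if any INTERCEPT/COPY bit for its direction is set, and is
passed through unless an INTERCEPT bit is set. The runnable sketch below
tabulates that logic; the real BLKTAP_MODE_* values live in blktap.h, so the
bit assignments here are illustrative assumptions only:

    #include <stdio.h>

    #define MODE_INTERCEPT_FE 0x1   /* assumed values, see note above */
    #define MODE_COPY_FE      0x2
    #define MODE_INTERCEPT_BE 0x4
    #define MODE_COPY_BE      0x8

    int main(void)
    {
        unsigned modes[] = { 0, MODE_INTERCEPT_FE, MODE_COPY_FE,
                             MODE_INTERCEPT_BE, MODE_COPY_BE };
        unsigned i;

        for (i = 0; i < sizeof(modes)/sizeof(modes[0]); i++) {
            unsigned m = modes[i];
            /* First if-block: copy/map the request up to the tap app. */
            int to_user = (m & MODE_INTERCEPT_FE) || (m & MODE_COPY_FE);
            /* Second if-block: pass through unless intercepting. */
            int to_be   = !((m & MODE_INTERCEPT_FE) || (m & MODE_INTERCEPT_BE));
            printf("mode %#x: user ring %d, backend ring %d\n",
                   m, to_user, to_be);
        }
        return 0;
    }

/* Debug : print the current ring indices.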
*/ + +void print_vm_ring_idxs(void) +{ + int i; + blkif_t *blkif; + + WPRINTK("FE Rings: \n---------\n"); + for ( i = 0; i < 50; i++) { + blkif = blkif_find_by_handle((domid_t)i, 0); + if (blkif != NULL) { + if (blkif->blk_ring.sring != NULL) { + WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", i, + blkif->blk_ring.req_cons, + blkif->blk_ring.rsp_prod_pvt, + blkif->blk_ring.sring->req_prod, + blkif->blk_ring.sring->rsp_prod); + } else { + WPRINTK("%2d: [no device channel yet]\n", i); + } + } + } + if (blktap_be_ring.sring != NULL) { + WPRINTK("BE Ring: \n--------\n"); + WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + blktap_be_ring.rsp_cons, + blktap_be_ring.req_prod_pvt, + blktap_be_ring.sring->req_prod, + blktap_be_ring.sring->rsp_prod); + } +} diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c new file mode 100644 index 0000000000..3cc307fddf --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c @@ -0,0 +1,489 @@ +/****************************************************************************** + * blktap_userdev.c + * + * XenLinux virtual block-device tap. + * Control interface between the driver and a character device. + * + * Copyright (c) 2004, Andrew Warfield + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for control ring. */ + +#include "blktap.h" + + +unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH; + +/* Only one process may open /dev/xen/blktap at any time. */ +static unsigned long blktap_dev_inuse; +unsigned long blktap_ring_ok; /* make this ring->state */ + +/* for poll: */ +static wait_queue_head_t blktap_wait; + +/* Where things are inside the device mapping. */ +struct vm_area_struct *blktap_vma; +unsigned long mmap_vstart; +unsigned long rings_vstart; + +/* Rings up to user space. */ +static blkif_front_ring_t blktap_ufe_ring; +static blkif_back_ring_t blktap_ube_ring; +static ctrl_front_ring_t blktap_uctrl_ring; + +/* local prototypes */ +static int blktap_read_fe_ring(void); +static int blktap_read_be_ring(void); + +/* -------[ blktap vm ops ]------------------------------------------- */ + +static struct page *blktap_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + /* + * if the page has not been mapped in by the driver then generate + * a SIGBUS to the domain. + */ + + force_sig(SIGBUS, current); + + return 0; +} + +struct vm_operations_struct blktap_vm_ops = { + nopage: blktap_nopage, +}; + +/* -------[ blktap file ops ]----------------------------------------- */ + +static int blktap_open(struct inode *inode, struct file *filp) +{ + blkif_sring_t *sring; + ctrl_sring_t *csring; + + if ( test_and_set_bit(0, &blktap_dev_inuse) ) + return -EBUSY; + + printk(KERN_ALERT "blktap open.\n"); + + /* Allocate the ctrl ring. */ + csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL); + if (csring == NULL) + goto fail_nomem; + + SetPageReserved(virt_to_page(csring)); + + SHARED_RING_INIT(csring); + FRONT_RING_INIT(&blktap_uctrl_ring, csring); + + + /* Allocate the fe ring. */ + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) + goto fail_free_ctrl; + + SetPageReserved(virt_to_page(sring)); + + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&blktap_ufe_ring, sring); + + /* Allocate the be ring. 
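 */

blktap_open() above builds the control and frontend rings (and a backend
ring just below) over freshly zeroed shared pages with SHARED_RING_INIT()
and FRONT_RING_INIT()/BACK_RING_INIT(). The essential layout those macros
manage is a shared structure carrying the public producer indices and the
slots, with each end keeping private counters. A minimal stand-in for that
split (a sketch of the idea, not the real io/ring.h macros):

    #include <stdio.h>

    #define RING_ENTRIES 8                 /* power of two, like the real rings */

    struct sring {                         /* lives in the shared page */
        unsigned req_prod, rsp_prod;       /* written by front/back resp. */
        int slot[RING_ENTRIES];
    };

    struct front {                         /* private to the frontend */
        struct sring *s;
        unsigned req_prod_pvt, rsp_cons;
    };

    static void front_put_request(struct front *f, int v)
    {
        f->s->slot[f->req_prod_pvt++ & (RING_ENTRIES - 1)] = v;
    }

    static void front_push_requests(struct front *f)
    {
        /* On SMP a write barrier goes here, as in RING_PUSH_REQUESTS(). */
        f->s->req_prod = f->req_prod_pvt;
    }

    int main(void)
    {
        struct sring s = { 0 };
        struct front f = { &s, 0, 0 };
        front_put_request(&f, 42);
        front_push_requests(&f);
        printf("req_prod=%u slot0=%d\n", s.req_prod, s.slot[0]);
        return 0;
    }

/* Allocate the be ring.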
*/ + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) + goto fail_free_fe; + + SetPageReserved(virt_to_page(sring)); + + SHARED_RING_INIT(sring); + BACK_RING_INIT(&blktap_ube_ring, sring); + + DPRINTK(KERN_ALERT "blktap open.\n"); + + return 0; + + fail_free_ctrl: + free_page( (unsigned long) blktap_uctrl_ring.sring); + + fail_free_fe: + free_page( (unsigned long) blktap_ufe_ring.sring); + + fail_nomem: + return -ENOMEM; +} + +static int blktap_release(struct inode *inode, struct file *filp) +{ + blktap_dev_inuse = 0; + blktap_ring_ok = 0; + + printk(KERN_ALERT "blktap closed.\n"); + + /* Free the ring page. */ + ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring)); + free_page((unsigned long) blktap_uctrl_ring.sring); + + ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); + free_page((unsigned long) blktap_ufe_ring.sring); + + ClearPageReserved(virt_to_page(blktap_ube_ring.sring)); + free_page((unsigned long) blktap_ube_ring.sring); + + return 0; +} + +/* Note on mmap: + * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio + * work to do direct page access from userspace, this ended up being a + * problem. The bigger issue seems to be that there is no way to map + * a foreign page in to user space and have the virtual address of that + * page map sanely down to a mfn. + * Removing the VM_IO flag results in a loop in get_user_pages, as + * pfn_valid() always fails on a foreign page. + */ +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int size; + + printk(KERN_ALERT "blktap mmap (%lx, %lx)\n", + vma->vm_start, vma->vm_end); + + vma->vm_ops = &blktap_vm_ops; + + size = vma->vm_end - vma->vm_start; + if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) { + printk(KERN_INFO + "blktap: you _must_ map exactly %d pages!\n", + MMAP_PAGES + RING_PAGES); + return -EAGAIN; + } + + size >>= PAGE_SHIFT; + printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); + + rings_vstart = vma->vm_start; + mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); + + /* Map the ring pages to the start of the region and reserve it. */ + + /* not sure if I really need to do this... */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring)); + if (remap_pfn_range(vma, vma->vm_start, + __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) { + WPRINTK("ctrl_ring: remap_pfn_range failure!\n"); + } + + + DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring)); + if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, + __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) { + WPRINTK("be_ring: remap_pfn_range failure!\n"); + } + + DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring)); + if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), + __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) { + WPRINTK("fe_ring: remap_pfn_range failure!\n"); + } + + blktap_vma = vma; + blktap_ring_ok = 1; + + return 0; +} + +static int blktap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + switch(cmd) { + case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */ + return blktap_read_fe_ring(); + + case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */ + return blktap_read_be_ring(); + + case BLKTAP_IOCTL_SETMODE: + if (BLKTAP_MODE_VALID(arg)) { + blktap_mode = arg; + /* XXX: may need to flush rings here. 
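 */

The mmap layout enforced above is rigid: exactly RING_PAGES ring pages at
the start of the region, followed by MMAP_PAGES data pages, which is where
rings_vstart and mmap_vstart come from. The arithmetic, standalone; the real
RING_PAGES/MMAP_PAGES values come from blktap.h, so the numbers below are
illustrative (three ring pages matches the three remap_pfn_range() calls):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define RING_PAGES 3            /* ctrl + be + fe rings, one page each */
    #define MMAP_PAGES 1024         /* data pages (illustrative) */

    int main(void)
    {
        unsigned long rings_vstart = 0x10000000UL;   /* vma->vm_start */
        unsigned long mmap_vstart  =
            rings_vstart + ((unsigned long)RING_PAGES << PAGE_SHIFT);
        unsigned long total =
            (unsigned long)(MMAP_PAGES + RING_PAGES) << PAGE_SHIFT;
        printf("map %lu bytes: rings at %#lx, data at %#lx\n",
               total, rings_vstart, mmap_vstart);
        return 0;
    }

/* XXX: may need to flush rings here.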
 */
+            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+            return 0;
+        }
+    case BLKTAP_IOCTL_PRINT_IDXS:
+        {
+            print_vm_ring_idxs();
+            WPRINTK("User Rings: \n-----------\n");
+            WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+                    "| req_prod: %2d, rsp_prod: %2d\n",
+                    blktap_ufe_ring.rsp_cons,
+                    blktap_ufe_ring.req_prod_pvt,
+                    blktap_ufe_ring.sring->req_prod,
+                    blktap_ufe_ring.sring->rsp_prod);
+            WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
+                    "| req_prod: %2d, rsp_prod: %2d\n",
+                    blktap_ube_ring.req_cons,
+                    blktap_ube_ring.rsp_prod_pvt,
+                    blktap_ube_ring.sring->req_prod,
+                    blktap_ube_ring.sring->rsp_prod);
+
+        }
+    }
+    return -ENOIOCTLCMD;
+}
+
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+    poll_wait(file, &blktap_wait, wait);
+
+    if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
+         RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
+         RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+
+        RING_PUSH_REQUESTS(&blktap_uctrl_ring);
+        RING_PUSH_REQUESTS(&blktap_ufe_ring);
+        RING_PUSH_RESPONSES(&blktap_ube_ring);
+        return POLLIN | POLLRDNORM;
+    }
+
+    return 0;
+}
+
+void blktap_kick_user(void)
+{
+    /* blktap_ring->req_prod = blktap_req_prod; */
+    wake_up_interruptible(&blktap_wait);
+}
+
+static struct file_operations blktap_fops = {
+    owner:    THIS_MODULE,
+    poll:     blktap_poll,
+    ioctl:    blktap_ioctl,
+    open:     blktap_open,
+    release:  blktap_release,
+    mmap:     blktap_mmap,
+};
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+    blkif_request_t *target;
+    int error, i;
+
+    /*
+     * This is called to pass a request from the real frontend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: ufe_ring not ready for a request!\n");
+        return 0;
+    }
+
+    if ( RING_FULL(&blktap_ufe_ring) ) {
+        DPRINTK("blktap: fe_ring is full, can't add.\n");
+        return 0;
+    }
+
+    target = RING_GET_REQUEST(&blktap_ufe_ring,
+                              blktap_ufe_ring.req_prod_pvt);
+    memcpy(target, req, sizeof(*req));
+
+    /* Attempt to map the foreign pages directly in to the application */
+    for (i=0; i<req->nr_segments; i++) {
+
+        error = direct_remap_area_pages(blktap_vma->vm_mm,
+                                        MMAP_VADDR(ID_TO_IDX(req->id), i),
+                                        target->frame_and_sects[i] & PAGE_MASK,
+                                        PAGE_SIZE,
+                                        blktap_vma->vm_page_prot,
+                                        ID_TO_DOM(req->id));
+        if ( error != 0 ) {
+            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+            /* the request is now dropped on the floor. */
+            return 0;
+        }
+    }
+
+    blktap_ufe_ring.req_prod_pvt++;
+
+    return 0;
+}
+
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+    blkif_response_t *target;
+
+    /*
+     * This is called to pass a request from the real backend domain's
+     * blkif ring to the character device.
+     */
+
+    if ( ! blktap_ring_ok ) {
+        DPRINTK("blktap: be_ring not ready for a request!\n");
+        return 0;
+    }
+
+    /* No test for fullness in the response direction.
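 */

blktap_write_fe_ring() above places segment i of the request with index
ID_TO_IDX(req->id) at MMAP_VADDR(idx, i) inside the data area that
blktap_mmap() reserved. The blktap MMAP_VADDR() itself is defined in
blktap.h (not shown); the sketch below assumes it has the same shape as the
usbback definition that appears later in this patch:

    #include <assert.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define MMAP_PAGES_PER_REQUEST 12    /* illustrative; real value in blktap.h */

    static unsigned long mmap_vstart = 0x20000000UL;

    #define MMAP_VADDR(req, seg)                                      \
        (mmap_vstart + ((req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
         ((seg) * PAGE_SIZE))

    int main(void)
    {
        /* Consecutive requests are MMAP_PAGES_PER_REQUEST pages apart... */
        assert(MMAP_VADDR(1, 0) - MMAP_VADDR(0, 0) ==
               MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
        /* ...and consecutive segments of one request are one page apart. */
        assert(MMAP_VADDR(0, 2) - MMAP_VADDR(0, 1) == PAGE_SIZE);
        return 0;
    }

/* No test for fullness in the response direction.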
*/ + + target = RING_GET_RESPONSE(&blktap_ube_ring, + blktap_ube_ring.rsp_prod_pvt); + memcpy(target, rsp, sizeof(*rsp)); + + /* no mapping -- pages were mapped in blktap_write_fe_ring() */ + + blktap_ube_ring.rsp_prod_pvt++; + + return 0; +} + +static void blktap_fast_flush_area(int idx, int nr_pages) +{ + multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; + int i; + + for ( i = 0; i < nr_pages; i++ ) + { + mcl[i].op = __HYPERVISOR_update_va_mapping; + mcl[i].args[0] = MMAP_VADDR(idx, i); + mcl[i].args[1] = 0; + mcl[i].args[2] = 0; + } + + mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB; + if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) + BUG(); +} + +static int blktap_read_fe_ring(void) +{ + /* This is called to read responses from the UFE ring. */ + + RING_IDX i, rp; + blkif_response_t *resp_s; + blkif_t *blkif; + active_req_t *ar; + + DPRINTK("blktap_read_fe_ring()\n"); + + /* if we are forwarding from UFERring to FERing */ + if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { + + /* for each outstanding message on the UFEring */ + rp = blktap_ufe_ring.sring->rsp_prod; + rmb(); + + for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ ) + { + resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i); + + DPRINTK("resp->fe_ring\n"); + ar = lookup_active_req(ID_TO_IDX(resp_s->id)); + blkif = ar->blkif; + blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); + write_resp_to_fe_ring(blkif, resp_s); + kick_fe_domain(blkif); + } + + blktap_ufe_ring.rsp_cons = i; + } + return 0; +} + +static int blktap_read_be_ring(void) +{ + /* This is called to read requests from the UBE ring. */ + + RING_IDX i, rp; + blkif_request_t *req_s; + + DPRINTK("blktap_read_be_ring()\n"); + + /* if we are forwarding from UFERring to FERing */ + if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { + + /* for each outstanding message on the UFEring */ + rp = blktap_ube_ring.sring->req_prod; + rmb(); + for ( i = blktap_ube_ring.req_cons; i != rp; i++ ) + { + req_s = RING_GET_REQUEST(&blktap_ube_ring, i); + + DPRINTK("req->be_ring\n"); + write_req_to_be_ring(req_s); + kick_be_domain(); + } + + blktap_ube_ring.req_cons = i; + } + + return 0; +} + +int blktap_write_ctrl_ring(ctrl_msg_t *msg) +{ + ctrl_msg_t *target; + + if ( ! blktap_ring_ok ) { + DPRINTK("blktap: be_ring not ready for a request!\n"); + return 0; + } + + /* No test for fullness in the response direction. */ + + target = RING_GET_REQUEST(&blktap_uctrl_ring, + blktap_uctrl_ring.req_prod_pvt); + memcpy(target, msg, sizeof(*msg)); + + blktap_uctrl_ring.req_prod_pvt++; + + /* currently treat the ring as unidirectional. 
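 */

blktap_read_fe_ring() and blktap_read_be_ring() above both use the standard
drain idiom seen throughout this patch: snapshot the shared producer index
once (with rmb() on the real rings), consume entries up to it, then publish
the new consumer index. The same idiom in a standalone sketch:

    #include <stdio.h>

    #define RING_ENTRIES 8

    struct sring { unsigned rsp_prod; int slot[RING_ENTRIES]; };
    struct front { struct sring *s; unsigned rsp_cons; };

    static void drain_responses(struct front *f)
    {
        unsigned rp = f->s->rsp_prod;  /* snapshot; rmb() here in the kernel */
        unsigned i;
        for (i = f->rsp_cons; i != rp; i++)
            printf("response %d\n", f->s->slot[i & (RING_ENTRIES - 1)]);
        f->rsp_cons = rp;
    }

    int main(void)
    {
        struct sring s = { 2, { 10, 11 } };
        struct front f = { &s, 0 };
        drain_responses(&f);
        return 0;
    }

/* currently treat the ring as unidirectional.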
*/ + blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod; + + return 0; + +} + +/* -------[ blktap module setup ]------------------------------------- */ + +static struct miscdevice blktap_miscdev = { + .minor = BLKTAP_MINOR, + .name = "blktap", + .fops = &blktap_fops, + .devfs_name = "misc/blktap", +}; + +int blktap_init(void) +{ + int err; + + err = misc_register(&blktap_miscdev); + if ( err != 0 ) + { + printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err); + return err; + } + + init_waitqueue_head(&blktap_wait); + + + return 0; +} diff --git a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c index 9c57aaf3a1..5967f52842 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c @@ -176,8 +176,9 @@ static int privcmd_ioctl(struct inode *inode, struct file *file, { unsigned long m2pv = (unsigned long)machine_to_phys_mapping; pgd_t *pgd = pgd_offset_k(m2pv); - pmd_t *pmd = pmd_offset(pgd, m2pv); - unsigned long m2p_start_mfn = pmd_val_ma(*pmd) >> PAGE_SHIFT; + pud_t *pud = pud_offset(pgd, m2pv); + pmd_t *pmd = pmd_offset(pud, m2pv); + unsigned long m2p_start_mfn = pfn_to_mfn(pmd_val(*pmd) >> PAGE_SHIFT); ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0; } break; diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h new file mode 100644 index 0000000000..bcab2041bc --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h @@ -0,0 +1,85 @@ + +#ifndef __USBIF__BACKEND__COMMON_H__ +#define __USBIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if 0 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define ASSERT(_p) ((void)0) +#define DPRINTK(_f, _a...) ((void)0) +#endif + +typedef struct usbif_priv_st usbif_priv_t; + +struct usbif_priv_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned long shmem_frame; + unsigned int evtchn; + int irq; + /* Comms Information */ + usbif_back_ring_t usb_ring; + /* Private fields. */ + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + /* + * DISCONNECT response is deferred until pending requests are ack'ed. + * We therefore need to store the id from the original request. 
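 */

The usbif_get()/usbif_put() macros defined just below follow the usual
last-put-runs-teardown refcount pattern, with atomic_dec_and_test() gating
usbif_disconnect_complete(). A userspace sketch of the same pattern using
C11 atomics (obj_t and disconnect_complete() are stand-in names):

    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct { atomic_int refcnt; } obj_t;

    static void disconnect_complete(obj_t *o)  /* stand-in for the real one */
    {
        printf("last reference dropped; tearing down %p\n", (void *)o);
    }

    #define obj_get(o) atomic_fetch_add(&(o)->refcnt, 1)
    #define obj_put(o)                                   \
        do {                                             \
            if (atomic_fetch_sub(&(o)->refcnt, 1) == 1)  \
                disconnect_complete(o);                  \
        } while (0)

    int main(void)
    {
        obj_t o;
        atomic_init(&o.refcnt, 1);  /* reference held by the creator */
        obj_get(&o);                /* e.g. taken at connect time */
        obj_put(&o);                /* one reference still left */
        obj_put(&o);                /* last put runs disconnect_complete() */
        return 0;
    }

/*
 * DISCONNECT response is deferred until pending requests are ack'ed.
 * We therefore need to store the id from the original request.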
+ */ + u8 disconnect_rspid; + usbif_priv_t *hash_next; + struct list_head usbif_list; + spinlock_t usb_ring_lock; + atomic_t refcnt; + + struct work_struct work; +}; + +void usbif_create(usbif_be_create_t *create); +void usbif_destroy(usbif_be_destroy_t *destroy); +void usbif_connect(usbif_be_connect_t *connect); +int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id); +void usbif_disconnect_complete(usbif_priv_t *up); + +void usbif_release_port(usbif_be_release_port_t *msg); +int usbif_claim_port(usbif_be_claim_port_t *msg); +void usbif_release_ports(usbif_priv_t *up); + +usbif_priv_t *usbif_find(domid_t domid); +#define usbif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define usbif_put(_b) \ + do { \ + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ + usbif_disconnect_complete(_b); \ + } while (0) + + +void usbif_interface_init(void); +void usbif_ctrlif_init(void); + +void usbif_deschedule(usbif_priv_t *up); +void remove_from_usbif_list(usbif_priv_t *up); + +irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs); + +#endif /* __USBIF__BACKEND__COMMON_H__ */ diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c new file mode 100644 index 0000000000..899394a629 --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c @@ -0,0 +1,77 @@ +/****************************************************************************** + * arch/xen/drivers/usbif/backend/control.c + * + * Routines for interfacing with the control plane. + * + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" + +static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype); + + switch ( msg->subtype ) + { + case CMSG_USBIF_BE_CREATE: + if ( msg->length != sizeof(usbif_be_create_t) ) + goto parse_error; + usbif_create((usbif_be_create_t *)&msg->msg[0]); + break; + case CMSG_USBIF_BE_DESTROY: + if ( msg->length != sizeof(usbif_be_destroy_t) ) + goto parse_error; + usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]); + break; + case CMSG_USBIF_BE_CONNECT: + if ( msg->length != sizeof(usbif_be_connect_t) ) + goto parse_error; + usbif_connect((usbif_be_connect_t *)&msg->msg[0]); + break; + case CMSG_USBIF_BE_DISCONNECT: + if ( msg->length != sizeof(usbif_be_disconnect_t) ) + goto parse_error; + if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) ) + return; /* Sending the response is deferred until later. */ + break; + case CMSG_USBIF_BE_CLAIM_PORT: + if ( msg->length != sizeof(usbif_be_claim_port_t) ) + goto parse_error; + usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]); + break; + case CMSG_USBIF_BE_RELEASE_PORT: + if ( msg->length != sizeof(usbif_be_release_port_t) ) + goto parse_error; + usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]); + break; + default: + goto parse_error; + } + + ctrl_if_send_response(msg); + return; + + parse_error: + DPRINTK("Parse error while reading message subtype %d, len %d\n", + msg->subtype, msg->length); + msg->length = 0; + ctrl_if_send_response(msg); +} + +void usbif_ctrlif_init(void) +{ + ctrl_msg_t cmsg; + usbif_be_driver_status_changed_t st; + + (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + /* Send a driver-UP notification to the domain controller. 
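 */

usbif_ctrlif_rx() above is a textbook control-message dispatcher: check the
declared length against the payload struct before casting, and report a
parse error by sending back a zero-length response. The same shape, reduced
to a runnable sketch with an invented message type (msg_t here is a
simplified stand-in, not the real ctrl_msg_t):

    #include <stdio.h>
    #include <string.h>

    enum { MSG_CREATE = 1 };                     /* invented for illustration */
    typedef struct { unsigned domid; } create_t;
    typedef struct { int subtype; int length; char msg[64]; } msg_t;

    static void dispatch(msg_t *m)
    {
        switch (m->subtype) {
        case MSG_CREATE:
            if (m->length != sizeof(create_t))
                goto parse_error;                /* reject short/long payloads */
            printf("create dom %u\n", ((create_t *)m->msg)->domid);
            return;
        default:
        parse_error:
            printf("parse error: subtype %d, len %d\n", m->subtype, m->length);
            m->length = 0;                       /* empty (error) response */
        }
    }

    int main(void)
    {
        msg_t m = { MSG_CREATE, sizeof(create_t), { 0 } };
        create_t c = { 3 };
        memcpy(m.msg, &c, sizeof(c));
        dispatch(&m);
        return 0;
    }

/* Send a driver-UP notification to the domain controller.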
*/ + cmsg.type = CMSG_USBIF_BE; + cmsg.subtype = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED; + cmsg.length = sizeof(usbif_be_driver_status_changed_t); + st.status = USBIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &st, sizeof(st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c new file mode 100644 index 0000000000..4630da8951 --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c @@ -0,0 +1,252 @@ +/****************************************************************************** + * arch/xen/drivers/usbif/backend/interface.c + * + * USB device interface management. + * + * by Mark Williamson, Copyright (c) 2004 + */ + + +/****************************************************************************** + * arch/xen/drivers/blkif/backend/interface.c + * + * Block-device interface management. + * + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" + +#define USBIF_HASHSZ 1024 +#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1)) + +static kmem_cache_t *usbif_priv_cachep; +static usbif_priv_t *usbif_priv_hash[USBIF_HASHSZ]; + +usbif_priv_t *usbif_find(domid_t domid) +{ + usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)]; + while ( (up != NULL ) && ( up->domid != domid ) ) + up = up->hash_next; + return up; +} + +static void __usbif_disconnect_complete(void *arg) +{ + usbif_priv_t *usbif = (usbif_priv_t *)arg; + ctrl_msg_t cmsg; + usbif_be_disconnect_t disc; + + /* + * These can't be done in usbif_disconnect() because at that point there + * may be outstanding requests at the device whose asynchronous responses + * must still be notified to the remote driver. + */ + unbind_evtchn_from_irq(usbif->evtchn); + vfree(usbif->usb_ring.sring); + + /* Construct the deferred response message. */ + cmsg.type = CMSG_USBIF_BE; + cmsg.subtype = CMSG_USBIF_BE_DISCONNECT; + cmsg.id = usbif->disconnect_rspid; + cmsg.length = sizeof(usbif_be_disconnect_t); + disc.domid = usbif->domid; + disc.status = USBIF_BE_STATUS_OKAY; + memcpy(cmsg.msg, &disc, sizeof(disc)); + + /* + * Make sure message is constructed /before/ status change, because + * after the status change the 'usbif' structure could be deallocated at + * any time. Also make sure we send the response /after/ status change, + * as otherwise a subsequent CONNECT request could spuriously fail if + * another CPU doesn't see the status change yet. + */ + mb(); + if ( usbif->status != DISCONNECTING ) + BUG(); + usbif->status = DISCONNECTED; + mb(); + + /* Send the successful response. 
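 */

usbif_find() above is a chained hash keyed by domain id, with USBIF_HASH()
masking into a power-of-two bucket array. The same structure, standalone:

    #include <stdio.h>
    #include <stddef.h>

    #define HASHSZ 16                     /* power of two, like USBIF_HASHSZ */
    #define HASH(d) ((d) & (HASHSZ - 1))

    typedef struct node { unsigned domid; struct node *next; } node_t;
    static node_t *tbl[HASHSZ];

    static node_t *find(unsigned domid)
    {
        node_t *n = tbl[HASH(domid)];
        while (n != NULL && n->domid != domid)
            n = n->next;
        return n;
    }

    int main(void)
    {
        node_t a = { 5, NULL };
        tbl[HASH(5)] = &a;
        printf("found dom5: %s\n", find(5) ? "yes" : "no");
        return 0;
    }

/* Send the successful response.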
 */
+    ctrl_if_send_response(&cmsg);
+}
+
+void usbif_disconnect_complete(usbif_priv_t *up)
+{
+    INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
+    schedule_work(&up->work);
+}
+
+void usbif_create(usbif_be_create_t *create)
+{
+    domid_t       domid = create->domid;
+    usbif_priv_t **pup, *up;
+
+    if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
+    {
+        DPRINTK("Could not create usbif: out of memory\n");
+        create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    memset(up, 0, sizeof(*up));
+    up->domid  = domid;
+    up->status = DISCONNECTED;
+    spin_lock_init(&up->usb_ring_lock);
+    atomic_set(&up->refcnt, 0);
+
+    pup = &usbif_priv_hash[USBIF_HASH(domid)];
+    while ( *pup != NULL )
+    {
+        if ( (*pup)->domid == domid )
+        {
+            create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
+            kmem_cache_free(usbif_priv_cachep, up);
+            return;
+        }
+        pup = &(*pup)->hash_next;
+    }
+
+    up->hash_next = *pup;
+    *pup = up;
+
+    create->status = USBIF_BE_STATUS_OKAY;
+}
+
+void usbif_destroy(usbif_be_destroy_t *destroy)
+{
+    domid_t       domid = destroy->domid;
+    usbif_priv_t **pup, *up;
+
+    pup = &usbif_priv_hash[USBIF_HASH(domid)];
+    while ( (up = *pup) != NULL )
+    {
+        if ( up->domid == domid )
+        {
+            if ( up->status != DISCONNECTED )
+                goto still_connected;
+            goto destroy;
+        }
+        pup = &up->hash_next;
+    }
+
+    destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+    return;
+
+ still_connected:
+    destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+    return;
+
+ destroy:
+    *pup = up->hash_next;
+    usbif_release_ports(up);
+    kmem_cache_free(usbif_priv_cachep, up);
+    destroy->status = USBIF_BE_STATUS_OKAY;
+}
+
+void usbif_connect(usbif_be_connect_t *connect)
+{
+    domid_t       domid  = connect->domid;
+    unsigned int  evtchn = connect->evtchn;
+    unsigned long shmem_frame = connect->shmem_frame;
+    struct vm_struct *vma;
+    pgprot_t      prot;
+    int           error;
+    usbif_priv_t *up;
+    usbif_sring_t *sring;
+
+    up = usbif_find(domid);
+    if ( unlikely(up == NULL) )
+    {
+        DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n",
+                connect->domid);
+        connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+        return;
+    }
+
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+    {
+        connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+                                    prot, domid);
+    if ( error != 0 )
+    {
+        if ( error == -ENOMEM )
+            connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+        else if ( error == -EFAULT )
+            connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
+        else
+            connect->status = USBIF_BE_STATUS_ERROR;
+        vfree(vma->addr);
+        return;
+    }
+
+    if ( up->status != DISCONNECTED )
+    {
+        connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+        vfree(vma->addr);
+        return;
+    }
+
+    sring = (usbif_sring_t *)vma->addr;
+    SHARED_RING_INIT(sring);
+    BACK_RING_INIT(&up->usb_ring, sring);
+
+    up->evtchn = evtchn;
+    up->irq = bind_evtchn_to_irq(evtchn);
+    up->shmem_frame = shmem_frame;
+    up->status = CONNECTED;
+    usbif_get(up);
+
+    request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
+
+    connect->status = USBIF_BE_STATUS_OKAY;
+}
+
+/* Remove URBs for this interface before destroying it.
*/ +void usbif_deschedule(usbif_priv_t *up) +{ + remove_from_usbif_list(up); +} + +int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id) +{ + domid_t domid = disconnect->domid; + usbif_priv_t *up; + + up = usbif_find(domid); + if ( unlikely(up == NULL) ) + { + DPRINTK("usbif_disconnect attempted for non-existent usbif" + " (%u)\n", disconnect->domid); + disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND; + return 1; /* Caller will send response error message. */ + } + + if ( up->status == CONNECTED ) + { + up->status = DISCONNECTING; + up->disconnect_rspid = rsp_id; + wmb(); /* Let other CPUs see the status change. */ + free_irq(up->irq, up); + usbif_deschedule(up); + usbif_put(up); + return 0; /* Caller should not send response message. */ + } + + disconnect->status = USBIF_BE_STATUS_OKAY; + return 1; +} + +void __init usbif_interface_init(void) +{ + usbif_priv_cachep = kmem_cache_create("usbif_priv_cache", + sizeof(usbif_priv_t), + 0, 0, NULL, NULL); + memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash)); +} diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c new file mode 100644 index 0000000000..b039b4506b --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c @@ -0,0 +1,1070 @@ +/****************************************************************************** + * arch/xen/drivers/usbif/backend/main.c + * + * Backend for the Xen virtual USB driver - provides an abstraction of a + * USB host controller to the corresponding frontend driver. + * + * by Mark Williamson + * Copyright (c) 2004 Intel Research Cambridge + * Copyright (c) 2004, 2005 Mark Williamson + * + * Based on arch/xen/drivers/blkif/backend/main.c + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + */ + +#include "common.h" + + +#include +#include +#include +#include +#include + +/* + * This is rather arbitrary. + */ +#define MAX_PENDING_REQS 4 +#define BATCH_PER_DOMAIN 1 + +static unsigned long mmap_vstart; + +/* Needs to be sufficiently large that we can map the (large) buffers + * the USB mass storage driver wants. */ +#define MMAP_PAGES_PER_REQUEST \ + (128) +#define MMAP_PAGES \ + (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) + +#define MMAP_VADDR(_req,_seg) \ + (mmap_vstart + \ + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) + + +static spinlock_t owned_ports_lock; +LIST_HEAD(owned_ports); + +/* A list of these structures is used to track ownership of physical USB + * ports. */ +typedef struct +{ + usbif_priv_t *usbif_priv; + char path[16]; + int guest_port; + int enabled; + struct list_head list; + unsigned long guest_address; /* The USB device address that has been + * assigned by the guest. */ + int dev_present; /* Is there a device present? */ + struct usb_device * dev; + unsigned long ifaces; /* What interfaces are present on this device? */ +} owned_port_t; + + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. The request is complete, the specified + * domain has a response queued for it, with the saved 'id' passed back. + */ +typedef struct { + usbif_priv_t *usbif_priv; + unsigned long id; + int nr_pages; + unsigned short operation; + int status; +} pending_req_t; + +/* + * We can't allocate pending_req's in order, since they may complete out of + * order. We therefore maintain an allocation ring. 
This ring also indicates + * when enough work has been passed down -- at that point the allocation ring + * will be empty. + */ +static pending_req_t pending_reqs[MAX_PENDING_REQS]; +static unsigned char pending_ring[MAX_PENDING_REQS]; +static spinlock_t pend_prod_lock; + +/* NB. We use a different index type to differentiate from shared usb rings. */ +typedef unsigned int PEND_RING_IDX; +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) +static PEND_RING_IDX pending_prod, pending_cons; +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do); +static void make_response(usbif_priv_t *usbif, unsigned long id, + unsigned short op, int st, int inband, + unsigned long actual_length); +static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port); +static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req); +static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid); +static owned_port_t *usbif_find_port(char *); + +/****************************************************************** + * PRIVATE DEBUG FUNCTIONS + */ + +#undef DEBUG +#ifdef DEBUG + +static void dump_port(owned_port_t *p) +{ + printk(KERN_DEBUG "owned_port_t @ %p\n" + " usbif_priv @ %p\n" + " path: %s\n" + " guest_port: %d\n" + " guest_address: %ld\n" + " dev_present: %d\n" + " dev @ %p\n" + " ifaces: 0x%lx\n", + p, p->usbif_priv, p->path, p->guest_port, p->guest_address, + p->dev_present, p->dev, p->ifaces); +} + + +static void dump_request(usbif_request_t *req) +{ + printk(KERN_DEBUG "id = 0x%lx\n" + "devnum %d\n" + "endpoint 0x%x\n" + "direction %d\n" + "speed %d\n" + "pipe_type 0x%x\n" + "transfer_buffer 0x%lx\n" + "length 0x%lx\n" + "transfer_flags 0x%lx\n" + "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n" + "iso_schedule = 0x%lx\n" + "num_iso %ld\n", + req->id, req->devnum, req->endpoint, req->direction, req->speed, + req->pipe_type, req->transfer_buffer, req->length, + req->transfer_flags, req->setup[0], req->setup[1], req->setup[2], + req->setup[3], req->setup[4], req->setup[5], req->setup[6], + req->setup[7], req->iso_schedule, req->num_iso); +} + +static void dump_urb(struct urb *urb) +{ + printk(KERN_DEBUG "dumping urb @ %p\n", urb); + +#define DUMP_URB_FIELD(name, format) \ + printk(KERN_DEBUG " " # name " " format "\n", urb-> name) + + DUMP_URB_FIELD(pipe, "0x%x"); + DUMP_URB_FIELD(status, "%d"); + DUMP_URB_FIELD(transfer_flags, "0x%x"); + DUMP_URB_FIELD(transfer_buffer, "%p"); + DUMP_URB_FIELD(transfer_buffer_length, "%d"); + DUMP_URB_FIELD(actual_length, "%d"); +} + +static void dump_response(usbif_response_t *resp) +{ + printk(KERN_DEBUG "usbback: Sending response:\n" + " id = 0x%x\n" + " op = %d\n" + " status = %d\n" + " data = %d\n" + " length = %d\n", + resp->id, resp->op, resp->status, resp->data, resp->length); +} + +#else /* DEBUG */ + +#define dump_port(blah) ((void)0) +#define dump_request(blah) ((void)0) +#define dump_urb(blah) ((void)0) +#define dump_response(blah) ((void)0) + +#endif /* DEBUG */ + +/****************************************************************** + * MEMORY MANAGEMENT + */ + +static void fast_flush_area(int idx, int nr_pages) +{ + multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; + int i; + + for ( i = 0; i < nr_pages; i++ ) + { + mcl[i].op = __HYPERVISOR_update_va_mapping; + mcl[i].args[0] = MMAP_VADDR(idx, i); + mcl[i].args[1] = 0; + mcl[i].args[2] = 0; + } + + mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB; + if ( unlikely(HYPERVISOR_multicall(mcl, 
nr_pages) != 0) ) + BUG(); +} + + +/****************************************************************** + * USB INTERFACE SCHEDULER LIST MAINTENANCE + */ + +static struct list_head usbio_schedule_list; +static spinlock_t usbio_schedule_list_lock; + +static int __on_usbif_list(usbif_priv_t *up) +{ + return up->usbif_list.next != NULL; +} + +void remove_from_usbif_list(usbif_priv_t *up) +{ + unsigned long flags; + if ( !__on_usbif_list(up) ) return; + spin_lock_irqsave(&usbio_schedule_list_lock, flags); + if ( __on_usbif_list(up) ) + { + list_del(&up->usbif_list); + up->usbif_list.next = NULL; + usbif_put(up); + } + spin_unlock_irqrestore(&usbio_schedule_list_lock, flags); +} + +static void add_to_usbif_list_tail(usbif_priv_t *up) +{ + unsigned long flags; + if ( __on_usbif_list(up) ) return; + spin_lock_irqsave(&usbio_schedule_list_lock, flags); + if ( !__on_usbif_list(up) && (up->status == CONNECTED) ) + { + list_add_tail(&up->usbif_list, &usbio_schedule_list); + usbif_get(up); + } + spin_unlock_irqrestore(&usbio_schedule_list_lock, flags); +} + +void free_pending(int pending_idx) +{ + unsigned long flags; + + /* Free the pending request. */ + spin_lock_irqsave(&pend_prod_lock, flags); + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + spin_unlock_irqrestore(&pend_prod_lock, flags); +} + +/****************************************************************** + * COMPLETION CALLBACK -- Called as urb->complete() + */ + +static void maybe_trigger_usbio_schedule(void); + +static void __end_usb_io_op(struct urb *purb) +{ + pending_req_t *pending_req; + int pending_idx; + + pending_req = purb->context; + + pending_idx = pending_req - pending_reqs; + + ASSERT(purb->actual_length <= purb->transfer_buffer_length); + ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE); + + /* An error fails the entire request. */ + if ( purb->status ) + { + printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status); + } + + if ( usb_pipetype(purb->pipe) == 0 ) + { + int i; + usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1); + + /* If we're dealing with an iso pipe, we need to copy back the schedule. */ + for ( i = 0; i < purb->number_of_packets; i++ ) + { + sched[i].length = purb->iso_frame_desc[i].actual_length; + ASSERT(sched[i].buffer_offset == + purb->iso_frame_desc[i].offset); + sched[i].status = purb->iso_frame_desc[i].status; + } + } + + fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages); + + kfree(purb->setup_packet); + + make_response(pending_req->usbif_priv, pending_req->id, + pending_req->operation, pending_req->status, 0, purb->actual_length); + usbif_put(pending_req->usbif_priv); + + usb_free_urb(purb); + + free_pending(pending_idx); + + rmb(); + + /* Check for anything still waiting in the rings, having freed a request... */ + maybe_trigger_usbio_schedule(); +} + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait); + +static int usbio_schedule(void *arg) +{ + DECLARE_WAITQUEUE(wq, current); + + usbif_priv_t *up; + struct list_head *ent; + + daemonize(); + + for ( ; ; ) + { + /* Wait for work to do. */ + add_wait_queue(&usbio_schedule_wait, &wq); + set_current_state(TASK_INTERRUPTIBLE); + if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || + list_empty(&usbio_schedule_list) ) + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&usbio_schedule_wait, &wq); + + /* Queue up a batch of requests. 
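 */

free_pending() above returns a slot index to pending_ring, the
free-list-as-ring described in the comment further up: free slot indices
always sit between pending_cons and pending_prod, and NR_PENDING_REQS falls
out of the two counters. A self-contained model of that allocator:

    #include <assert.h>

    #define MAX_PENDING_REQS 4
    #define MASK_PEND_IDX(i) ((i) & (MAX_PENDING_REQS - 1))

    static unsigned char pending_ring[MAX_PENDING_REQS];
    static unsigned pending_prod, pending_cons;
    #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

    static int alloc_pending(void)
    {
        return pending_ring[MASK_PEND_IDX(pending_cons++)];
    }

    static void free_pending(int idx)
    {
        pending_ring[MASK_PEND_IDX(pending_prod++)] = (unsigned char)idx;
    }

    int main(void)
    {
        int i, a, b;
        for (i = 0; i < MAX_PENDING_REQS; i++)
            pending_ring[i] = (unsigned char)i;
        pending_prod = MAX_PENDING_REQS;   /* all slots free initially */

        a = alloc_pending();
        b = alloc_pending();
        assert(NR_PENDING_REQS == 2);      /* two requests outstanding */
        free_pending(b);
        free_pending(a);
        assert(NR_PENDING_REQS == 0);      /* all slots returned */
        return 0;
    }

/* Queue up a batch of requests.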
*/ + while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && + !list_empty(&usbio_schedule_list) ) + { + ent = usbio_schedule_list.next; + up = list_entry(ent, usbif_priv_t, usbif_list); + usbif_get(up); + remove_from_usbif_list(up); + if ( do_usb_io_op(up, BATCH_PER_DOMAIN) ) + add_to_usbif_list_tail(up); + usbif_put(up); + } + } +} + +static void maybe_trigger_usbio_schedule(void) +{ + /* + * Needed so that two processes, who together make the following predicate + * true, don't both read stale values and evaluate the predicate + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... + */ + smp_mb(); + + if ( !list_empty(&usbio_schedule_list) ) + wake_up(&usbio_schedule_wait); +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ + +irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + usbif_priv_t *up = dev_id; + + smp_mb(); + + add_to_usbif_list_tail(up); + + /* Will in fact /always/ trigger an io schedule in this case. */ + maybe_trigger_usbio_schedule(); + + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the usb-device layer proper. + */ + +static int do_usb_io_op(usbif_priv_t *up, int max_to_do) +{ + usbif_back_ring_t *usb_ring = &up->usb_ring; + usbif_request_t *req; + RING_IDX i, rp; + int more_to_do = 0; + + rp = usb_ring->sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. */ + + /* Take items off the comms ring, taking care not to overflow. */ + for ( i = usb_ring->req_cons; + (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i); + i++ ) + { + if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) + { + more_to_do = 1; + break; + } + + req = RING_GET_REQUEST(usb_ring, i); + + switch ( req->operation ) + { + case USBIF_OP_PROBE: + dispatch_usb_probe(up, req->id, req->port); + break; + + case USBIF_OP_IO: + /* Assemble an appropriate URB. */ + dispatch_usb_io(up, req); + break; + + case USBIF_OP_RESET: + dispatch_usb_reset(up, req->port); + break; + + default: + DPRINTK("error: unknown USB io operation [%d]\n", + req->operation); + make_response(up, req->id, req->operation, -EINVAL, 0, 0); + break; + } + } + + usb_ring->req_cons = i; + + return more_to_do; +} + +static owned_port_t *find_guest_port(usbif_priv_t *up, int port) +{ + unsigned long flags; + struct list_head *l; + + spin_lock_irqsave(&owned_ports_lock, flags); + list_for_each(l, &owned_ports) + { + owned_port_t *p = list_entry(l, owned_port_t, list); + if(p->usbif_priv == up && p->guest_port == port) + { + spin_unlock_irqrestore(&owned_ports_lock, flags); + return p; + } + } + spin_unlock_irqrestore(&owned_ports_lock, flags); + + return NULL; +} + +static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid) +{ + owned_port_t *port = find_guest_port(up, portid); + int ret = 0; + + + /* Allowing the guest to actually reset the device causes more problems + * than it's worth. We just fake it out in software but we will do a real + * reset when the interface is destroyed. */ + + dump_port(port); + + port->guest_address = 0; + /* If there's an attached device then the port is now enabled. 
*/ + if ( port->dev_present ) + port->enabled = 1; + else + port->enabled = 0; + + make_response(up, 0, USBIF_OP_RESET, ret, 0, 0); +} + +static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid) +{ + owned_port_t *port = find_guest_port(up, portid); + int ret; + + if ( port != NULL ) + ret = port->dev_present; + else + { + ret = -EINVAL; + printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request " + "(port %ld)\n", portid); + } + + /* Probe result is sent back in-band. Probes don't have an associated id + * right now... */ + make_response(up, id, USBIF_OP_PROBE, ret, portid, 0); +} + +/** + * check_iso_schedule - safety check the isochronous schedule for an URB + * @purb : the URB in question + */ +static int check_iso_schedule(struct urb *purb) +{ + int i; + unsigned long total_length = 0; + + for ( i = 0; i < purb->number_of_packets; i++ ) + { + struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i]; + + if ( desc->offset >= purb->transfer_buffer_length + || ( desc->offset + desc->length) > purb->transfer_buffer_length ) + return -EINVAL; + + total_length += desc->length; + + if ( total_length > purb->transfer_buffer_length ) + return -EINVAL; + } + + return 0; +} + +owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req); + +static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req) +{ + unsigned long buffer_mach; + int i = 0, offset = 0, + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; + pending_req_t *pending_req; + unsigned long remap_prot; + multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST]; + struct urb *purb = NULL; + owned_port_t *port; + unsigned char *setup; + + dump_request(req); + + if ( NR_PENDING_REQS == MAX_PENDING_REQS ) + { + printk(KERN_WARNING "usbback: Max requests already queued. " + "Giving up!\n"); + + return; + } + + port = find_port_for_request(up, req); + + if ( port == NULL ) + { + printk(KERN_WARNING "No such device! (%d)\n", req->devnum); + dump_request(req); + + make_response(up, req->id, req->operation, -ENODEV, 0, 0); + return; + } + else if ( !port->dev_present ) + { + /* In normal operation, we'll only get here if a device is unplugged + * and the frontend hasn't noticed yet. */ + make_response(up, req->id, req->operation, -ENODEV, 0, 0); + return; + } + + + setup = kmalloc(8, GFP_KERNEL); + + if ( setup == NULL ) + goto no_mem; + + /* Copy request out for safety. */ + memcpy(setup, req->setup, 8); + + if( setup[0] == 0x0 && setup[1] == 0x5) + { + /* To virtualise the USB address space, we need to intercept + * set_address messages and emulate. From the USB specification: + * bmRequestType = 0x0; + * Brequest = SET_ADDRESS (i.e. 0x5) + * wValue = device address + * wIndex = 0 + * wLength = 0 + * data = None + */ + /* Store into the guest transfer buffer using cpu_to_le16 */ + port->guest_address = le16_to_cpu(*(u16 *)(setup + 2)); + /* Make a successful response. That was easy! */ + + make_response(up, req->id, req->operation, 0, 0, 0); + + kfree(setup); + return; + } + else if ( setup[0] == 0x0 && setup[1] == 0x9 ) + { + /* The host kernel needs to know what device configuration is in use + * because various error checks get confused otherwise. We just do + * configuration settings here, under controlled conditions. + */ + + /* Ignore configuration setting and hope that the host kernel + did it right. 
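 */

The SET_ADDRESS interception above leans on the fixed layout of the 8-byte
USB SETUP packet: bmRequestType, bRequest, then a little-endian wValue
carrying the new device address. Decoding it, standalone:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* bmRequestType=0, bRequest=SET_ADDRESS(0x5), wValue=42 (LE) */
        uint8_t setup[8] = { 0x00, 0x05, 0x2A, 0x00, 0, 0, 0, 0 };

        if (setup[0] == 0x00 && setup[1] == 0x05) {
            uint16_t addr = (uint16_t)(setup[2] | (setup[3] << 8));
            printf("guest set_address: %u\n", addr);   /* prints 42 */
        }
        return 0;
    }

/* Ignore configuration setting and hope that the host kernel
   did it right.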
*/ + /* usb_set_configuration(port->dev, setup[2]); */ + + make_response(up, req->id, req->operation, 0, 0, 0); + + kfree(setup); + return; + } + else if ( setup[0] == 0x1 && setup[1] == 0xB ) + { + /* The host kernel needs to know what device interface is in use + * because various error checks get confused otherwise. We just do + * configuration settings here, under controlled conditions. + */ + usb_set_interface(port->dev, (setup[4] | setup[5] << 8), + (setup[2] | setup[3] << 8) ); + + make_response(up, req->id, req->operation, 0, 0, 0); + + kfree(setup); + return; + } + + if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK) + + req->length ) + > MMAP_PAGES_PER_REQUEST * PAGE_SIZE ) + { + printk(KERN_WARNING "usbback: request of %lu bytes too large\n", + req->length); + make_response(up, req->id, req->operation, -EINVAL, 0, 0); + kfree(setup); + return; + } + + buffer_mach = req->transfer_buffer; + + if( buffer_mach == 0 ) + goto no_remap; + + ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST); + ASSERT(buffer_mach); + + /* Always map writeable for now. */ + remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; + + for ( i = 0, offset = 0; offset < req->length; + i++, offset += PAGE_SIZE ) + { + mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain; + mcl[i].args[0] = MMAP_VADDR(pending_idx, i); + mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot; + mcl[i].args[2] = 0; + mcl[i].args[3] = up->domid; + + phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = + FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT); + + ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i)) + == buffer_mach + i << PAGE_SHIFT); + } + + if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */ + { + /* Map in ISO schedule, if necessary. */ + mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain; + mcl[i].args[0] = MMAP_VADDR(pending_idx, i); + mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot; + mcl[i].args[2] = 0; + mcl[i].args[3] = up->domid; + + phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = + FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT); + + i++; + } + + if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) ) + BUG(); + + { + int j; + for ( j = 0; j < i; j++ ) + { + if ( unlikely(mcl[j].args[5] != 0) ) + { + printk(KERN_WARNING + "invalid buffer %d -- could not remap it\n", j); + fast_flush_area(pending_idx, i); + goto bad_descriptor; + } + } + } + + no_remap: + + ASSERT(i <= MMAP_PAGES_PER_REQUEST); + ASSERT(i * PAGE_SIZE >= req->length); + + /* We have to do this because some things might complete out of order. */ + pending_req = &pending_reqs[pending_idx]; + pending_req->usbif_priv= up; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->nr_pages = i; + + pending_cons++; + + usbif_get(up); + + /* Fill out an actual request for the USB layer. 
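 */

The size test above rejects any transfer whose offset within its first page
plus its length would need more than MMAP_PAGES_PER_REQUEST pages; the
per-page multicall loop then walks exactly that many pages. The page-span
arithmetic, as a standalone check:

    #include <assert.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    static unsigned long pages_spanned(unsigned long buf, unsigned long len)
    {
        unsigned long off = buf - (buf & PAGE_MASK);  /* offset in first page */
        return (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
    }

    int main(void)
    {
        assert(pages_spanned(0x1000, PAGE_SIZE) == 1);  /* aligned, one page */
        assert(pages_spanned(0x1ffc, 8) == 2);          /* straddles a boundary */
        return 0;
    }

/* Fill out an actual request for the USB layer.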
*/ + purb = usb_alloc_urb(req->num_iso); + + if ( purb == NULL ) + { + usbif_put(up); + free_pending(pending_idx); + goto no_mem; + } + + purb->dev = port->dev; + purb->context = pending_req; + purb->transfer_buffer = + (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK)); + if(buffer_mach == 0) + purb->transfer_buffer = NULL; + purb->complete = __end_usb_io_op; + purb->transfer_buffer_length = req->length; + purb->transfer_flags = req->transfer_flags; + + purb->pipe = 0; + purb->pipe |= req->direction << 7; + purb->pipe |= port->dev->devnum << 8; + purb->pipe |= req->speed << 26; + purb->pipe |= req->pipe_type << 30; + purb->pipe |= req->endpoint << 15; + + purb->number_of_packets = req->num_iso; + + if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE ) + goto urb_error; + + /* Make sure there's always some kind of timeout. */ + purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000 + : 1000; + + purb->setup_packet = setup; + + if ( req->pipe_type == 0 ) /* ISO */ + { + int j; + usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1); + + /* If we're dealing with an iso pipe, we need to copy in a schedule. */ + for ( j = 0; j < purb->number_of_packets; j++ ) + { + purb->iso_frame_desc[j].length = iso_sched[j].length; + purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset; + iso_sched[j].status = 0; + } + } + + if ( check_iso_schedule(purb) != 0 ) + goto urb_error; + + if ( usb_submit_urb(purb) != 0 ) + goto urb_error; + + return; + + urb_error: + dump_urb(purb); + usbif_put(up); + free_pending(pending_idx); + + bad_descriptor: + kfree ( setup ); + if ( purb != NULL ) + usb_free_urb(purb); + make_response(up, req->id, req->operation, -EINVAL, 0, 0); + return; + + no_mem: + if ( setup != NULL ) + kfree(setup); + make_response(up, req->id, req->operation, -ENOMEM, 0, 0); + return; +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + + +static void make_response(usbif_priv_t *up, unsigned long id, + unsigned short op, int st, int inband, + unsigned long length) +{ + usbif_response_t *resp; + unsigned long flags; + usbif_back_ring_t *usb_ring = &up->usb_ring; + + /* Place on the response ring for the relevant domain. */ + spin_lock_irqsave(&up->usb_ring_lock, flags); + resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt); + resp->id = id; + resp->operation = op; + resp->status = st; + resp->data = inband; + resp->length = length; + wmb(); /* Ensure other side can see the response fields. */ + + dump_response(resp); + + usb_ring->rsp_prod_pvt++; + RING_PUSH_RESPONSES(usb_ring); + spin_unlock_irqrestore(&up->usb_ring_lock, flags); + + /* Kick the relevant domain. */ + notify_via_evtchn(up->evtchn); +} + +/** + * usbif_claim_port - claim devices on a port on behalf of guest + * + * Once completed, this will ensure that any device attached to that + * port is claimed by this driver for use by the guest. + */ +int usbif_claim_port(usbif_be_claim_port_t *msg) +{ + owned_port_t *o_p; + + /* Sanity... */ + if ( usbif_find_port(msg->path) != NULL ) + { + printk(KERN_WARNING "usbback: Attempted to claim USB port " + "we already own!\n"); + return -EINVAL; + } + + /* No need for a slab cache - this should be infrequent. 
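 */

The purb->pipe word assembled above packs direction, device number,
endpoint, speed and pipe type into bit-fields of one integer, in the
positions the 2.6-era usb_pipe*() accessors expect. A standalone check of
those shifts (MKPIPE is a local helper for the test, not a kernel macro):

    #include <assert.h>

    #define MKPIPE(dir, dev, ep, speed, type)                 \
        (((unsigned)(dir) << 7) | ((unsigned)(dev) << 8) |    \
         ((unsigned)(ep) << 15) | ((unsigned)(speed) << 26) | \
         ((unsigned)(type) << 30))

    int main(void)
    {
        unsigned pipe = MKPIPE(1, 3, 2, 0, 3); /* IN, device 3, endpoint 2, bulk */
        assert(((pipe >> 8) & 0x7f) == 3);     /* usb_pipedevice() */
        assert(((pipe >> 15) & 0xf) == 2);     /* usb_pipeendpoint() */
        assert(((pipe >> 7) & 1) == 1);        /* usb_pipein() */
        assert(((pipe >> 30) & 3) == 3);       /* usb_pipetype(), 3 == bulk */
        return 0;
    }

/* No need for a slab cache - this should be infrequent.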
*/ + o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL); + + if ( o_p == NULL ) + return -ENOMEM; + + o_p->enabled = 0; + o_p->usbif_priv = usbif_find(msg->domid); + o_p->guest_port = msg->usbif_port; + o_p->dev_present = 0; + o_p->guest_address = 0; /* Default address. */ + + strcpy(o_p->path, msg->path); + + spin_lock_irq(&owned_ports_lock); + + list_add(&o_p->list, &owned_ports); + + spin_unlock_irq(&owned_ports_lock); + + printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path, + msg->domid, msg->usbif_port); + + /* Force a reprobe for unclaimed devices. */ + usb_scan_devices(); + + return 0; +} + +owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req) +{ + unsigned long flags; + struct list_head *port; + + /* I'm assuming this is not called from IRQ context - correct? I think + * it's probably only called in response to control messages or plug events + * in the USB hub kernel thread, so should be OK. */ + spin_lock_irqsave(&owned_ports_lock, flags); + list_for_each(port, &owned_ports) + { + owned_port_t *p = list_entry(port, owned_port_t, list); + if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled ) + { + dump_port(p); + + spin_unlock_irqrestore(&owned_ports_lock, flags); + return p; + } + } + spin_unlock_irqrestore(&owned_ports_lock, flags); + + return NULL; +} + +owned_port_t *__usbif_find_port(char *path) +{ + struct list_head *port; + + list_for_each(port, &owned_ports) + { + owned_port_t *p = list_entry(port, owned_port_t, list); + if(!strcmp(path, p->path)) + { + return p; + } + } + + return NULL; +} + +owned_port_t *usbif_find_port(char *path) +{ + owned_port_t *ret; + unsigned long flags; + + spin_lock_irqsave(&owned_ports_lock, flags); + ret = __usbif_find_port(path); + spin_unlock_irqrestore(&owned_ports_lock, flags); + + return ret; +} + + +static void *probe(struct usb_device *dev, unsigned iface, + const struct usb_device_id *id) +{ + owned_port_t *p; + + /* We don't care what the device is - if we own the port, we want it. We + * don't deal with device-specifics in this driver, so we don't care what + * the device actually is ;-) */ + if ( ( p = usbif_find_port(dev->devpath) ) != NULL ) + { + printk(KERN_INFO "usbback: claimed device attached to owned port\n"); + + p->dev_present = 1; + p->dev = dev; + set_bit(iface, &p->ifaces); + + return p->usbif_priv; + } + else + printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n", + dev->devpath); + + + return NULL; +} + +static void disconnect(struct usb_device *dev, void *usbif) +{ + /* Note the device is removed so we can tell the guest when it probes. */ + owned_port_t *port = usbif_find_port(dev->devpath); + port->dev_present = 0; + port->dev = NULL; + port->ifaces = 0; +} + + +struct usb_driver driver = +{ + .owner = THIS_MODULE, + .name = "Xen USB Backend", + .probe = probe, + .disconnect = disconnect, + .id_table = NULL, +}; + +/* __usbif_release_port - internal mechanics for releasing a port */ +void __usbif_release_port(owned_port_t *p) +{ + int i; + + for ( i = 0; p->ifaces != 0; i++) + if ( p->ifaces & 1 << i ) + { + usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i)); + clear_bit(i, &p->ifaces); + } + list_del(&p->list); + + /* Reset the real device. We don't simulate disconnect / probe for other + * drivers in this kernel because we assume the device is completely under + * the control of ourselves (i.e. the guest!). 
This should ensure that the + * device is in a sane state for the next customer ;-) */ + + /* MAW NB: we're not resetting the real device here. This looks perfectly + * valid to me but it causes memory corruption. We seem to get away with not + * resetting for now, although it'd be nice to have this tracked down. */ +/* if ( p->dev != NULL) */ +/* usb_reset_device(p->dev); */ + + kfree(p); +} + + +/** + * usbif_release_port - stop claiming devices on a port on behalf of guest + */ +void usbif_release_port(usbif_be_release_port_t *msg) +{ + owned_port_t *p; + + spin_lock_irq(&owned_ports_lock); + p = __usbif_find_port(msg->path); + __usbif_release_port(p); + spin_unlock_irq(&owned_ports_lock); +} + +void usbif_release_ports(usbif_priv_t *up) +{ + struct list_head *port, *tmp; + unsigned long flags; + + spin_lock_irqsave(&owned_ports_lock, flags); + list_for_each_safe(port, tmp, &owned_ports) + { + owned_port_t *p = list_entry(port, owned_port_t, list); + if ( p->usbif_priv == up ) + __usbif_release_port(p); + } + spin_unlock_irqrestore(&owned_ports_lock, flags); +} + +static int __init usbif_init(void) +{ + int i; + + if ( !(xen_start_info.flags & SIF_INITDOMAIN) && + !(xen_start_info.flags & SIF_USB_BE_DOMAIN) ) + return 0; + + if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) + BUG(); + + pending_cons = 0; + pending_prod = MAX_PENDING_REQS; + memset(pending_reqs, 0, sizeof(pending_reqs)); + for ( i = 0; i < MAX_PENDING_REQS; i++ ) + pending_ring[i] = i; + + spin_lock_init(&pend_prod_lock); + + spin_lock_init(&owned_ports_lock); + INIT_LIST_HEAD(&owned_ports); + + spin_lock_init(&usbio_schedule_list_lock); + INIT_LIST_HEAD(&usbio_schedule_list); + + if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) + BUG(); + + usbif_interface_init(); + + usbif_ctrlif_init(); + + usb_register(&driver); + + printk(KERN_INFO "Xen USB Backend Initialised"); + + return 0; +} + +__initcall(usbif_init); diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c new file mode 100644 index 0000000000..46cca3058a --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c @@ -0,0 +1,1664 @@ +/* + * Xen Virtual USB Frontend Driver + * + * This file contains the first version of the Xen virtual USB hub + * that I've managed not to delete by mistake (3rd time lucky!). + * + * Based on Linux's uhci.c, original copyright notices are displayed + * below. Portions also (c) 2004 Intel Research Cambridge + * and (c) 2004, 2005 Mark Williamson + * + * Contact or + * regarding this code. + * + * Still to be (maybe) implemented: + * - migration / backend restart support? + * - support for building / using as a module + */ + +/* + * Universal Host Controller Interface driver for USB. + * + * Maintainer: Johannes Erdfelt + * + * (C) Copyright 1999 Linus Torvalds + * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@erdfelt.com + * (C) Copyright 1999 Randy Dunlap + * (C) Copyright 1999 Georg Acher, acher@in.tum.de + * (C) Copyright 1999 Deti Fliegl, deti@fliegl.de + * (C) Copyright 1999 Thomas Sailer, sailer@ife.ee.ethz.ch + * (C) Copyright 1999 Roman Weissgaerber, weissg@vienna.at + * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface + * support from usb-ohci.c by Adam Richter, adam@yggdrasil.com). + * (C) Copyright 1999 Gregory P. 
Smith (from usb-ohci.c)
+ *
+ * Intel documents this fairly well, and as far as I know there
+ * are no royalties or anything like that, but even so there are
+ * people who decided that they want to do the same thing in a
+ * completely different way.
+ *
+ * WARNING! The USB documentation is downright evil. Most of it
+ * is just crap, written by a committee. You're better off ignoring
+ * most of it, the important stuff is:
+ *  - the low-level protocol (fairly simple but lots of small details)
+ *  - working around the horridness of the rest
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_USB_DEBUG
+#define DEBUG
+#else
+#undef DEBUG
+#endif
+#include
+
+#include
+#include
+
+#include "xhci.h"
+
+#include "../../../../../drivers/usb/hcd.h"
+
+#include
+#include
+#include
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v1.0"
+#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
+                      "Randy Dunlap, Georg Acher, Deti Fliegl, " \
+                      "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
+#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
+
+/*
+ * debug = 0, no debugging messages
+ * debug = 1, dump failed URB's except for stalls
+ * debug = 2, dump all failed URB's (including stalls)
+ */
+#ifdef DEBUG
+static int debug = 1;
+#else
+static int debug = 0;
+#endif
+MODULE_PARM(debug, "i");
+MODULE_PARM_DESC(debug, "Debug level");
+static char *errbuf;
+#define ERRBUF_LEN    (PAGE_SIZE * 8)
+
+static int rh_submit_urb(struct urb *urb);
+static int rh_unlink_urb(struct urb *urb);
+static int xhci_unlink_urb(struct urb *urb);
+static void xhci_call_completion(struct urb *urb);
+static void xhci_drain_ring(void);
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
+static void xhci_finish_completion(void);
+
+#define MAX_URB_LOOP 2048 /* Maximum number of linked URB's */
+
+static kmem_cache_t *xhci_up_cachep; /* urb_priv cache */
+static struct xhci *xhci;            /* XHCI structure for the interface */
+
+/******************************************************************************
+ * DEBUGGING
+ */
+
+#ifdef DEBUG
+
+static void dump_urb(struct urb *urb)
+{
+    printk(KERN_DEBUG "dumping urb @ %p\n"
+           "  hcpriv = %p\n"
+           "  next = %p\n"
+           "  dev = %p\n"
+           "  pipe = 0x%lx\n"
+           "  status = %d\n"
+           "  transfer_flags = 0x%lx\n"
+           "  transfer_buffer = %p\n"
+           "  transfer_buffer_length = %d\n"
+           "  actual_length = %d\n"
+           "  bandwidth = %d\n"
+           "  setup_packet = %p\n",
+           urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
+           urb->transfer_flags, urb->transfer_buffer,
+           urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
+           urb->setup_packet);
+    if ( urb->setup_packet != NULL )
+        printk(KERN_DEBUG
+               "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
+               urb->setup_packet[0], urb->setup_packet[1],
+               urb->setup_packet[2], urb->setup_packet[3],
+               urb->setup_packet[4], urb->setup_packet[5],
+               urb->setup_packet[6], urb->setup_packet[7]);
+    printk(KERN_DEBUG "complete = %p\n"
+           "interval = %d\n", urb->complete, urb->interval);
+}
+
+static void xhci_show_resp(usbif_response_t *r)
+{
+    printk(KERN_DEBUG "dumping response @ %p\n"
+           "  id=0x%lx\n"
+           "  op=0x%x\n"
+           "  data=0x%x\n"
+           "  status=0x%x\n"
+           "  length=0x%lx\n",
+           r, r->id, r->operation, r->data, r->status, r->length);
+}
+
+#define DPRINTK(...) printk(KERN_DEBUG __VA_ARGS__)
+
+#else /* DEBUG */
+
+#define dump_urb(blah) ((void)0)
+#define xhci_show_resp(blah) ((void)0)
+#define DPRINTK(blah,...)
((void)0) + +#endif /* DEBUG */ + +/****************************************************************************** + * RING REQUEST HANDLING + */ + +/** + * xhci_construct_isoc - add isochronous information to a request + */ +static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb) +{ + usbif_iso_t *schedule; + int i; + struct urb_priv *urb_priv = urb->hcpriv; + + req->num_iso = urb->number_of_packets; + schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL); + + if ( schedule == NULL ) + return -ENOMEM; + + for ( i = 0; i < req->num_iso; i++ ) + { + schedule[i].buffer_offset = urb->iso_frame_desc[i].offset; + schedule[i].length = urb->iso_frame_desc[i].length; + } + + urb_priv->schedule = schedule; + req->iso_schedule = virt_to_machine(schedule); + + return 0; +} + +/** + * xhci_queue_req - construct and queue request for an URB + */ +static int xhci_queue_req(struct urb *urb) +{ + usbif_request_t *req; + usbif_front_ring_t *usb_ring = &xhci->usb_ring; + +#if DEBUG + printk(KERN_DEBUG + "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n", + usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod), + usbif->resp_prod, xhci->usb_resp_cons); +#endif + + + if ( RING_FULL(usb_ring) ) + { + printk(KERN_WARNING + "xhci_queue_req(): USB ring full, not queuing request\n"); + return -ENOBUFS; + } + + /* Stick something in the shared communications ring. */ + req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt); + + req->operation = USBIF_OP_IO; + req->port = 0; /* We don't care what the port is. */ + req->id = (unsigned long) urb->hcpriv; + req->transfer_buffer = virt_to_machine(urb->transfer_buffer); + req->devnum = usb_pipedevice(urb->pipe); + req->direction = usb_pipein(urb->pipe); + req->speed = usb_pipeslow(urb->pipe); + req->pipe_type = usb_pipetype(urb->pipe); + req->length = urb->transfer_buffer_length; + req->transfer_flags = urb->transfer_flags; + req->endpoint = usb_pipeendpoint(urb->pipe); + req->speed = usb_pipeslow(urb->pipe); + req->timeout = urb->timeout * (1000 / HZ); + + if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */ + { + int ret = xhci_construct_isoc(req, urb); + if ( ret != 0 ) + return ret; + } + + if(urb->setup_packet != NULL) + memcpy(req->setup, urb->setup_packet, 8); + else + memset(req->setup, 0, 8); + + usb_ring->req_prod_pvt++; + RING_PUSH_REQUESTS(usb_ring); + + notify_via_evtchn(xhci->evtchn); + + DPRINTK("Queued request for an URB.\n"); + dump_urb(urb); + + return -EINPROGRESS; +} + +/** + * xhci_queue_probe - queue a probe request for a particular port + */ +static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port) +{ + usbif_request_t *req; + usbif_front_ring_t *usb_ring = &xhci->usb_ring; + +#if DEBUG + printk(KERN_DEBUG + "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, " + "resp_cons = %d\n", usbif->req_prod, + virt_to_machine(&usbif->req_prod), + usbif->resp_prod, xhci->usb_resp_cons); +#endif + + if ( RING_FULL(usb_ring) ) + { + printk(KERN_WARNING + "xhci_queue_probe(): ring full, not queuing request\n"); + return NULL; + } + + /* Stick something in the shared communications ring. 
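Both xhci_queue_req() above and xhci_queue_probe() here follow the same Xen front-ring producer sequence; factored out as a minimal sketch (queue_simple_op is a hypothetical helper, the ring macros are the standard Xen shared-ring ones already used throughout this file):

static int queue_simple_op(usbif_front_ring_t *ring, int evtchn, u8 op)
{
    usbif_request_t *req;

    if ( RING_FULL(ring) )
        return -ENOBUFS;                 /* caller decides whether to retry */

    req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
    memset(req, 0, sizeof(*req));
    req->operation = op;

    ring->req_prod_pvt++;                /* advance private producer index */
    RING_PUSH_REQUESTS(ring);            /* publish into the shared page */
    notify_via_evtchn(evtchn);           /* kick the backend domain */

    return 0;
}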
*/
+    req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+    memset(req, 0, sizeof(*req));
+
+    req->operation = USBIF_OP_PROBE;
+    req->port = port;
+
+    usb_ring->req_prod_pvt++;
+    RING_PUSH_REQUESTS(usb_ring);
+
+    notify_via_evtchn(xhci->evtchn);
+
+    return req;
+}
+
+/**
+ * xhci_port_reset - queue a reset request for a particular port
+ */
+static int xhci_port_reset(usbif_vdev_t port)
+{
+    usbif_request_t *req;
+    usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+    /* We only reset one port at a time, so we only need one variable per
+     * hub. */
+    xhci->awaiting_reset = 1;
+
+    /* Stick something in the shared communications ring. */
+    req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+    memset(req, 0, sizeof(*req));
+
+    req->operation = USBIF_OP_RESET;
+    req->port = port;
+
+    usb_ring->req_prod_pvt++;
+    RING_PUSH_REQUESTS(usb_ring);
+
+    notify_via_evtchn(xhci->evtchn);
+
+    while ( xhci->awaiting_reset > 0 )
+    {
+        mdelay(1);
+        xhci_drain_ring();
+    }
+
+    xhci->rh.ports[port].pe = 1;
+    xhci->rh.ports[port].pe_chg = 1;
+
+    return xhci->awaiting_reset;
+}
+
+
+/******************************************************************************
+ * RING RESPONSE HANDLING
+ */
+
+static void receive_usb_reset(usbif_response_t *resp)
+{
+    xhci->awaiting_reset = resp->status;
+    rmb();
+}
+
+static void receive_usb_probe(usbif_response_t *resp)
+{
+    spin_lock(&xhci->rh.port_state_lock);
+
+    if ( resp->status > 0 )
+    {
+        if ( resp->status == 1 )
+        {
+            /* If there's a device there and there wasn't one before, there
+             * must have been a connection status change. */
+            if( xhci->rh.ports[resp->data].cs == 0 )
+            {
+                xhci->rh.ports[resp->data].cs = 1;
+                xhci->rh.ports[resp->data].ccs = 1;
+                xhci->rh.ports[resp->data].cs_chg = 1;
+            }
+        }
+        else
+            printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
+                   "for port %d\n", resp->status, resp->data);
+    }
+    else if ( resp->status < 0 )
+        printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
+               resp->status);
+
+    spin_unlock(&xhci->rh.port_state_lock);
+}
+
+static void receive_usb_io(usbif_response_t *resp)
+{
+    struct urb_priv *urbp = (struct urb_priv *)resp->id;
+    struct urb *urb = urbp->urb;
+
+    urb->actual_length = resp->length;
+    urbp->in_progress = 0;
+
+    if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
+    {
+        int i;
+
+        /* Copy ISO schedule results back in. */
+        for ( i = 0; i < urb->number_of_packets; i++ )
+        {
+            urb->iso_frame_desc[i].status
+                = urbp->schedule[i].status;
+            urb->iso_frame_desc[i].actual_length
+                = urbp->schedule[i].length;
+        }
+        free_page((unsigned long)urbp->schedule);
+    }
+
+    /* Only set status if it's not been changed since submission.  It might
+     * have been changed if the URB has been unlinked asynchronously, for
+     * instance. */
+    if ( urb->status == -EINPROGRESS )
+        urbp->status = urb->status = resp->status;
+}
+
+/**
+ * xhci_drain_ring - drain responses from the ring, calling handlers
+ *
+ * This may be called from interrupt context when an event is received from the
+ * backend domain, or sometimes in process context whilst waiting for a port
+ * reset or URB completion.
+ */
+static void xhci_drain_ring(void)
+{
+    struct list_head *tmp, *head;
+    usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+    usbif_response_t *resp;
+    RING_IDX i, rp;
+
+    /* Walk the ring here to get responses, updating URBs to show what
+     * completed. */
+
+    rp = usb_ring->sring->rsp_prod;
+    rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+    /* Take items off the comms ring, taking care not to overflow.
*/ + for ( i = usb_ring->rsp_cons; i != rp; i++ ) + { + resp = RING_GET_RESPONSE(usb_ring, i); + + /* May need to deal with batching and with putting a ceiling on + the number dispatched for performance and anti-dos reasons */ + + xhci_show_resp(resp); + + switch ( resp->operation ) + { + case USBIF_OP_PROBE: + receive_usb_probe(resp); + break; + + case USBIF_OP_IO: + receive_usb_io(resp); + break; + + case USBIF_OP_RESET: + receive_usb_reset(resp); + break; + + default: + printk(KERN_WARNING + "error: unknown USB io operation response [%d]\n", + resp->operation); + break; + } + } + + usb_ring->rsp_cons = i; + + /* Walk the list of pending URB's to see which ones completed and do + * callbacks, etc. */ + spin_lock(&xhci->urb_list_lock); + head = &xhci->urb_list; + tmp = head->next; + while (tmp != head) { + struct urb *urb = list_entry(tmp, struct urb, urb_list); + + tmp = tmp->next; + + /* Checks the status and does all of the magic necessary */ + xhci_transfer_result(xhci, urb); + } + spin_unlock(&xhci->urb_list_lock); + + xhci_finish_completion(); +} + + +static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs) +{ + xhci_drain_ring(); +} + +/****************************************************************************** + * HOST CONTROLLER FUNCTIONALITY + */ + +/** + * no-op implementation of private device alloc / free routines + */ +static int xhci_do_nothing_dev(struct usb_device *dev) +{ + return 0; +} + +static inline void xhci_add_complete(struct urb *urb) +{ + struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv; + unsigned long flags; + + spin_lock_irqsave(&xhci->complete_list_lock, flags); + list_add_tail(&urbp->complete_list, &xhci->complete_list); + spin_unlock_irqrestore(&xhci->complete_list_lock, flags); +} + +/* When this returns, the owner of the URB may free its + * storage. + * + * We spin and wait for the URB to complete before returning. + * + * Call with urb->lock acquired. + */ +static void xhci_delete_urb(struct urb *urb) +{ + struct urb_priv *urbp; + + urbp = urb->hcpriv; + + /* If there's no urb_priv structure for this URB then it can't have + * been submitted at all. */ + if ( urbp == NULL ) + return; + + /* For now we just spin until the URB completes. It shouldn't take too + * long and we don't expect to have to do this very often. */ + while ( urb->status == -EINPROGRESS ) + { + xhci_drain_ring(); + mdelay(1); + } + + /* Now we know that further transfers to the buffer won't + * occur, so we can safely return. */ +} + +static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb) +{ + struct urb_priv *urbp; + + urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC); + if (!urbp) { + err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n"); + return NULL; + } + + memset((void *)urbp, 0, sizeof(*urbp)); + + urbp->inserttime = jiffies; + urbp->urb = urb; + urbp->dev = urb->dev; + + INIT_LIST_HEAD(&urbp->complete_list); + + urb->hcpriv = urbp; + + return urbp; +} + +/* + * MUST be called with urb->lock acquired + */ +/* When is this called? Do we need to stop the transfer (as we + * currently do)? 
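xhci_up_cachep above is created in xhci_hcd_init() and never destroyed; module unload is still on the to-do list in the file header. If an unload path were added, it would need a teardown along these lines (a sketch only; xhci_hcd_exit does not exist in this patch, and in this kernel kmem_cache_destroy() returns non-zero if objects are still live):

static void __exit xhci_hcd_exit(void)
{
    if ( xhci_up_cachep != NULL && kmem_cache_destroy(xhci_up_cachep) != 0 )
        printk(KERN_WARNING
               "xhci: urb_priv cache still had objects on exit\n");

    if ( errbuf != NULL )
        kfree(errbuf);
}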
*/ +static void xhci_destroy_urb_priv(struct urb *urb) +{ + struct urb_priv *urbp; + + urbp = (struct urb_priv *)urb->hcpriv; + if (!urbp) + return; + + if (!list_empty(&urb->urb_list)) + warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb); + + if (!list_empty(&urbp->complete_list)) + warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb); + + kmem_cache_free(xhci_up_cachep, urb->hcpriv); + + urb->hcpriv = NULL; +} + +/** + * Try to find URBs in progress on the same pipe to the same device. + * + * MUST be called with xhci->urb_list_lock acquired + */ +static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb) +{ + struct list_head *tmp, *head; + + /* We don't match Isoc transfers since they are special */ + if (usb_pipeisoc(urb->pipe)) + return NULL; + + head = &xhci->urb_list; + tmp = head->next; + while (tmp != head) { + struct urb *u = list_entry(tmp, struct urb, urb_list); + + tmp = tmp->next; + + if (u->dev == urb->dev && u->pipe == urb->pipe && + u->status == -EINPROGRESS) + return u; + } + + return NULL; +} + +static int xhci_submit_urb(struct urb *urb) +{ + int ret = -EINVAL; + unsigned long flags; + struct urb *eurb; + int bustime; + + DPRINTK("URB submitted to XHCI driver.\n"); + dump_urb(urb); + + if (!urb) + return -EINVAL; + + if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) { + warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb); + return -ENODEV; + } + + if ( urb->dev->devpath == NULL ) + BUG(); + + usb_inc_dev_use(urb->dev); + + spin_lock_irqsave(&xhci->urb_list_lock, flags); + spin_lock(&urb->lock); + + if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET || + urb->status == -ECONNABORTED) { + dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status); + /* Since we can have problems on the out path */ + spin_unlock(&urb->lock); + spin_unlock_irqrestore(&xhci->urb_list_lock, flags); + usb_dec_dev_use(urb->dev); + + return ret; + } + + INIT_LIST_HEAD(&urb->urb_list); + if (!xhci_alloc_urb_priv(urb)) { + ret = -ENOMEM; + + goto out; + } + + ( (struct urb_priv *)urb->hcpriv )->in_progress = 1; + + eurb = xhci_find_urb_ep(xhci, urb); + if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) { + ret = -ENXIO; + + goto out; + } + + /* Short circuit the virtual root hub */ + if (urb->dev == xhci->rh.dev) { + ret = rh_submit_urb(urb); + + goto out; + } + + switch (usb_pipetype(urb->pipe)) { + case PIPE_CONTROL: + case PIPE_BULK: + ret = xhci_queue_req(urb); + break; + + case PIPE_INTERRUPT: + if (urb->bandwidth == 0) { /* not yet checked/allocated */ + bustime = usb_check_bandwidth(urb->dev, urb); + if (bustime < 0) + ret = bustime; + else { + ret = xhci_queue_req(urb); + if (ret == -EINPROGRESS) + usb_claim_bandwidth(urb->dev, urb, + bustime, 0); + } + } else /* bandwidth is already set */ + ret = xhci_queue_req(urb); + break; + + case PIPE_ISOCHRONOUS: + if (urb->bandwidth == 0) { /* not yet checked/allocated */ + if (urb->number_of_packets <= 0) { + ret = -EINVAL; + break; + } + bustime = usb_check_bandwidth(urb->dev, urb); + if (bustime < 0) { + ret = bustime; + break; + } + + ret = xhci_queue_req(urb); + if (ret == -EINPROGRESS) + usb_claim_bandwidth(urb->dev, urb, bustime, 1); + } else /* bandwidth is already set */ + ret = xhci_queue_req(urb); + break; + } +out: + urb->status = ret; + + if (ret == -EINPROGRESS) { + /* We use _tail to make find_urb_ep more efficient */ + list_add_tail(&urb->urb_list, &xhci->urb_list); + + spin_unlock(&urb->lock); + 
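The interrupt and isochronous arms of xhci_submit_urb() above only call usb_claim_bandwidth() once xhci_queue_req() has actually accepted the URB, and the matching release happens in xhci_transfer_result() and xhci_unlink_urb() below. The invariant, condensed into a sketch:

/* Claim: only after the request is really in flight. */
bustime = usb_check_bandwidth(urb->dev, urb);
if ( bustime >= 0 && xhci_queue_req(urb) == -EINPROGRESS )
    usb_claim_bandwidth(urb->dev, urb, bustime, usb_pipeisoc(urb->pipe));

/* Release: exactly once, when the URB leaves the schedule. */
if ( urb->bandwidth )
    usb_release_bandwidth(urb->dev, urb, usb_pipeisoc(urb->pipe));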
spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+        return 0;
+    }
+
+    xhci_delete_urb(urb);
+
+    spin_unlock(&urb->lock);
+    spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+    /* Only call completion if it was successful */
+    if (!ret)
+        xhci_call_completion(urb);
+
+    return ret;
+}
+
+/*
+ * Return the result of a transfer
+ *
+ * MUST be called with urb_list_lock acquired
+ */
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
+{
+    int ret = 0;
+    unsigned long flags;
+    struct urb_priv *urbp;
+
+    /* The root hub is special */
+    if (urb->dev == xhci->rh.dev)
+        return;
+
+    spin_lock_irqsave(&urb->lock, flags);
+
+    urbp = (struct urb_priv *)urb->hcpriv;
+
+    if ( ( (struct urb_priv *)urb->hcpriv )->in_progress )
+        ret = -EINPROGRESS;
+
+    if (urb->actual_length < urb->transfer_buffer_length) {
+        if (urb->transfer_flags & USB_DISABLE_SPD) {
+            ret = -EREMOTEIO;
+        }
+    }
+
+    if (urb->status == -EPIPE)
+    {
+        ret = urb->status;
+        /* endpoint has stalled - mark it halted */
+        usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
+                          usb_pipeout(urb->pipe));
+    }
+
+    if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
+        (ret != 0 && debug > 1)) {
+        /* Some debugging code */
+        dbg("xhci_result_interrupt/bulk() failed with status %x", ret);
+    }
+
+    if (ret == -EINPROGRESS)
+        goto out;
+
+    switch (usb_pipetype(urb->pipe)) {
+    case PIPE_CONTROL:
+    case PIPE_BULK:
+    case PIPE_ISOCHRONOUS:
+        /* Release bandwidth for Interrupt or Isoc. transfers */
+        /* Spinlock needed ? */
+        if (urb->bandwidth)
+            usb_release_bandwidth(urb->dev, urb, 1);
+        xhci_delete_urb(urb);
+        break;
+    case PIPE_INTERRUPT:
+        /* Interrupts are an exception */
+        if (urb->interval)
+            goto out_complete;
+
+        /* Release bandwidth for Interrupt or Isoc. transfers */
+        /* Spinlock needed ? */
+        if (urb->bandwidth)
+            usb_release_bandwidth(urb->dev, urb, 0);
+        xhci_delete_urb(urb);
+        break;
+    default:
+        info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
+             usb_pipetype(urb->pipe), urb);
+    }
+
+    /* Remove it from xhci->urb_list */
+    list_del_init(&urb->urb_list);
+
+out_complete:
+    xhci_add_complete(urb);
+
+out:
+    spin_unlock_irqrestore(&urb->lock, flags);
+}
+
+static int xhci_unlink_urb(struct urb *urb)
+{
+    unsigned long flags;
+    struct urb_priv *urbp = urb->hcpriv;
+
+    if (!urb)
+        return -EINVAL;
+
+    if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
+        return -ENODEV;
+
+    spin_lock_irqsave(&xhci->urb_list_lock, flags);
+    spin_lock(&urb->lock);
+
+    /* Release bandwidth for Interrupt or Isoc. transfers */
+    /* Spinlock needed ? */
+    if (urb->bandwidth) {
+        switch (usb_pipetype(urb->pipe)) {
+        case PIPE_INTERRUPT:
+            usb_release_bandwidth(urb->dev, urb, 0);
+            break;
+        case PIPE_ISOCHRONOUS:
+            usb_release_bandwidth(urb->dev, urb, 1);
+            break;
+        default:
+            break;
+        }
+    }
+
+    if (urb->status != -EINPROGRESS) {
+        spin_unlock(&urb->lock);
+        spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+        return 0;
+    }
+
+    list_del_init(&urb->urb_list);
+
+    /* Short circuit the virtual root hub */
+    if (urb->dev == xhci->rh.dev) {
+        rh_unlink_urb(urb);
+
+        spin_unlock(&urb->lock);
+        spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+        xhci_call_completion(urb);
+    } else {
+        if (urb->transfer_flags & USB_ASYNC_UNLINK) {
+            /* We don't currently attempt to cancel URBs that have been
+             * queued in the ring.  We handle async unlinked URBs when
+             * they complete. */
+            urbp->status = urb->status = -ECONNABORTED;
+            spin_unlock(&urb->lock);
+            spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+        } else {
+            urb->status = -ENOENT;
+
+            spin_unlock(&urb->lock);
+            spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+            if (in_interrupt()) { /* wait at least 1 frame */
+                static int errorcount = 10;
+
+                if (errorcount--)
+                    dbg("xhci_unlink_urb called from interrupt for urb %p", urb);
+                udelay(1000);
+            } else
+                schedule_timeout(1+1*HZ/1000);
+
+            xhci_delete_urb(urb);
+
+            xhci_call_completion(urb);
+        }
+    }
+
+    return 0;
+}
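Seen from a class driver, the two unlink modes above behave quite differently; a caller-side sketch, assuming an URB previously submitted through this HCD:

/* Synchronous cancel: may sleep (or busy-waits from IRQ context);
 * the URB is finished by the time the call returns. */
usb_unlink_urb(urb);

/* Asynchronous cancel: returns immediately; the completion handler
 * later runs with status -ECONNABORTED. */
urb->transfer_flags |= USB_ASYNC_UNLINK;
usb_unlink_urb(urb);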
+
+static void xhci_call_completion(struct urb *urb)
+{
+    struct urb_priv *urbp;
+    struct usb_device *dev = urb->dev;
+    int is_ring = 0, killed, resubmit_interrupt, status;
+    struct urb *nurb;
+    unsigned long flags;
+
+    spin_lock_irqsave(&urb->lock, flags);
+
+    urbp = (struct urb_priv *)urb->hcpriv;
+    if (!urbp || !urb->dev) {
+        spin_unlock_irqrestore(&urb->lock, flags);
+        return;
+    }
+
+    killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
+              urb->status == -ECONNRESET);
+    resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
+                          urb->interval);
+
+    nurb = urb->next;
+    if (nurb && !killed) {
+        int count = 0;
+
+        while (nurb && nurb != urb && count < MAX_URB_LOOP) {
+            if (nurb->status == -ENOENT ||
+                nurb->status == -ECONNABORTED ||
+                nurb->status == -ECONNRESET) {
+                killed = 1;
+                break;
+            }
+
+            nurb = nurb->next;
+            count++;
+        }
+
+        if (count == MAX_URB_LOOP)
+            err("xhci_call_completion: too many linked URB's, loop? (first loop)");
+
+        /* Check to see if chain is a ring */
+        is_ring = (nurb == urb);
+    }
+
+    status = urbp->status;
+    if (!resubmit_interrupt || killed)
+        /* We don't need urb_priv anymore */
+        xhci_destroy_urb_priv(urb);
+
+    if (!killed)
+        urb->status = status;
+
+    spin_unlock_irqrestore(&urb->lock, flags);
+
+    if (urb->complete)
+        urb->complete(urb);
+
+    if (resubmit_interrupt)
+        /* Recheck the status. The completion handler may have */
+        /* unlinked the resubmitting interrupt URB */
+        killed = (urb->status == -ENOENT ||
+                  urb->status == -ECONNABORTED ||
+                  urb->status == -ECONNRESET);
+
+    if (resubmit_interrupt && !killed) {
+        if ( urb->dev != xhci->rh.dev )
+            xhci_queue_req(urb); /* XXX What if this fails? */
+        /* Don't need to resubmit URBs for the virtual root dev. */
+    } else {
+        if (is_ring && !killed) {
+            urb->dev = dev;
+            xhci_submit_urb(urb);
+        } else {
+            /* We decrement the usage count after we're done */
+            /* with everything */
+            usb_dec_dev_use(dev);
+        }
+    }
+}
+
+static void xhci_finish_completion(void)
+{
+    struct list_head *tmp, *head;
+    unsigned long flags;
+
+    spin_lock_irqsave(&xhci->complete_list_lock, flags);
+    head = &xhci->complete_list;
+    tmp = head->next;
+    while (tmp != head) {
+        struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
+                                           complete_list);
+        struct urb *urb = urbp->urb;
+
+        list_del_init(&urbp->complete_list);
+        spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+
+        xhci_call_completion(urb);
+
+        spin_lock_irqsave(&xhci->complete_list_lock, flags);
+        head = &xhci->complete_list;
+        tmp = head->next;
+    }
+    spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+}
+
+static struct usb_operations xhci_device_operations = {
+    .allocate = xhci_do_nothing_dev,
+    .deallocate = xhci_do_nothing_dev,
+    /* It doesn't look like any drivers actually care what the frame number
+     * is at the moment!  If necessary, we could approximate the current
+     * frame number by passing it from the backend in response messages.
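A sketch of the approximation that comment suggests, assuming the backend were extended to echo a frame counter in each response; the last_frame field and the handler update are hypothetical:

static int xhci_get_frame_number(struct usb_device *dev)
{
    return xhci->last_frame;   /* would be refreshed in xhci_drain_ring() */
}

The hook below would then point at xhci_get_frame_number instead of NULL.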
*/ + .get_frame_number = NULL, + .submit_urb = xhci_submit_urb, + .unlink_urb = xhci_unlink_urb +}; + +/****************************************************************************** + * VIRTUAL ROOT HUB EMULATION + */ + +static __u8 root_hub_dev_des[] = +{ + 0x12, /* __u8 bLength; */ + 0x01, /* __u8 bDescriptorType; Device */ + 0x00, /* __u16 bcdUSB; v1.0 */ + 0x01, + 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */ + 0x00, /* __u8 bDeviceSubClass; */ + 0x00, /* __u8 bDeviceProtocol; */ + 0x08, /* __u8 bMaxPacketSize0; 8 Bytes */ + 0x00, /* __u16 idVendor; */ + 0x00, + 0x00, /* __u16 idProduct; */ + 0x00, + 0x00, /* __u16 bcdDevice; */ + 0x00, + 0x00, /* __u8 iManufacturer; */ + 0x02, /* __u8 iProduct; */ + 0x01, /* __u8 iSerialNumber; */ + 0x01 /* __u8 bNumConfigurations; */ +}; + + +/* Configuration descriptor */ +static __u8 root_hub_config_des[] = +{ + 0x09, /* __u8 bLength; */ + 0x02, /* __u8 bDescriptorType; Configuration */ + 0x19, /* __u16 wTotalLength; */ + 0x00, + 0x01, /* __u8 bNumInterfaces; */ + 0x01, /* __u8 bConfigurationValue; */ + 0x00, /* __u8 iConfiguration; */ + 0x40, /* __u8 bmAttributes; + Bit 7: Bus-powered, 6: Self-powered, + Bit 5 Remote-wakeup, 4..0: resvd */ + 0x00, /* __u8 MaxPower; */ + + /* interface */ + 0x09, /* __u8 if_bLength; */ + 0x04, /* __u8 if_bDescriptorType; Interface */ + 0x00, /* __u8 if_bInterfaceNumber; */ + 0x00, /* __u8 if_bAlternateSetting; */ + 0x01, /* __u8 if_bNumEndpoints; */ + 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */ + 0x00, /* __u8 if_bInterfaceSubClass; */ + 0x00, /* __u8 if_bInterfaceProtocol; */ + 0x00, /* __u8 if_iInterface; */ + + /* endpoint */ + 0x07, /* __u8 ep_bLength; */ + 0x05, /* __u8 ep_bDescriptorType; Endpoint */ + 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */ + 0x03, /* __u8 ep_bmAttributes; Interrupt */ + 0x08, /* __u16 ep_wMaxPacketSize; 8 Bytes */ + 0x00, + 0xff /* __u8 ep_bInterval; 255 ms */ +}; + +static __u8 root_hub_hub_des[] = +{ + 0x09, /* __u8 bLength; */ + 0x29, /* __u8 bDescriptorType; Hub-descriptor */ + 0x02, /* __u8 bNbrPorts; */ + 0x00, /* __u16 wHubCharacteristics; */ + 0x00, + 0x01, /* __u8 bPwrOn2pwrGood; 2ms */ + 0x00, /* __u8 bHubContrCurrent; 0 mA */ + 0x00, /* __u8 DeviceRemovable; *** 7 Ports max *** */ + 0xff /* __u8 PortPwrCtrlMask; *** 7 ports max *** */ +}; + +/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */ +static int rh_send_irq(struct urb *urb) +{ + struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv; + xhci_port_t *ports = xhci->rh.ports; + unsigned long flags; + int i, len = 1; + __u16 data = 0; + + spin_lock_irqsave(&urb->lock, flags); + for (i = 0; i < xhci->rh.numports; i++) { + /* Set a bit if anything at all has changed on the port, as per + * USB spec 11.12 */ + data |= (ports[i].cs_chg || ports[i].pe_chg ) + ? 
(1 << (i + 1)) + : 0; + + len = (i + 1) / 8 + 1; + } + + *(__u16 *) urb->transfer_buffer = cpu_to_le16(data); + urb->actual_length = len; + urbp->status = 0; + + spin_unlock_irqrestore(&urb->lock, flags); + + if ((data > 0) && (xhci->rh.send != 0)) { + dbg("root-hub INT complete: data: %x", data); + xhci_call_completion(urb); + } + + return 0; +} + +/* Virtual Root Hub INTs are polled by this timer every "interval" ms */ +static int rh_init_int_timer(struct urb *urb); + +static void rh_int_timer_do(unsigned long ptr) +{ + struct urb *urb = (struct urb *)ptr; + struct list_head list, *tmp, *head; + unsigned long flags; + int i; + + for ( i = 0; i < xhci->rh.numports; i++) + xhci_queue_probe(i); + + if (xhci->rh.send) + rh_send_irq(urb); + + INIT_LIST_HEAD(&list); + + spin_lock_irqsave(&xhci->urb_list_lock, flags); + head = &xhci->urb_list; + tmp = head->next; + while (tmp != head) { + struct urb *u = list_entry(tmp, struct urb, urb_list); + struct urb_priv *up = (struct urb_priv *)u->hcpriv; + + tmp = tmp->next; + + spin_lock(&u->lock); + + /* Check if the URB timed out */ + if (u->timeout && time_after_eq(jiffies, + up->inserttime + u->timeout)) { + list_del(&u->urb_list); + list_add_tail(&u->urb_list, &list); + } + + spin_unlock(&u->lock); + } + spin_unlock_irqrestore(&xhci->urb_list_lock, flags); + + head = &list; + tmp = head->next; + while (tmp != head) { + struct urb *u = list_entry(tmp, struct urb, urb_list); + + tmp = tmp->next; + + u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED; + xhci_unlink_urb(u); + } + + rh_init_int_timer(urb); +} + +/* Root Hub INTs are polled by this timer */ +static int rh_init_int_timer(struct urb *urb) +{ + xhci->rh.interval = urb->interval; + init_timer(&xhci->rh.rh_int_timer); + xhci->rh.rh_int_timer.function = rh_int_timer_do; + xhci->rh.rh_int_timer.data = (unsigned long)urb; + xhci->rh.rh_int_timer.expires = jiffies + + (HZ * (urb->interval < 30 ? 
30 : urb->interval)) / 1000; + add_timer(&xhci->rh.rh_int_timer); + + return 0; +} + +#define OK(x) len = (x); break + +/* Root Hub Control Pipe */ +static int rh_submit_urb(struct urb *urb) +{ + unsigned int pipe = urb->pipe; + struct usb_ctrlrequest *cmd = + (struct usb_ctrlrequest *)urb->setup_packet; + void *data = urb->transfer_buffer; + int leni = urb->transfer_buffer_length; + int len = 0; + xhci_port_t *status; + int stat = 0; + int i; + int retstatus; + unsigned long flags; + + __u16 cstatus; + __u16 bmRType_bReq; + __u16 wValue; + __u16 wIndex; + __u16 wLength; + + if (usb_pipetype(pipe) == PIPE_INTERRUPT) { + xhci->rh.urb = urb; + xhci->rh.send = 1; + xhci->rh.interval = urb->interval; + rh_init_int_timer(urb); + + return -EINPROGRESS; + } + + bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8; + wValue = le16_to_cpu(cmd->wValue); + wIndex = le16_to_cpu(cmd->wIndex); + wLength = le16_to_cpu(cmd->wLength); + + for (i = 0; i < 8; i++) + xhci->rh.c_p_r[i] = 0; + + status = &xhci->rh.ports[wIndex - 1]; + + spin_lock_irqsave(&xhci->rh.port_state_lock, flags); + + switch (bmRType_bReq) { + /* Request Destination: + without flags: Device, + RH_INTERFACE: interface, + RH_ENDPOINT: endpoint, + RH_CLASS means HUB here, + RH_OTHER | RH_CLASS almost ever means HUB_PORT here + */ + + case RH_GET_STATUS: + *(__u16 *)data = cpu_to_le16(1); + OK(2); + case RH_GET_STATUS | RH_INTERFACE: + *(__u16 *)data = cpu_to_le16(0); + OK(2); + case RH_GET_STATUS | RH_ENDPOINT: + *(__u16 *)data = cpu_to_le16(0); + OK(2); + case RH_GET_STATUS | RH_CLASS: + *(__u32 *)data = cpu_to_le32(0); + OK(4); /* hub power */ + case RH_GET_STATUS | RH_OTHER | RH_CLASS: + cstatus = (status->cs_chg) | + (status->pe_chg << 1) | + (xhci->rh.c_p_r[wIndex - 1] << 4); + retstatus = (status->ccs) | + (status->pe << 1) | + (status->susp << 2) | + (status->pr << 8) | + (1 << 8) | /* power on */ + (status->lsda << 9); + *(__u16 *)data = cpu_to_le16(retstatus); + *(__u16 *)(data + 2) = cpu_to_le16(cstatus); + OK(4); + case RH_CLEAR_FEATURE | RH_ENDPOINT: + switch (wValue) { + case RH_ENDPOINT_STALL: + OK(0); + } + break; + case RH_CLEAR_FEATURE | RH_CLASS: + switch (wValue) { + case RH_C_HUB_OVER_CURRENT: + OK(0); /* hub power over current */ + } + break; + case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS: + switch (wValue) { + case RH_PORT_ENABLE: + status->pe = 0; + OK(0); + case RH_PORT_SUSPEND: + status->susp = 0; + OK(0); + case RH_PORT_POWER: + OK(0); /* port power */ + case RH_C_PORT_CONNECTION: + status->cs_chg = 0; + OK(0); + case RH_C_PORT_ENABLE: + status->pe_chg = 0; + OK(0); + case RH_C_PORT_SUSPEND: + /*** WR_RH_PORTSTAT(RH_PS_PSSC); */ + OK(0); + case RH_C_PORT_OVER_CURRENT: + OK(0); /* port power over current */ + case RH_C_PORT_RESET: + xhci->rh.c_p_r[wIndex - 1] = 0; + OK(0); + } + break; + case RH_SET_FEATURE | RH_OTHER | RH_CLASS: + switch (wValue) { + case RH_PORT_SUSPEND: + status->susp = 1; + OK(0); + case RH_PORT_RESET: + { + int ret; + xhci->rh.c_p_r[wIndex - 1] = 1; + status->pr = 0; + status->pe = 1; + ret = xhci_port_reset(wIndex - 1); + /* XXX MAW: should probably cancel queued transfers during reset... 
*\/ */ + if ( ret == 0 ) { OK(0); } + else { return ret; } + } + break; + case RH_PORT_POWER: + OK(0); /* port power ** */ + case RH_PORT_ENABLE: + status->pe = 1; + OK(0); + } + break; + case RH_SET_ADDRESS: + xhci->rh.devnum = wValue; + OK(0); + case RH_GET_DESCRIPTOR: + switch ((wValue & 0xff00) >> 8) { + case 0x01: /* device descriptor */ + len = min_t(unsigned int, leni, + min_t(unsigned int, + sizeof(root_hub_dev_des), wLength)); + memcpy(data, root_hub_dev_des, len); + OK(len); + case 0x02: /* configuration descriptor */ + len = min_t(unsigned int, leni, + min_t(unsigned int, + sizeof(root_hub_config_des), wLength)); + memcpy (data, root_hub_config_des, len); + OK(len); + case 0x03: /* string descriptors */ + len = usb_root_hub_string (wValue & 0xff, + 0, "XHCI-alt", + data, wLength); + if (len > 0) { + OK(min_t(int, leni, len)); + } else + stat = -EPIPE; + } + break; + case RH_GET_DESCRIPTOR | RH_CLASS: + root_hub_hub_des[2] = xhci->rh.numports; + len = min_t(unsigned int, leni, + min_t(unsigned int, sizeof(root_hub_hub_des), wLength)); + memcpy(data, root_hub_hub_des, len); + OK(len); + case RH_GET_CONFIGURATION: + *(__u8 *)data = 0x01; + OK(1); + case RH_SET_CONFIGURATION: + OK(0); + case RH_GET_INTERFACE | RH_INTERFACE: + *(__u8 *)data = 0x00; + OK(1); + case RH_SET_INTERFACE | RH_INTERFACE: + OK(0); + default: + stat = -EPIPE; + } + + spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags); + + urb->actual_length = len; + + return stat; +} + +/* + * MUST be called with urb->lock acquired + */ +static int rh_unlink_urb(struct urb *urb) +{ + if (xhci->rh.urb == urb) { + urb->status = -ENOENT; + xhci->rh.send = 0; + xhci->rh.urb = NULL; + del_timer(&xhci->rh.rh_int_timer); + } + return 0; +} + +/****************************************************************************** + * CONTROL PLANE FUNCTIONALITY + */ + +/** + * alloc_xhci - initialise a new virtual root hub for a new USB device channel + */ +static int alloc_xhci(void) +{ + int retval; + struct usb_bus *bus; + + retval = -EBUSY; + + xhci = kmalloc(sizeof(*xhci), GFP_KERNEL); + if (!xhci) { + err("couldn't allocate xhci structure"); + retval = -ENOMEM; + goto err_alloc_xhci; + } + + xhci->state = USBIF_STATE_CLOSED; + + spin_lock_init(&xhci->urb_list_lock); + INIT_LIST_HEAD(&xhci->urb_list); + + spin_lock_init(&xhci->complete_list_lock); + INIT_LIST_HEAD(&xhci->complete_list); + + spin_lock_init(&xhci->frame_list_lock); + + bus = usb_alloc_bus(&xhci_device_operations); + + if (!bus) { + err("unable to allocate bus"); + goto err_alloc_bus; + } + + xhci->bus = bus; + bus->bus_name = "XHCI"; + bus->hcpriv = xhci; + + usb_register_bus(xhci->bus); + + /* Initialize the root hub */ + + xhci->rh.numports = 0; + + xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus); + if (!xhci->rh.dev) { + err("unable to allocate root hub"); + goto err_alloc_root_hub; + } + + xhci->state = 0; + + return 0; + +/* + * error exits: + */ +err_alloc_root_hub: + usb_deregister_bus(xhci->bus); + usb_free_bus(xhci->bus); + xhci->bus = NULL; + +err_alloc_bus: + kfree(xhci); + +err_alloc_xhci: + return retval; +} + +/** + * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message + */ +static void usbif_status_change(usbif_fe_interface_status_changed_t *status) +{ + ctrl_msg_t cmsg; + usbif_fe_interface_connect_t up; + long rc; + usbif_sring_t *sring; + + switch ( status->status ) + { + case USBIF_INTERFACE_STATUS_DESTROYED: + printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n", + xhci->state); + break; 
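The DISCONNECTED arm below brings the shared ring up and assumes __get_free_page() succeeds; a more defensive version of the same bring-up (a sketch only):

sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
if ( sring == NULL )
{
    printk(KERN_WARNING "usbfront: out of memory for shared ring\n");
    break;
}
SHARED_RING_INIT(sring);                  /* reset producer/consumer indices */
FRONT_RING_INIT(&xhci->usb_ring, sring);  /* attach it as our front ring */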
+ + case USBIF_INTERFACE_STATUS_DISCONNECTED: + if ( xhci->state != USBIF_STATE_CLOSED ) + { + printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message" + " in state %d\n", xhci->state); + break; + /* Not bothering to do recovery here for now. Keep things + * simple. */ + } + + /* Move from CLOSED to DISCONNECTED state. */ + sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL); + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&xhci->usb_ring, sring); + xhci->state = USBIF_STATE_DISCONNECTED; + + /* Construct an interface-CONNECT message for the domain controller. */ + cmsg.type = CMSG_USBIF_FE; + cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT; + cmsg.length = sizeof(usbif_fe_interface_connect_t); + up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT; + memcpy(cmsg.msg, &up, sizeof(up)); + + /* Tell the controller to bring up the interface. */ + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + break; + + case USBIF_INTERFACE_STATUS_CONNECTED: + if ( xhci->state == USBIF_STATE_CLOSED ) + { + printk(KERN_WARNING "Unexpected usbif-CONNECTED message" + " in state %d\n", xhci->state); + break; + } + + xhci->evtchn = status->evtchn; + xhci->irq = bind_evtchn_to_irq(xhci->evtchn); + xhci->bandwidth = status->bandwidth; + xhci->rh.numports = status->num_ports; + + xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL); + memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports); + + usb_connect(xhci->rh.dev); + + if (usb_new_device(xhci->rh.dev) != 0) { + err("unable to start root hub"); + } + + /* Allocate the appropriate USB bandwidth here... Need to + * somehow know what the total available is thought to be so we + * can calculate the reservation correctly. */ + usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb, + 1000 - xhci->bandwidth, 0); + + if ( (rc = request_irq(xhci->irq, xhci_interrupt, + SA_SAMPLE_RANDOM, "usbif", xhci)) ) + printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc); + + DPRINTK(KERN_INFO __FILE__ + ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n", + xhci->usb_ring.sring, virt_to_machine(xhci->usbif), + xhci->evtchn, xhci->irq); + + xhci->state = USBIF_STATE_CONNECTED; + + break; + + default: + printk(KERN_WARNING "Status change to unknown value %d\n", + status->status); + break; + } +} + +/** + * usbif_ctrlif_rx - demux control messages by subtype + */ +static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->subtype ) + { + case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED: + if ( msg->length != sizeof(usbif_fe_interface_status_changed_t) ) + goto parse_error; + usbif_status_change((usbif_fe_interface_status_changed_t *) + &msg->msg[0]); + break; + + /* New interface...? */ + default: + goto parse_error; + } + + ctrl_if_send_response(msg); + return; + + parse_error: + msg->length = 0; + ctrl_if_send_response(msg); +} + + +static int __init xhci_hcd_init(void) +{ + int retval = -ENOMEM, i; + usbif_fe_interface_status_changed_t st; + control_msg_t cmsg; + + if ( (xen_start_info.flags & SIF_INITDOMAIN) + || (xen_start_info.flags & SIF_USB_BE_DOMAIN) ) + return 0; + + info(DRIVER_DESC " " DRIVER_VERSION); + + if (debug) { + errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL); + if (!errbuf) + goto errbuf_failed; + } + + xhci_up_cachep = kmem_cache_create("xhci_urb_priv", + sizeof(struct urb_priv), 0, 0, NULL, NULL); + if (!xhci_up_cachep) + goto up_failed; + + /* Let the domain controller know we're here. For now we wait until + * connection, as for the block and net drivers. 
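usbif_ctrlif_rx() above is the single demux point for backend control messages, so a new message type would slot in alongside the existing case; CMSG_USBIF_FE_PORT_STATUS_CHANGED and its handler are hypothetical names, purely for illustration:

case CMSG_USBIF_FE_PORT_STATUS_CHANGED:      /* hypothetical subtype */
    if ( msg->length != sizeof(usbif_fe_port_status_changed_t) )
        goto parse_error;                    /* validate length first */
    usbif_port_status_change(
        (usbif_fe_port_status_changed_t *)&msg->msg[0]);
    break;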
This is only strictly + * necessary if we're going to boot off a USB device. */ + printk(KERN_INFO "Initialising Xen virtual USB hub\n"); + + (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + alloc_xhci(); + + /* Send a driver-UP notification to the domain controller. */ + cmsg.type = CMSG_USBIF_FE; + cmsg.subtype = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED; + cmsg.length = sizeof(usbif_fe_driver_status_changed_t); + st.status = USBIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &st, sizeof(st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + + /* + * We should read 'nr_interfaces' from response message and wait + * for notifications before proceeding. For now we assume that we + * will be notified of exactly one interface. + */ + for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ ) + { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } + + if (xhci->state != USBIF_STATE_CONNECTED) + printk(KERN_WARNING "Timeout connecting USB frontend driver!\n"); + + return 0; + +up_failed: + if (errbuf) + kfree(errbuf); + +errbuf_failed: + return retval; +} + +module_init(xhci_hcd_init); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); + diff --git a/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h new file mode 100644 index 0000000000..f503e59ebc --- /dev/null +++ b/linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h @@ -0,0 +1,180 @@ +/****************************************************************************** + * xhci.h + * + * Private definitions for the Xen Virtual USB Controller. Based on + * drivers/usb/host/uhci.h from Linux. Copyright for the imported content is + * retained by the original authors. + * + * Modifications are: + * Copyright (C) 2004 Intel Research Cambridge + * Copyright (C) 2004, 2005 Mark Williamson + */ + +#ifndef __LINUX_XHCI_H +#define __LINUX_XHCI_H + +#include +#include +#include +#include + +/* xhci_port_t - current known state of a virtual hub ports */ +typedef struct { + unsigned int cs :1; /* Connection status. do we really need this /and/ ccs? */ + unsigned int cs_chg :1; /* Connection status change. */ + unsigned int pe :1; /* Port enable. */ + unsigned int pe_chg :1; /* Port enable change. */ + unsigned int ccs :1; /* Current connect status. */ + unsigned int susp :1; /* Suspended. */ + unsigned int lsda :1; /* Low speed device attached. */ + unsigned int pr :1; /* Port reset. 
*/ +} xhci_port_t; + +/* struct virt_root_hub - state related to the virtual root hub */ +struct virt_root_hub { + struct usb_device *dev; + int devnum; /* Address of Root Hub endpoint */ + struct urb *urb; + void *int_addr; + int send; + int interval; + int numports; + int c_p_r[8]; + struct timer_list rh_int_timer; + spinlock_t port_state_lock; + xhci_port_t *ports; +}; + +/* struct xhci - contains the state associated with a single USB interface */ +struct xhci { + +#ifdef CONFIG_PROC_FS + /* procfs */ + int num; + struct proc_dir_entry *proc_entry; +#endif + + int evtchn; /* Interdom channel to backend */ + int irq; /* Bound to evtchn */ + enum { USBIF_STATE_CONNECTED = 2, + USBIF_STATE_DISCONNECTED = 1, + USBIF_STATE_CLOSED = 0 + } state; /* State of this USB interface */ + unsigned long bandwidth; + + struct usb_bus *bus; + + /* Main list of URB's currently controlled by this HC */ + spinlock_t urb_list_lock; + struct list_head urb_list; /* P: xhci->urb_list_lock */ + + /* List of URB's awaiting completion callback */ + spinlock_t complete_list_lock; + struct list_head complete_list; /* P: xhci->complete_list_lock */ + + struct virt_root_hub rh; /* private data of the virtual root hub */ + + usbif_front_ring_t usb_ring; + + int awaiting_reset; +}; + +/* per-URB private data structure for the host controller */ +struct urb_priv { + struct urb *urb; + usbif_iso_t *schedule; + struct usb_device *dev; + + int in_progress : 1; /* QH was queued (not linked in) */ + int short_control_packet : 1; /* If we get a short packet during */ + /* a control transfer, retrigger */ + /* the status phase */ + + int status; /* Final status */ + + unsigned long inserttime; /* In jiffies */ + + struct list_head complete_list; /* P: xhci->complete_list_lock */ +}; + +/* + * Locking in xhci.c + * + * spinlocks are used extensively to protect the many lists and data + * structures we have. It's not that pretty, but it's necessary. We + * need to be done with all of the locks (except complete_list_lock) when + * we call urb->complete. I've tried to make it simple enough so I don't + * have to spend hours racking my brain trying to figure out if the + * locking is safe. + * + * Here's the safe locking order to prevent deadlocks: + * + * #1 xhci->urb_list_lock + * #2 urb->lock + * #3 xhci->urb_remove_list_lock + * #4 xhci->complete_list_lock + * + * If you're going to grab 2 or more locks at once, ALWAYS grab the lock + * at the lowest level FIRST and NEVER grab locks at the same level at the + * same time. 
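In practice that ordering is exactly what xhci_submit_urb() and xhci_unlink_urb() follow; the canonical shape under the rules above:

spin_lock_irqsave(&xhci->urb_list_lock, flags);    /* level #1 first */
spin_lock(&urb->lock);                             /* then level #2 */
/* ... operate on the URB and the global list ... */
spin_unlock(&urb->lock);                           /* release in reverse */
spin_unlock_irqrestore(&xhci->urb_list_lock, flags);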
+ * + * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock + */ + +/* ------------------------------------------------------------------------- + Virtual Root HUB + ------------------------------------------------------------------------- */ +/* destination of request */ +#define RH_DEVICE 0x00 +#define RH_INTERFACE 0x01 +#define RH_ENDPOINT 0x02 +#define RH_OTHER 0x03 + +#define RH_CLASS 0x20 +#define RH_VENDOR 0x40 + +/* Requests: bRequest << 8 | bmRequestType */ +#define RH_GET_STATUS 0x0080 +#define RH_CLEAR_FEATURE 0x0100 +#define RH_SET_FEATURE 0x0300 +#define RH_SET_ADDRESS 0x0500 +#define RH_GET_DESCRIPTOR 0x0680 +#define RH_SET_DESCRIPTOR 0x0700 +#define RH_GET_CONFIGURATION 0x0880 +#define RH_SET_CONFIGURATION 0x0900 +#define RH_GET_STATE 0x0280 +#define RH_GET_INTERFACE 0x0A80 +#define RH_SET_INTERFACE 0x0B00 +#define RH_SYNC_FRAME 0x0C80 +/* Our Vendor Specific Request */ +#define RH_SET_EP 0x2000 + +/* Hub port features */ +#define RH_PORT_CONNECTION 0x00 +#define RH_PORT_ENABLE 0x01 +#define RH_PORT_SUSPEND 0x02 +#define RH_PORT_OVER_CURRENT 0x03 +#define RH_PORT_RESET 0x04 +#define RH_PORT_POWER 0x08 +#define RH_PORT_LOW_SPEED 0x09 +#define RH_C_PORT_CONNECTION 0x10 +#define RH_C_PORT_ENABLE 0x11 +#define RH_C_PORT_SUSPEND 0x12 +#define RH_C_PORT_OVER_CURRENT 0x13 +#define RH_C_PORT_RESET 0x14 + +/* Hub features */ +#define RH_C_HUB_LOCAL_POWER 0x00 +#define RH_C_HUB_OVER_CURRENT 0x01 +#define RH_DEVICE_REMOTE_WAKEUP 0x00 +#define RH_ENDPOINT_STALL 0x01 + +/* Our Vendor Specific feature */ +#define RH_REMOVE_EP 0x00 + +#define RH_ACK 0x01 +#define RH_REQ_ERR -1 +#define RH_NACK 0x00 + +#endif + diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h new file mode 100644 index 0000000000..421a81f17b --- /dev/null +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h @@ -0,0 +1,59 @@ +/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws + * which needs to alter them. */ + +static inline void smpboot_clear_io_apic_irqs(void) +{ +#if 1 + printk("smpboot_clear_io_apic_irqs\n"); +#else + io_apic_irqs = 0; +#endif +} + +static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) +{ +#if 1 + printk("smpboot_setup_warm_reset_vector\n"); +#else + CMOS_WRITE(0xa, 0xf); + local_flush_tlb(); + Dprintk("1.\n"); + *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; + Dprintk("2.\n"); + *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; + Dprintk("3.\n"); +#endif +} + +static inline void smpboot_restore_warm_reset_vector(void) +{ + /* + * Install writable page 0 entry to set BIOS data area. + */ + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + CMOS_WRITE(0, 0xf); + + *((volatile long *) phys_to_virt(0x467)) = 0; +} + +static inline void smpboot_setup_io_apic(void) +{ +#if 1 + printk("smpboot_setup_io_apic\n"); +#else + /* + * Here we can be sure that there is an IO-APIC in the system. 
Let's + * go and set it up: + */ + if (!skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +#endif +} + + +#define smp_found_config (HYPERVISOR_shared_info->n_vcpu > 1) diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h index d0acc128a1..e3dfb002c7 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h @@ -114,7 +114,6 @@ static inline unsigned long pgd_val(pgd_t x) if (ret) ret = machine_to_phys(ret); return ret; } -#define pgd_val_ma(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) static inline pte_t __pte(unsigned long x) diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h new file mode 100644 index 0000000000..d7189a7c28 --- /dev/null +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h @@ -0,0 +1,250 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include +#include +#include +#include +#include + +asmlinkage int printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +typedef struct { + volatile unsigned int slock; +#ifdef CONFIG_DEBUG_SPINLOCK + unsigned magic; +#endif +#ifdef CONFIG_PREEMPT + unsigned int break_lock; +#endif +} spinlock_t; + +#define SPINLOCK_MAGIC 0xdead4ead + +#ifdef CONFIG_DEBUG_SPINLOCK +#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC +#else +#define SPINLOCK_MAGIC_INIT /* */ +#endif + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0) +#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + "3:\n\t" + +#define spin_lock_string_flags \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "jns 4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ + "jz 3f\n\t" \ + "#sti\n\t" \ + "3:\t" \ + "rep;nop\n\t" \ + "cmpb $0, %0\n\t" \ + "jle 3b\n\t" \ + "#cli\n\t" \ + "jmp 1b\n" \ + "4:\n\t" + +/* + * This works. Despite all the confusion. 
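Rendered as C-like pseudocode, the byte protocol those lock strings implement is roughly the following; this is a sketch only (atomic_dec_byte stands in for the 'lock; decb' instruction and returns the new value):

for ( ; ; )
{
    if ( atomic_dec_byte(&lock->slock) >= 0 )   /* 1 -> 0: lock acquired */
        return;
    while ( lock->slock <= 0 )                  /* read-only spin (rep;nop) */
        cpu_relax();
}
/* unlock simply stores 1 back into slock */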
+ * (except on PPro SMP or if we are using OOSTORE) + * (PPro errata 66, 92) + */ + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) + +#define spin_unlock_string \ + "movb $1,%0" \ + :"=m" (lock->slock) : : "memory" + + +static inline void _raw_spin_unlock(spinlock_t *lock) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + BUG_ON(lock->magic != SPINLOCK_MAGIC); + BUG_ON(!spin_is_locked(lock)); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#else + +#define spin_unlock_string \ + "xchgb %b0, %1" \ + :"=q" (oldval), "=m" (lock->slock) \ + :"0" (oldval) : "memory" + +static inline void _raw_spin_unlock(spinlock_t *lock) +{ + char oldval = 1; +#ifdef CONFIG_DEBUG_SPINLOCK + BUG_ON(lock->magic != SPINLOCK_MAGIC); + BUG_ON(!spin_is_locked(lock)); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#endif + +static inline int _raw_spin_trylock(spinlock_t *lock) +{ + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); + return oldval > 0; +} + +static inline void _raw_spin_lock(spinlock_t *lock) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + if (unlikely(lock->magic != SPINLOCK_MAGIC)) { + printk("eip: %p\n", __builtin_return_address(0)); + BUG(); + } +#endif + __asm__ __volatile__( + spin_lock_string + :"=m" (lock->slock) : : "memory"); +} + +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + if (unlikely(lock->magic != SPINLOCK_MAGIC)) { + printk("eip: %p\n", __builtin_return_address(0)); + BUG(); + } +#endif + __asm__ __volatile__( + spin_lock_string_flags + :"=m" (lock->slock) : "r" (flags) : "memory"); +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ +typedef struct { + volatile unsigned int lock; +#ifdef CONFIG_DEBUG_SPINLOCK + unsigned magic; +#endif +#ifdef CONFIG_PREEMPT + unsigned int break_lock; +#endif +} rwlock_t; + +#define RWLOCK_MAGIC 0xdeaf1eed + +#ifdef CONFIG_DEBUG_SPINLOCK +#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC +#else +#define RWLOCK_MAGIC_INIT /* */ +#endif + +#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } + +#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + +/** + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +#define read_can_lock(x) ((int)(x)->lock > 0) + +/** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +#define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) + +/* + * On x86, we implement read-write locks as a 32-bit counter + * with the high bit (sign) being the "contended" bit. + * + * The inline assembly is non-obvious. Think about it. + * + * Changed to use the same technique as rw semaphores. See + * semaphore.h for details. 
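Concretely, with RW_LOCK_BIAS = 0x01000000 (from asm/rwlock.h), the single counter encodes readers and writers at once; a few sample states, assuming that bias:

/* count == 0x01000000   unlocked                              */
/* count == 0x00fffffd   three readers hold the lock           */
/* count == 0x00000000   one writer holds it exclusively       */
/* count <  0            a writer is blocked behind readers    */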
-ben + */ +/* the spinlock helpers are in arch/i386/kernel/semaphore.c */ + +static inline void _raw_read_lock(rwlock_t *rw) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + BUG_ON(rw->magic != RWLOCK_MAGIC); +#endif + __build_read_lock(rw, "__read_lock_failed"); +} + +static inline void _raw_write_lock(rwlock_t *rw) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + BUG_ON(rw->magic != RWLOCK_MAGIC); +#endif + __build_write_lock(rw, "__write_lock_failed"); +} + +#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") + +static inline int _raw_read_trylock(rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + atomic_dec(count); + if (atomic_read(count) >= 0) + return 1; + atomic_inc(count); + return 0; +} + +static inline int _raw_write_trylock(rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; +} + +#endif /* __ASM_SPINLOCK_H */