Update the Linux 2.6 sparse tree from 2.6.10 to 2.6.11.
Signed-off-by: Christian Limpach <chris@xensource.com>
3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.29-xen-sparse/mm/swapfile.c
41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.29-xen-sparse/mm/vmalloc.c
41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.29-xen-sparse/net/core/skbuff.c
-41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
-41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
-41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
-418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
-41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
-41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
-41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
-41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
-41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
-41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
-41ee5e8bYDQkjRVKnFn5uFyy0KreCw linux-2.6.10-xen-sparse/drivers/xen/usbback/common.h
-41ee5e8bt7xeBUJqG5XJS-ofukdsgA linux-2.6.10-xen-sparse/drivers/xen/usbback/control.c
-41ee5e8bSs3BGC7yegM_ek2Tn0Ahvw linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c
-41ee5e8bglvqKvZSY5uJ5JGQejEwyQ linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c
-41ee5e8ckZ9xVNvu9NHIZDK7JqApmQ linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c
-41ee5e8ck9scpGirfqEZRARbGDyTXA linux-2.6.10-xen-sparse/drivers/xen/usbfront/xhci.h
-41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
-4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.11-xen-sparse/arch/xen/Kconfig
40f56237utH41NPukqHksuNf29IC9A linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers
40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.11-xen-sparse/arch/xen/Makefile
40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S
40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c
+41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/irq.c
40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
41d54a76YMCA67S8J-TBT3J62Wx6yA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/microcode.c
4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c
40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c
40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c
40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c
+41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smp.c
+41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c
40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c
40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile
40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c
414c113396tK1HTVeUalm3u-1DF16g linux-2.6.11-xen-sparse/arch/xen/kernel/skbuff.c
+418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.11-xen-sparse/arch/xen/kernel/smp.c
3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c
41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.11-xen-sparse/drivers/Makefile
4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.11-xen-sparse/drivers/char/mem.c
40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
+41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.11-xen-sparse/drivers/xen/blktap/Makefile
+41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.c
+41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap.h
+41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
+41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c
+41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c
40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.11-xen-sparse/drivers/xen/console/Makefile
3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.11-xen-sparse/drivers/xen/console/console.c
40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.11-xen-sparse/drivers/xen/evtchn/Makefile
405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.11-xen-sparse/drivers/xen/privcmd/Makefile
3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c
+41ee5e8bYDQkjRVKnFn5uFyy0KreCw linux-2.6.11-xen-sparse/drivers/xen/usbback/common.h
+41ee5e8bt7xeBUJqG5XJS-ofukdsgA linux-2.6.11-xen-sparse/drivers/xen/usbback/control.c
+41ee5e8bSs3BGC7yegM_ek2Tn0Ahvw linux-2.6.11-xen-sparse/drivers/xen/usbback/interface.c
+41ee5e8bglvqKvZSY5uJ5JGQejEwyQ linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
+41ee5e8ckZ9xVNvu9NHIZDK7JqApmQ linux-2.6.11-xen-sparse/drivers/xen/usbfront/usbfront.c
+41ee5e8ck9scpGirfqEZRARbGDyTXA linux-2.6.11-xen-sparse/drivers/xen/usbfront/xhci.h
412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h
40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h
4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
+41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h
40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/param.h
412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h
40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h
40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/setup.h
+4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h
40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
--- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
+++ /dev/null
-/*
- * linux/arch/i386/kernel/irq.c
- *
- * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
- *
- * This file contains the lowest level x86-specific interrupt
- * entry, irq-stacks and irq statistics code. All the remaining
- * irq logic is done by the generic kernel/irq/ code and
- * by the x86-specific irq controller code. (e.g. i8259.c and
- * io_apic.c.)
- */
-
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-#include <linux/kernel_stat.h>
-
-#ifndef CONFIG_X86_LOCAL_APIC
-/*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
- */
-void ack_bad_irq(unsigned int irq)
-{
- printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-#endif
-
-#ifdef CONFIG_4KSTACKS
-/*
- * per-CPU IRQ handling contexts (thread information and stack)
- */
-union irq_ctx {
- struct thread_info tinfo;
- u32 stack[THREAD_SIZE/sizeof(u32)];
-};
-
-static union irq_ctx *hardirq_ctx[NR_CPUS];
-static union irq_ctx *softirq_ctx[NR_CPUS];
-#endif
-
-/*
- * do_IRQ handles all normal device IRQ's (the special
- * SMP cross-CPU interrupts have their own specific
- * handlers).
- */
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
-{
- /* high bits used in ret_from_ code */
- int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
-#ifdef CONFIG_4KSTACKS
- union irq_ctx *curctx, *irqctx;
- u32 *isp;
-#endif
-
- irq_enter();
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
- /* Debugging check for stack overflow: is there less than 1KB free? */
- {
- long esp;
-
- __asm__ __volatile__("andl %%esp,%0" :
- "=r" (esp) : "0" (THREAD_SIZE - 1));
- if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
- printk("do_IRQ: stack overflow: %ld\n",
- esp - sizeof(struct thread_info));
- dump_stack();
- }
- }
-#endif
-
-#ifdef CONFIG_4KSTACKS
-
- curctx = (union irq_ctx *) current_thread_info();
- irqctx = hardirq_ctx[smp_processor_id()];
-
- /*
- * this is where we switch to the IRQ stack. However, if we are
- * already using the IRQ stack (because we interrupted a hardirq
- * handler) we can't do that and just have to keep using the
- * current stack (which is the irq stack already after all)
- */
- if (curctx != irqctx) {
- int arg1, arg2, ebx;
-
- /* build the stack frame on the IRQ stack */
- isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
- irqctx->tinfo.task = curctx->tinfo.task;
- irqctx->tinfo.previous_esp = current_stack_pointer;
-
- asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call __do_IRQ \n"
- " movl %%ebx,%%esp \n"
- : "=a" (arg1), "=d" (arg2), "=b" (ebx)
- : "0" (irq), "1" (regs), "2" (isp)
- : "memory", "cc", "ecx"
- );
- } else
-#endif
- __do_IRQ(irq, regs);
-
- irq_exit();
-
- return 1;
-}
-
-#ifdef CONFIG_4KSTACKS
-
-/*
- * These should really be __section__(".bss.page_aligned") as well, but
- * gcc's 3.0 and earlier don't handle that correctly.
- */
-static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
-
-/*
- * allocate per-cpu stacks for hardirq and for softirq processing
- */
-void irq_ctx_init(int cpu)
-{
- union irq_ctx *irqctx;
-
- if (hardirq_ctx[cpu])
- return;
-
- irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
- irqctx->tinfo.task = NULL;
- irqctx->tinfo.exec_domain = NULL;
- irqctx->tinfo.cpu = cpu;
- irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
- irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
-
- hardirq_ctx[cpu] = irqctx;
-
- irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
- irqctx->tinfo.task = NULL;
- irqctx->tinfo.exec_domain = NULL;
- irqctx->tinfo.cpu = cpu;
- irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET;
- irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
-
- softirq_ctx[cpu] = irqctx;
-
- printk("CPU %u irqstacks, hard=%p soft=%p\n",
- cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
-}
-
-extern asmlinkage void __do_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
- unsigned long flags;
- struct thread_info *curctx;
- union irq_ctx *irqctx;
- u32 *isp;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
-
- if (local_softirq_pending()) {
- curctx = current_thread_info();
- irqctx = softirq_ctx[smp_processor_id()];
- irqctx->tinfo.task = curctx->task;
- irqctx->tinfo.previous_esp = current_stack_pointer;
-
- /* build the stack frame on the softirq stack */
- isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
-
- asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call __do_softirq \n"
- " movl %%ebx,%%esp \n"
- : "=b"(isp)
- : "0"(isp)
- : "memory", "cc", "edx", "ecx", "eax"
- );
- }
-
- local_irq_restore(flags);
-}
-
-EXPORT_SYMBOL(do_softirq);
-#endif
-
-/*
- * Interrupt statistics:
- */
-
-atomic_t irq_err_count;
-
-/*
- * /proc/interrupts printing:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
- int i = *(loff_t *) v, j;
- struct irqaction * action;
- unsigned long flags;
-
- if (i == 0) {
- seq_printf(p, " ");
- for (j=0; j<NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "CPU%d ",j);
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
- if (!action)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#endif
- seq_printf(p, " %14s", irq_desc[i].handler->typename);
- seq_printf(p, " %s", action->name);
-
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
-
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS) {
- seq_printf(p, "NMI: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ", nmi_count(j));
- seq_putc(p, '\n');
-#ifdef CONFIG_X86_LOCAL_APIC
- seq_printf(p, "LOC: ");
- for (j = 0; j < NR_CPUS; j++)
- if (cpu_online(j))
- seq_printf(p, "%10u ",
- irq_stat[j].apic_timer_irqs);
- seq_putc(p, '\n');
-#endif
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
-#endif
- }
- return 0;
-}
--- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
+++ /dev/null
-/*
- * Intel SMP support routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/irq.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#if 0
-#include <mach_apic.h>
-#endif
-#include <asm-xen/evtchn.h>
-
-#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
-
-/*
- * Some notes on x86 processor bugs affecting SMP operation:
- *
- * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
- * The Linux implications for SMP are handled as follows:
- *
- * Pentium III / [Xeon]
- * None of the E1AP-E3AP errata are visible to the user.
- *
- * E1AP. see PII A1AP
- * E2AP. see PII A2AP
- * E3AP. see PII A3AP
- *
- * Pentium II / [Xeon]
- * None of the A1AP-A3AP errata are visible to the user.
- *
- * A1AP. see PPro 1AP
- * A2AP. see PPro 2AP
- * A3AP. see PPro 7AP
- *
- * Pentium Pro
- * None of 1AP-9AP errata are visible to the normal user,
- * except occasional delivery of 'spurious interrupt' as trap #15.
- * This is very rare and a non-problem.
- *
- * 1AP. Linux maps APIC as non-cacheable
- * 2AP. worked around in hardware
- * 3AP. fixed in C0 and above steppings microcode update.
- * Linux does not use excessive STARTUP_IPIs.
- * 4AP. worked around in hardware
- * 5AP. symmetric IO mode (normal Linux operation) not affected.
- * 'noapic' mode has vector 0xf filled out properly.
- * 6AP. 'noapic' mode might be affected - fixed in later steppings
- * 7AP. We do not assume writes to the LVT deasserting IRQs
- * 8AP. We do not enable low power mode (deep sleep) during MP bootup
- * 9AP. We do not use mixed mode
- *
- * Pentium
- * There is a marginal case where REP MOVS on 100MHz SMP
- * machines with B stepping processors can fail. XXX should provide
- * an L1cache=Writethrough or L1cache=off option.
- *
- * B stepping CPUs may hang. There are hardware work arounds
- * for this. We warn about it in case your board doesn't have the work
- * arounds. Basically that's so I can tell anyone with a B stepping
- * CPU and SMP problems "tough".
- *
- * Specific items [From Pentium Processor Specification Update]
- *
- * 1AP. Linux doesn't use remote read
- * 2AP. Linux doesn't trust APIC errors
- * 3AP. We work around this
- * 4AP. Linux never generated 3 interrupts of the same priority
- * to cause a lost local interrupt.
- * 5AP. Remote read is never used
- * 6AP. not affected - worked around in hardware
- * 7AP. not affected - worked around in hardware
- * 8AP. worked around in hardware - we get explicit CS errors if not
- * 9AP. only 'noapic' mode affected. Might generate spurious
- * interrupts, we log only the first one and count the
- * rest silently.
- * 10AP. not affected - worked around in hardware
- * 11AP. Linux reads the APIC between writes to avoid this, as per
- * the documentation. Make sure you preserve this as it affects
- * the C stepping chips too.
- * 12AP. not affected - worked around in hardware
- * 13AP. not affected - worked around in hardware
- * 14AP. we always deassert INIT during bootup
- * 15AP. not affected - worked around in hardware
- * 16AP. not affected - worked around in hardware
- * 17AP. not affected - worked around in hardware
- * 18AP. not affected - worked around in hardware
- * 19AP. not affected - worked around in BIOS
- *
- * If this sounds worrying believe me these bugs are either ___RARE___,
- * or are signal timing bugs worked around in hardware and there's
- * about nothing of note with C stepping upwards.
- */
-
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline int __prepare_ICR (unsigned int shortcut, int vector)
-{
- return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
-}
-
-static inline int __prepare_ICR2 (unsigned int mask)
-{
- return SET_APIC_DEST_FIELD(mask);
-}
-
-DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
-
-static inline void __send_IPI_one(unsigned int cpu, int vector)
-{
- unsigned int evtchn;
-
- evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
- // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
- if (evtchn) {
-#if 0
- shared_info_t *s = HYPERVISOR_shared_info;
- while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
- synch_test_bit(evtchn, &s->evtchn_mask[0]))
- ;
-#endif
- notify_via_evtchn(evtchn);
- } else
- printk("send_IPI to unbound port %d/%d",
- cpu, vector);
-}
-
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
-{
- int cpu;
-
- switch (shortcut) {
- case APIC_DEST_SELF:
- __send_IPI_one(smp_processor_id(), vector);
- break;
- case APIC_DEST_ALLBUT:
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
- if (cpu == smp_processor_id())
- continue;
- if (cpu_isset(cpu, cpu_online_map)) {
- __send_IPI_one(cpu, vector);
- }
- }
- break;
- default:
- printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
- vector);
- break;
- }
-}
-
-void fastcall send_IPI_self(int vector)
-{
- __send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
-/*
- * This is only used on smaller machines.
- */
-void send_IPI_mask_bitmask(cpumask_t mask, int vector)
-{
- unsigned long flags;
- unsigned int cpu;
-
- local_irq_save(flags);
-
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
- if (cpu_isset(cpu, mask)) {
- __send_IPI_one(cpu, vector);
- }
- }
-
- local_irq_restore(flags);
-}
-
-inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
-{
-
- send_IPI_mask_bitmask(mask, vector);
-}
-
-#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
-
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (It's not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
-#define FLUSH_ALL 0xffffffff
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-static inline void leave_mm (unsigned long cpu)
-{
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
-}
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignores flush ipis
- * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
-{
- unsigned long cpu;
-
- cpu = get_cpu();
-
- if (!cpu_isset(cpu, flush_cpumask))
- goto out;
- /*
- * This was a BUG() but until someone can quote me the
- * line from the intel manual that guarantees an IPI to
- * multiple CPUs is retried _only_ on the erroring CPUs
- * its staying as a return
- *
- * BUG();
- */
-
- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
- if (flush_va == FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(flush_va);
- } else
- leave_mm(cpu);
- }
- smp_mb__before_clear_bit();
- cpu_clear(cpu, flush_cpumask);
- smp_mb__after_clear_bit();
-out:
- put_cpu_no_resched();
-
- return IRQ_HANDLED;
-}
-
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
- unsigned long va)
-{
- cpumask_t tmp;
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - we do not send IPIs to not-yet booted CPUs.
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
-
- cpus_and(tmp, cpumask, cpu_online_map);
- BUG_ON(!cpus_equal(cpumask, tmp));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- BUG_ON(!mm);
-
- /*
- * i'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
- * Temporarily this turns IRQs off, so that lockups are
- * detected by the NMI watchdog.
- */
- spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
-#if NR_CPUS <= BITS_PER_LONG
- atomic_set_mask(cpumask, &flush_cpumask);
-#else
- {
- int k;
- unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
- unsigned long *cpu_mask = (unsigned long *)&cpumask;
- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
- atomic_set_mask(cpu_mask[k], &flush_mask[k]);
- }
-#endif
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
- while (!cpus_empty(flush_cpumask))
- /* nothing. lockup detection does not belong here */
- mb();
-
- flush_mm = NULL;
- flush_va = 0;
- spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
- preempt_enable();
-}
-
-void flush_tlb_mm (struct mm_struct * mm)
-{
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- leave_mm(smp_processor_id());
- }
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
- preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{
- struct mm_struct *mm = vma->vm_mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if(current->mm)
- __flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
- }
-
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, va);
-
- preempt_enable();
-}
-
-static void do_flush_tlb_all(void* info)
-{
- unsigned long cpu = smp_processor_id();
-
- __flush_tlb_all();
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
- leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-}
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-void smp_send_reschedule(int cpu)
-{
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>>, are executing it, or have executed it.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-{
- struct call_data_struct data;
- int cpus = num_online_cpus()-1;
-
- if (!cpus)
- return 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- spin_lock(&call_lock);
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- barrier();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- barrier();
- spin_unlock(&call_lock);
-
- return 0;
-}
-
-static void stop_this_cpu (void * dummy)
-{
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_disable();
-#if 1
- xxprint("stop_this_cpu disable_local_APIC\n");
-#else
- disable_local_APIC();
-#endif
- if (cpu_data[smp_processor_id()].hlt_works_ok)
- for(;;) __asm__("hlt");
- for (;;);
-}
-
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
-void smp_send_stop(void)
-{
- smp_call_function(stop_this_cpu, NULL, 1, 0);
-
- local_irq_disable();
-#if 1
- xxprint("smp_send_stop disable_local_APIC\n");
-#else
- disable_local_APIC();
-#endif
- local_irq_enable();
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
-{
-
- return IRQ_HANDLED;
-}
-
-#include <linux/kallsyms.h>
-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- irq_enter();
- (*func)(info);
- irq_exit();
-
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
-
- return IRQ_HANDLED;
-}
-
--- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
+++ /dev/null
-/*
- * x86 SMP booting functions
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Much of the core SMP work is based on previous work by Thomas Radke, to
- * whom a great many thanks are extended.
- *
- * Thanks to Intel for making available several different Pentium,
- * Pentium Pro and Pentium-II/Xeon MP machines.
- * Original development of Linux SMP code supported by Caldera.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- *
- * Fixes
- * Felix Koop : NR_CPUS used properly
- * Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIPS report.
- * Greg Wright : Fix for kernel stacks panic.
- * Erich Boleyn : MP v1.4 and additional changes.
- * Matthias Sattler : Changes for 2.1 kernel map.
- * Michel Lespinasse : Changes for 2.1 kernel map.
- * Michael Chastain : Change trampoline.S to gnu as.
- * Alan Cox : Dumb bug: 'B' step PPro's are fine
- * Ingo Molnar : Added APIC timers, based on code
- * from Jose Renau
- * Ingo Molnar : various cleanups and rewrites
- * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs
- * Martin J. Bligh : Added support for multi-quad systems
- * Dave Jones : Report invalid combinations of Athlon CPUs.
-* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
-
-#include <linux/module.h>
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
-#include <linux/irq.h>
-#include <linux/bootmem.h>
-
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <asm/tlbflush.h>
-#include <asm/desc.h>
-#include <asm/arch_hooks.h>
-
-#if 1
-#define Dprintk(args...)
-#else
-#include <mach_apic.h>
-#endif
-#include <mach_wakecpu.h>
-#include <smpboot_hooks.h>
-
-/* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
-
-/* bitmap of online cpus */
-cpumask_t cpu_online_map;
-
-static cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-static cpumask_t smp_commenced_mask;
-
-/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-
-u8 x86_cpu_to_apicid[NR_CPUS] =
- { [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
-#if 0
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
-static unsigned char *trampoline_base;
-static int trampoline_exec;
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __init setup_trampoline(void)
-{
- memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(trampoline_base);
-}
-#endif
-
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
-#if 1
- int cpu;
-
- for (cpu = 1; cpu < NR_CPUS; cpu++) {
- cpu_gdt_descr[cpu].address = (unsigned long)
- alloc_bootmem_low_pages(PAGE_SIZE);
- /* XXX free unused pages later */
- }
-#else
- trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- if (__pa(trampoline_base) >= 0x9F000)
- BUG();
- /*
- * Make the SMP trampoline executable:
- */
- trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
-#endif
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __init smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = cpu_data + id;
-
- *c = boot_cpu_data;
- if (id!=0)
- identify_cpu(c);
- /*
- * Mask B, Pentium, but not Pentium MMX
- */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
- c->x86_model <= 3)
- /*
- * Remember we have B step Pentia with bugs
- */
- smp_b_stepping = 1;
-
- /*
- * Certain Athlons might work (for various values of 'work') in SMP
- * but they are not certified as MP capable.
- */
- if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
- /* Athlon 660/661 is valid. */
- if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
- goto valid_k7;
-
- /* Duron 670 is valid */
- if ((c->x86_model==7) && (c->x86_mask==0))
- goto valid_k7;
-
- /*
- * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
- * It's worth noting that the A5 stepping (662) of some Athlon XP's
- * have the MP bit set.
- * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
- */
- if (((c->x86_model==6) && (c->x86_mask>=2)) ||
- ((c->x86_model==7) && (c->x86_mask>=1)) ||
- (c->x86_model> 7))
- if (cpu_has_mp)
- goto valid_k7;
-
- /* If we get here, it's not a certified SMP capable AMD system. */
- tainted |= TAINT_UNSAFE_SMP;
- }
-
-valid_k7:
- ;
-}
-
-#if 0
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
-{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- unsigned long one_usec;
- int buggy = 0;
-
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
- /* convert from kcyc/sec to cyc/usec */
- one_usec = cpu_khz / 1000;
-
- atomic_set(&tsc_start_flag, 1);
- wmb();
-
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
-
- rdtscll(tsc_values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
- }
-
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc_values[i];
- sum += t0;
- }
- }
- avg = sum;
- do_div(avg, num_booting_cpus());
-
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta;
- do_div(realdelta, one_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
-
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
- }
-
- sum += delta;
- }
- if (!buggy)
- printk("passed.\n");
-}
-
-static void __init synchronize_tsc_ap (void)
-{
- int i;
-
- /*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc_start_flag)) mb();
-
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
-
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
- }
-}
-#undef NR_LOOPS
-#endif
-
-extern void calibrate_delay(void);
-
-static atomic_t init_deasserted;
-
-void __init smp_callin(void)
-{
- int cpuid, phys_id;
- unsigned long timeout;
-
-#if 0
- /*
- * If woken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- wait_for_init_deassert(&init_deasserted);
-#endif
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = smp_processor_id();
- cpuid = smp_processor_id();
- if (cpu_isset(cpuid, cpu_callin_map)) {
- printk("huh, phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- BUG();
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- /*
- * Waiting 2s total for startup (udelay is not yet working)
- */
- timeout = jiffies + 2*HZ;
- while (time_before(jiffies, timeout)) {
- /*
- * Has the boot CPU finished its STARTUP sequence?
- */
- if (cpu_isset(cpuid, cpu_callout_map))
- break;
- rep_nop();
- }
-
- if (!time_before(jiffies, timeout)) {
- printk("BUG: CPU%d started up but did not get a callout!\n",
- cpuid);
- BUG();
- }
-
-#if 0
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
- smp_callin_clear_local_apic();
- setup_local_APIC();
-#endif
- map_cpu_to_logical_apicid();
-
- local_irq_enable();
-
- /*
- * Get our bogomips.
- */
- calibrate_delay();
- Dprintk("Stack at about %p\n",&cpuid);
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
-#if 0
- disable_APIC_timer();
-#endif
- local_irq_disable();
- /*
- * Allow the master to continue.
- */
- cpu_set(cpuid, cpu_callin_map);
-
-#if 0
- /*
- * Synchronize the TSC with the BP
- */
- if (cpu_has_tsc && cpu_khz)
- synchronize_tsc_ap();
-#endif
-}
-
-int cpucount;
-
-extern int cpu_idle(void);
-
-
-static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
-{
-
- return IRQ_HANDLED;
-}
-
-static struct irqaction local_irq_debug = {
- local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
- NULL, NULL
-};
-
-void local_setup_debug(void)
-{
- (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
-}
-
-
-extern void local_setup_timer(void);
-
-/*
- * Activate a secondary processor.
- */
-int __init start_secondary(void *unused)
-{
- /*
- * Don't put anything before smp_callin(); SMP
- * booting is so fragile that we want to limit the
- * things done here to the most necessary things.
- */
- cpu_init();
- smp_callin();
- while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
- rep_nop();
- local_setup_timer();
- local_setup_debug(); /* XXX */
- smp_intr_init();
- local_irq_enable();
- /*
- * low-memory mappings have been cleared, flush them from
- * the local TLBs too.
- */
- local_flush_tlb();
- cpu_set(smp_processor_id(), cpu_online_map);
- wmb();
- if (0) {
- char *msg2 = "delay2\n";
- int timeout;
- for (timeout = 0; timeout < 50000; timeout++) {
- udelay(1000);
- if (timeout == 2000) {
- (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
- timeout = 0;
- }
- }
- }
- return cpu_idle();
-}
-
-/*
- * Everything has been set up for the secondary
- * CPUs - they just need to reload everything
- * from the task structure
- * This function must not return.
- */
-void __init initialize_secondary(void)
-{
- /*
- * We don't actually need to load the full TSS,
- * basically just the stack pointer and the eip.
- */
-
- asm volatile(
- "movl %0,%%esp\n\t"
- "jmp *%1"
- :
- :"r" (current->thread.esp),"r" (current->thread.eip));
-}
-
-extern struct {
- void * esp;
- unsigned short ss;
-} stack_start;
-
-#ifdef CONFIG_NUMA
-
-/* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
- { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-/* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_2_node);
-
-/* set up a mapping between cpu and node. */
-static inline void map_cpu_to_node(int cpu, int node)
-{
- printk("Mapping cpu %d to node %d\n", cpu, node);
- cpu_set(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static inline void unmap_cpu_to_node(int cpu)
-{
- int node;
-
- printk("Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node ++)
- cpu_clear(cpu, node_2_cpu_mask[node]);
- cpu_2_node[cpu] = 0;
-}
-#else /* !CONFIG_NUMA */
-
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
-
-#endif /* CONFIG_NUMA */
-
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-
-void map_cpu_to_logical_apicid(void)
-{
- int cpu = smp_processor_id();
- int apicid = smp_processor_id();
-
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, apicid_to_node(apicid));
-}
-
-void unmap_cpu_to_logical_apicid(int cpu)
-{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
-}
-
-#if APIC_DEBUG
-static inline void __inquire_remote_apic(int apicid)
-{
- int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout, status;
-
- printk("Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk("%08x\n", status);
- break;
- default:
- printk("failed\n");
- }
- }
-}
-#endif
-
-#if 0
-#ifdef WAKE_SECONDARY_VIA_NMI
-/*
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
- */
-static int __init
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int timeout, maxlvt;
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- maxlvt = get_maxlvt();
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- Dprintk("NMI sent.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
- unsigned long send_status = 0, accept_status = 0;
- int maxlvt, timeout, num_starts, j;
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- atomic_set(&init_deasserted, 1);
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't send the STARTUP IPIs.
- */
- if (APIC_INTEGRATED(apic_version[phys_apicid]))
- num_starts = 2;
- else
- num_starts = 0;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write_around(APIC_ICR, APIC_DM_STARTUP
- | (start_eip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- timeout = 0;
- do {
- Dprintk("+");
- udelay(100);
- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
- } while (send_status && (timeout++ < 1000));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_read_around(APIC_SPIV);
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk("APIC never delivered???\n");
- if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-#endif /* WAKE_SECONDARY_VIA_INIT */
-#endif
-
-extern cpumask_t cpu_initialized;
-
-static int __init do_boot_cpu(int apicid)
-/*
- * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
- * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
- */
-{
- struct task_struct *idle;
- unsigned long boot_error;
- int timeout, cpu;
- unsigned long start_eip;
-#if 0
- unsigned short nmi_high = 0, nmi_low = 0;
-#endif
- full_execution_context_t ctxt;
- extern void startup_32_smp(void);
- extern void hypervisor_callback(void);
- extern void failsafe_callback(void);
- extern int smp_trap_init(trap_info_t *);
- int i;
-
- cpu = ++cpucount;
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- idle = fork_idle(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
- idle->thread.eip = (unsigned long) start_secondary;
- /* start_eip had better be page-aligned! */
- start_eip = (unsigned long)startup_32_smp;
-
- /* So we see what's up */
- printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
- /* Stack for startup_32 can be just as for start_secondary onwards */
- stack_start.esp = (void *) idle->thread.esp;
-
- irq_ctx_init(cpu);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
-#if 1
- if (cpu_gdt_descr[0].size > PAGE_SIZE)
- BUG();
- cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
- memcpy((void *)cpu_gdt_descr[cpu].address,
- (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
- memset((char *)cpu_gdt_descr[cpu].address +
- FIRST_RESERVED_GDT_ENTRY * 8, 0,
- NR_RESERVED_GDT_ENTRIES * 8);
-
- memset(&ctxt, 0, sizeof(ctxt));
-
- ctxt.cpu_ctxt.ds = __USER_DS;
- ctxt.cpu_ctxt.es = __USER_DS;
- ctxt.cpu_ctxt.fs = 0;
- ctxt.cpu_ctxt.gs = 0;
- ctxt.cpu_ctxt.ss = __KERNEL_DS;
- ctxt.cpu_ctxt.cs = __KERNEL_CS;
- ctxt.cpu_ctxt.eip = start_eip;
- ctxt.cpu_ctxt.esp = idle->thread.esp;
- ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
-
- /* FPU is set up to default initial state. */
- memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
-
- /* Virtual IDT is empty at start-of-day. */
- for ( i = 0; i < 256; i++ )
- {
- ctxt.trap_ctxt[i].vector = i;
- ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
- }
- ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
-
- /* No LDT. */
- ctxt.ldt_ents = 0;
-
- {
- unsigned long va;
- int f;
-
- for (va = cpu_gdt_descr[cpu].address, f = 0;
- va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
- va += PAGE_SIZE, f++) {
- ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
- make_page_readonly((void *)va);
- }
- ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
- flush_page_update_queue();
- }
-
- /* Ring 1 stack is the initial stack. */
- ctxt.kernel_ss = __KERNEL_DS;
- ctxt.kernel_esp = idle->thread.esp;
-
- /* Callback handlers. */
- ctxt.event_callback_cs = __KERNEL_CS;
- ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
- ctxt.failsafe_callback_cs = __KERNEL_CS;
- ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-
- ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
-
- boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("OK.\n");
- printk("CPU%d: ", cpu);
- print_cpu_info(&cpu_data[cpu]);
- Dprintk("CPU has booted.\n");
- } else {
- boot_error= 1;
- }
- }
- x86_cpu_to_apicid[cpu] = apicid;
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_logical_apicid(cpu);
- cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
- cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpucount--;
- }
-
-#else
- Dprintk("Setting warm reset code and vector.\n");
-
- store_NMI_vector(&nmi_high, &nmi_low);
-
- smpboot_setup_warm_reset_vector(start_eip);
-
- /*
- * Starting actual IPI sequence...
- */
- boot_error = wakeup_secondary_cpu(apicid, start_eip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("OK.\n");
- printk("CPU%d: ", cpu);
- print_cpu_info(&cpu_data[cpu]);
- Dprintk("CPU has booted.\n");
- } else {
- boot_error= 1;
- if (*((volatile unsigned char *)trampoline_base)
- == 0xA5)
- /* trampoline started but...? */
- printk("Stuck ??\n");
- else
- /* trampoline code not run */
- printk("Not responding.\n");
- inquire_remote_apic(apicid);
- }
- }
- x86_cpu_to_apicid[cpu] = apicid;
- if (boot_error) {
- /* Try to put things back the way they were before ... */
- unmap_cpu_to_logical_apicid(cpu);
- cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
- cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpucount--;
- }
-
- /* mark "stuck" area as not stuck */
- *((volatile unsigned long *)trampoline_base) = 0;
-#endif
-
- return boot_error;
-}
-
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
-
-static void smp_tune_scheduling (void)
-{
- unsigned long cachesize; /* kB */
- unsigned long bandwidth = 350; /* MB/s */
- /*
- * Rough estimation for SMP scheduling, this is the number of
- * cycles it takes for a fully memory-limited process to flush
- * the SMP-local cache.
- *
- * (For a P5 this pretty much means we will choose another idle
- * CPU almost always at wakeup time (this is due to the small
- * L1 cache), on PIIs it's around 50-100 usecs, depending on
- * the cache size)
- */
-
- if (!cpu_khz) {
- /*
- * this basically disables processor-affinity
- * scheduling on SMP without a TSC.
- */
- cacheflush_time = 0;
- return;
- } else {
- cachesize = boot_cpu_data.x86_cache_size;
- if (cachesize == -1) {
- cachesize = 16; /* Pentiums, 2x8kB cache */
- bandwidth = 100;
- }
-
- cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
- }
-
- cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
-
- printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
- (long)cacheflush_time/(cpu_khz/1000),
- ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
- printk("task migration cache decay timeout: %ld msecs.\n",
- cache_decay_ticks);
-}
-
-/*
- * Cycle through the processors sending APIC IPIs to boot each.
- */
-
-#if 0
-static int boot_cpu_logical_apicid;
-#endif
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-
-static void __init smp_boot_cpus(unsigned int max_cpus)
-{
- int cpu, kicked;
- unsigned long bogosum = 0;
-#if 0
- int apicid, bit;
-#endif
-
- /*
- * Setup boot CPU information
- */
- smp_store_cpu_info(0); /* Final full version of the data */
- printk("CPU%d: ", 0);
- print_cpu_info(&cpu_data[0]);
-
-#if 0
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
- boot_cpu_logical_apicid = logical_smp_processor_id();
- x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
-#else
- // boot_cpu_physical_apicid = 0;
- // boot_cpu_logical_apicid = 0;
- x86_cpu_to_apicid[0] = 0;
-#endif
-
- current_thread_info()->cpu = 0;
- smp_tune_scheduling();
- cpus_clear(cpu_sibling_map[0]);
- cpu_set(0, cpu_sibling_map[0]);
-
- /*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config /* && !acpi_lapic) */) {
- printk(KERN_NOTICE "SMP motherboard not detected.\n");
- smpboot_clear_io_apic_irqs();
-#if 0
- phys_cpu_present_map = physid_mask_of_physid(0);
- if (APIC_init_uniprocessor())
- printk(KERN_NOTICE "Local APIC not detected."
- " Using dummy APIC emulation.\n");
-#endif
- map_cpu_to_logical_apicid();
- return;
- }
-
-#if 0
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- * Makes no sense to do this check in clustered apic mode, so skip it
- */
- if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_physical_apicid);
- printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
- return;
- }
-
- verify_local_APIC();
-#endif
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- HYPERVISOR_shared_info->n_vcpu = 1;
- printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- smpboot_clear_io_apic_irqs();
-#if 0
- phys_cpu_present_map = physid_mask_of_physid(0);
-#endif
- return;
- }
-
- smp_intr_init();
-
-#if 0
- connect_bsp_APIC();
- setup_local_APIC();
-#endif
- map_cpu_to_logical_apicid();
-#if 0
-
-
- setup_portio_remap();
-
- /*
- * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
- *
- * In clustered apic mode, phys_cpu_present_map is a constructed thus:
- * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
- * clustered apic ID.
- */
- Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
-#endif
- Dprintk("CPU present map: %lx\n",
- (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
-
- kicked = 1;
- for (cpu = 1; kicked < NR_CPUS &&
- cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
- if (max_cpus <= cpucount+1)
- continue;
-
- if (do_boot_cpu(cpu))
- printk("CPU #%d not responding - cannot use it.\n",
- cpu);
- else
- ++kicked;
- }
-
-#if 0
- /*
- * Cleanup possible dangling ends...
- */
- smpboot_restore_warm_reset_vector();
-#endif
-
- /*
- * Allow the user to impress friends.
- */
- Dprintk("Before bogomips.\n");
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (cpu_isset(cpu, cpu_callout_map))
- bogosum += cpu_data[cpu].loops_per_jiffy;
- printk(KERN_INFO
- "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- cpucount+1,
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
-
- Dprintk("Before bogocount - setting activated=1.\n");
-
- if (smp_b_stepping)
- printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
-
- /*
- * Don't taint if we are running SMP kernel on a single non-MP
- * approved Athlon
- */
- if (tainted & TAINT_UNSAFE_SMP) {
- if (cpucount)
- printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
- else
- tainted &= ~TAINT_UNSAFE_SMP;
- }
-
- Dprintk("Boot done.\n");
-
- /*
- * construct cpu_sibling_map[], so that we can tell sibling CPUs
- * efficiently.
- */
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- cpus_clear(cpu_sibling_map[cpu]);
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- int siblings = 0;
- int i;
- if (!cpu_isset(cpu, cpu_callout_map))
- continue;
-
- if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- siblings++;
- cpu_set(i, cpu_sibling_map[cpu]);
- }
- }
- } else {
- siblings++;
- cpu_set(cpu, cpu_sibling_map[cpu]);
- }
-
- if (siblings != smp_num_siblings)
- printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
- }
-
-#if 0
- if (nmi_watchdog == NMI_LOCAL_APIC)
- check_nmi_watchdog();
-
- smpboot_setup_io_apic();
-
- setup_boot_APIC_clock();
-
- /*
- * Synchronize the TSC with the AP
- */
- if (cpu_has_tsc && cpucount && cpu_khz)
- synchronize_tsc_bp();
-#endif
-}
-
-/* These are wrappers to interface to the new boot process. Someone
- who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
- smp_boot_cpus(max_cpus);
-}
-
-void __devinit smp_prepare_boot_cpu(void)
-{
- cpu_set(smp_processor_id(), cpu_online_map);
- cpu_set(smp_processor_id(), cpu_callout_map);
-}
-
-int __devinit __cpu_up(unsigned int cpu)
-{
- /* This only works at boot for x86. See "rewrite" above. */
- if (cpu_isset(cpu, smp_commenced_mask)) {
- local_irq_enable();
- return -ENOSYS;
- }
-
- /* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map)) {
- local_irq_enable();
- return -EIO;
- }
-
- local_irq_enable();
- /* Unleash the CPU! */
- cpu_set(cpu, smp_commenced_mask);
- while (!cpu_isset(cpu, cpu_online_map))
- mb();
- return 0;
-}
-
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-#if 1
-#else
-#ifdef CONFIG_X86_IO_APIC
- setup_ioapic_dest();
-#endif
- zap_low_mappings();
- /*
- * Disable executability of the SMP trampoline:
- */
- set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
-#endif
-}
-
-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
-
-static struct irqaction reschedule_irq = {
- smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
- NULL, NULL
-};
-
-extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
-
-static struct irqaction invalidate_irq = {
- smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
- NULL, NULL
-};
-
-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
-
-static struct irqaction call_function_irq = {
- smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
- "call_function", NULL, NULL
-};
-
-void __init smp_intr_init(void)
-{
-
- (void)setup_irq(
- bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
- &reschedule_irq);
- (void)setup_irq(
- bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
- &invalidate_irq);
- (void)setup_irq(
- bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
- &call_function_irq);
-}
+++ /dev/null
-/* Copyright (C) 2004, Christian Limpach */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/threads.h>
-
-unsigned int __initdata maxcpus = NR_CPUS;
-
-
-/*
- * the frequency of the profiling timer can be changed
- * by writing a multiplier value into /proc/profile.
- */
-int setup_profiling_timer(unsigned int multiplier)
-{
- printk("setup_profiling_timer\n");
-
- return 0;
-}
+++ /dev/null
-
-obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o
-
+++ /dev/null
-/******************************************************************************
- * blktap.c
- *
- * XenLinux virtual block-device tap.
- *
- * Copyright (c) 2004, Andrew Warfield
- *
- * Based on the original split block driver:
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
- * Copyright (c) 2004, Christian Limpach
- *
- * Note that unlike the split block driver code, this driver has been developed
- * strictly for Linux 2.6
- */
-
-#include "blktap.h"
-
-int __init xlblktap_init(void)
-{
- ctrl_msg_t cmsg;
- blkif_fe_driver_status_t fe_st;
- blkif_be_driver_status_t be_st;
-
- printk(KERN_INFO "Initialising Xen block tap device\n");
-
- DPRINTK(" tap - Backend connection init:\n");
-
-
- (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_BLKIF_FE;
- cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
- cmsg.length = sizeof(blkif_fe_driver_status_t);
- fe_st.status = BLKIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
- DPRINTK(" tap - Frontend connection init:\n");
-
- active_reqs_init();
- blkif_interface_init();
- blkdev_schedule_init();
-
- (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_BLKIF_BE;
- cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS;
- cmsg.length = sizeof(blkif_be_driver_status_t);
- be_st.status = BLKIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &be_st, sizeof(be_st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
- DPRINTK(" tap - Userland channel init:\n");
-
- blktap_init();
-
- DPRINTK("Blkif tap device initialized.\n");
-
- return 0;
-}
-
-#if 0 /* tap doesn't handle suspend/resume */
-void blkdev_suspend(void)
-{
-}
-
-void blkdev_resume(void)
-{
- ctrl_msg_t cmsg;
- blkif_fe_driver_status_t st;
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_BLKIF_FE;
- cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
- cmsg.length = sizeof(blkif_fe_driver_status_t);
- st.status = BLKIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-#endif
-
-__initcall(xlblktap_init);
+++ /dev/null
-/*
- * blktap.h
- *
- * Interfaces for the Xen block tap driver.
- *
- * (c) 2004, Andrew Warfield, University of Cambridge
- *
- */
-
-#ifndef __BLKTAP_H__
-#define __BLKTAP_H__
-
-#include <linux/version.h>
-#include <linux/blkdev.h>
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <asm-xen/ctrl_if.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm-xen/hypervisor.h>
-#include <asm-xen/xen-public/io/blkif.h>
-#include <asm-xen/xen-public/io/ring.h>
-
-/* Used to signal to the backend that this is a tap domain. */
-#define BLKTAP_COOKIE 0xbeadfeed
-
-/* -------[ debug / pretty printing ]--------------------------------- */
-
-#if 0
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#else
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p) ((void)0)
-#endif
-
-#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
-
-
-/* -------[ state descriptors ]--------------------------------------- */
-
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED 2
-
-/* -------[ connection tracking ]------------------------------------- */
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define VMALLOC_VMADDR(x) ((unsigned long)(x))
-#endif
-
-extern spinlock_t blkif_io_lock;
-
-typedef struct blkif_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- int irq;
- /* Comms information. */
- blkif_back_ring_t blk_ring;
-
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
- struct blkif_st *hash_next;
- struct list_head blkdev_list;
- spinlock_t blk_ring_lock;
- atomic_t refcnt;
- struct work_struct work;
-} blkif_t;
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
-void blkif_disconnect_complete(blkif_t *blkif);
-#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define blkif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- blkif_disconnect_complete(_b); \
- } while (0)
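The get/put pair above is what defers the DISCONNECT completion until the last user drops its reference; a minimal sketch of the usual caller pattern (illustrative only, not code from this tree):

    blkif_t *blkif = blkif_find_by_handle(domid, handle);
    if ( blkif != NULL )
    {
        blkif_get(blkif);        /* pin the interface while using its ring */
        /* ... produce or consume entries on blkif->blk_ring ... */
        blkif_put(blkif);        /* final put runs blkif_disconnect_complete() */
    }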
-
-
-/* -------[ active request tracking ]--------------------------------- */
-
-typedef struct {
- blkif_t *blkif;
- unsigned long id;
- int nr_pages;
- unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- int next_free;
-} active_req_t;
-
-typedef unsigned int ACTIVE_RING_IDX;
-
-active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
-
-extern inline unsigned int ID_TO_IDX(unsigned long id)
-{
- return ( id & 0x0000ffff );
-}
-
-extern inline domid_t ID_TO_DOM(unsigned long id)
-{
- return (id >> 16);
-}
-
-void active_reqs_init(void);
-
-/* -------[ interposition -> character device interface ]------------- */
-
-/* /dev/xen/blktap resides at device number major=10, minor=202 */
-#define BLKTAP_MINOR 202
-
-/* size of the extra VMA area to map in attached pages. */
-#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
-
-/* blktap IOCTLs: */
-#define BLKTAP_IOCTL_KICK_FE 1
-#define BLKTAP_IOCTL_KICK_BE 2
-#define BLKTAP_IOCTL_SETMODE 3
-#define BLKTAP_IOCTL_PRINT_IDXS 100
-
-/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
-#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
-#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
-#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
-#define BLKTAP_MODE_COPY_FE 0x00000004
-#define BLKTAP_MODE_COPY_BE 0x00000008
-#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
-#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
-
-#define BLKTAP_MODE_INTERPOSE \
- (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
-
-#define BLKTAP_MODE_COPY_BOTH \
- (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
-
-#define BLKTAP_MODE_COPY_BOTH_PAGES \
- (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
-
-static inline int BLKTAP_MODE_VALID(unsigned long arg)
-{
- return (
- ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
- ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
- ( arg == BLKTAP_MODE_INTERPOSE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
- ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
- );
-}
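A minimal sketch of how the mode word is meant to be used (the value is illustrative; this is effectively what the BLKTAP_IOCTL_SETMODE handler in blktap_userdev.c does):

    unsigned long new_mode = BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_FE_PAGES;

    if ( BLKTAP_MODE_VALID(new_mode) )
        blktap_mode = new_mode;      /* accepted combination */
    /* any other combination is rejected by BLKTAP_MODE_VALID() */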
-
-
-
-/* -------[ Mappings to User VMA ]------------------------------------ */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-extern struct vm_area_struct *blktap_vma;
-
-/* The following are from blkback.c and should probably be put in a
- * header and included from there.
- * The mmap area described here is where attached data pages will be mapped.
- */
-
-extern unsigned long mmap_vstart;
-#define MMAP_PAGES_PER_REQUEST \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
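To make the layout concrete: assuming BLKIF_MAX_SEGMENTS_PER_REQUEST is 11 (an assumption for illustration, so MMAP_PAGES_PER_REQUEST is 12) and with MAX_PENDING_REQS at 64, the window covers 768 data pages and, for example,

    MMAP_VADDR(5, 3) == mmap_vstart + (5 * 12 + 3) * PAGE_SIZE

i.e. segment 3 of pending request 5 always lands at a fixed offset inside the user mapping.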
-
-/* immediately before the mmap area, we have a bunch of pages reserved
- * for shared memory rings.
- */
-
-#define RING_PAGES 3 /* Ctrl, Front, and Back */
-extern unsigned long rings_vstart;
-
-
-/* -------[ Here be globals ]----------------------------------------- */
-extern unsigned long blktap_mode;
-
-/* Connection to a single backend domain. */
-extern blkif_front_ring_t blktap_be_ring;
-extern unsigned int blktap_be_evtchn;
-extern unsigned int blktap_be_state;
-
-/* User ring status. */
-extern unsigned long blktap_ring_ok;
-
-/* -------[ ...and function prototypes. ]----------------------------- */
-
-/* init function for character device interface. */
-int blktap_init(void);
-
-/* init function for the blkif cache. */
-void __init blkif_interface_init(void);
-void __init blkdev_schedule_init(void);
-void blkif_deschedule(blkif_t *blkif);
-
-/* interfaces to the char driver, passing messages to and from apps. */
-void blktap_kick_user(void);
-
-/* user ring access functions: */
-int blktap_write_fe_ring(blkif_request_t *req);
-int blktap_write_be_ring(blkif_response_t *rsp);
-int blktap_write_ctrl_ring(ctrl_msg_t *msg);
-
-/* fe/be ring access functions: */
-int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
-int write_req_to_be_ring(blkif_request_t *req);
-
-/* event notification functions */
-void kick_fe_domain(blkif_t *blkif);
-void kick_be_domain(void);
-
-/* Interrupt handlers. */
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
- struct pt_regs *ptregs);
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
-
-/* Control message receiver. */
-extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
-
-/* debug */
-void print_vm_ring_idxs(void);
-
-#endif /* __BLKTAP_H__ */
+++ /dev/null
-/******************************************************************************
- * blktap_controlmsg.c
- *
- * XenLinux virtual block-device tap.
- * Control interfaces to the frontend and backend drivers.
- *
- * Copyright (c) 2004, Andrew Warfield
- *
- */
-
-#include "blktap.h"
-
-static char *blkif_state_name[] = {
- [BLKIF_STATE_CLOSED] = "closed",
- [BLKIF_STATE_DISCONNECTED] = "disconnected",
- [BLKIF_STATE_CONNECTED] = "connected",
-};
-
-static char * blkif_status_name[] = {
- [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
- [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
- [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
- [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
-};
-
-static unsigned blktap_be_irq;
-unsigned int blktap_be_state = BLKIF_STATE_CLOSED;
-unsigned int blktap_be_evtchn;
-
-/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
-
-#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
-
-static kmem_cache_t *blkif_cachep;
-static blkif_t *blkif_hash[BLKIF_HASHSZ];
-
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
- blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif != NULL) &&
- ((blkif->domid != domid) || (blkif->handle != handle)) )
- blkif = blkif->hash_next;
- return blkif;
-}
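The hash simply XORs the domain id with the per-domain handle and masks the result into one of BLKIF_HASHSZ buckets, e.g. (values chosen purely for illustration):

    /* domid 3, handle 1  ->  bucket (3 ^ 1) & (BLKIF_HASHSZ - 1) == 2 */
    blkif_t *b = blkif_find_by_handle((domid_t)3, 1);   /* walks blkif_hash[2] */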
-
-static void __blkif_disconnect_complete(void *arg)
-{
- blkif_t *blkif = (blkif_t *)arg;
- ctrl_msg_t cmsg;
- blkif_be_disconnect_t disc;
-
- /*
- * These can't be done in blkif_disconnect() because at that point there
- * may be outstanding requests at the disc whose asynchronous responses
- * must still be notified to the remote driver.
- */
- unbind_evtchn_from_irq(blkif->evtchn);
- vfree(blkif->blk_ring.sring);
-
- /* Construct the deferred response message. */
- cmsg.type = CMSG_BLKIF_BE;
- cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
- cmsg.id = blkif->disconnect_rspid;
- cmsg.length = sizeof(blkif_be_disconnect_t);
- disc.domid = blkif->domid;
- disc.blkif_handle = blkif->handle;
- disc.status = BLKIF_BE_STATUS_OKAY;
- memcpy(cmsg.msg, &disc, sizeof(disc));
-
- /*
- * Make sure message is constructed /before/ status change, because
- * after the status change the 'blkif' structure could be deallocated at
- * any time. Also make sure we send the response /after/ status change,
- * as otherwise a subsequent CONNECT request could spuriously fail if
- * another CPU doesn't see the status change yet.
- */
- mb();
- if ( blkif->status != DISCONNECTING )
- BUG();
- blkif->status = DISCONNECTED;
- mb();
-
- /* Send the successful response. */
- ctrl_if_send_response(&cmsg);
-}
-
-void blkif_disconnect_complete(blkif_t *blkif)
-{
- INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
- schedule_work(&blkif->work);
-}
-
-void blkif_ptfe_create(blkif_be_create_t *create)
-{
- blkif_t *blkif, **pblkif;
- domid_t domid = create->domid;
- unsigned int handle = create->blkif_handle;
-
-
- /* May want to store info on the connecting domain here. */
-
- DPRINTK("PT got BE_CREATE\n");
-
- if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
- {
- DPRINTK("Could not create blkif: out of memory\n");
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- /* blkif struct init code from blkback.c */
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->handle = handle;
- blkif->status = DISCONNECTED;
- spin_lock_init(&blkif->blk_ring_lock);
- atomic_set(&blkif->refcnt, 0);
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif != NULL )
- {
- if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
- {
- DPRINTK("Could not create blkif: already exists\n");
- create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
- kmem_cache_free(blkif_cachep, blkif);
- return;
- }
- pblkif = &(*pblkif)->hash_next;
- }
-
- blkif->hash_next = *pblkif;
- *pblkif = blkif;
-
- create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-
-void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
-{
- /* Clear anything that we initialized above. */
-
- domid_t domid = destroy->domid;
- unsigned int handle = destroy->blkif_handle;
- blkif_t **pblkif, *blkif;
-
- DPRINTK("PT got BE_DESTROY\n");
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif = *pblkif) != NULL )
- {
- if ( (blkif->domid == domid) && (blkif->handle == handle) )
- {
- if ( blkif->status != DISCONNECTED )
- goto still_connected;
- goto destroy;
- }
- pblkif = &blkif->hash_next;
- }
-
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
-
- still_connected:
- destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
-
- destroy:
- *pblkif = blkif->hash_next;
- kmem_cache_free(blkif_cachep, blkif);
- destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_ptfe_connect(blkif_be_connect_t *connect)
-{
- domid_t domid = connect->domid;
- unsigned int handle = connect->blkif_handle;
- unsigned int evtchn = connect->evtchn;
- unsigned long shmem_frame = connect->shmem_frame;
- struct vm_struct *vma;
- pgprot_t prot;
- int error;
- blkif_t *blkif;
- blkif_sring_t *sring;
-
- DPRINTK("PT got BE_CONNECT\n");
-
- blkif = blkif_find_by_handle(domid, handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n",
- connect->domid, connect->blkif_handle);
- connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- {
- connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
- error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
- shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
- prot, domid);
- if ( error != 0 )
- {
- WPRINTK("BE_CONNECT: error! (%d)\n", error);
- if ( error == -ENOMEM )
- connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT ) {
- connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- WPRINTK("BE_CONNECT: MAPPING error!\n");
- }
- else
- connect->status = BLKIF_BE_STATUS_ERROR;
- vfree(vma->addr);
- return;
- }
-
- if ( blkif->status != DISCONNECTED )
- {
- connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
- vfree(vma->addr);
- return;
- }
-
- sring = (blkif_sring_t *)vma->addr;
- SHARED_RING_INIT(sring);
- BACK_RING_INIT(&blkif->blk_ring, sring);
-
- blkif->evtchn = evtchn;
- blkif->irq = bind_evtchn_to_irq(evtchn);
- blkif->shmem_frame = shmem_frame;
- blkif->status = CONNECTED;
- blkif_get(blkif);
-
- request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
-
- connect->status = BLKIF_BE_STATUS_OKAY;
-}
-
-int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
-{
- domid_t domid = disconnect->domid;
- unsigned int handle = disconnect->blkif_handle;
- blkif_t *blkif;
-
- DPRINTK("PT got BE_DISCONNECT\n");
-
- blkif = blkif_find_by_handle(domid, handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("blkif_disconnect attempted for non-existent blkif"
- " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle);
- disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return 1; /* Caller will send response error message. */
- }
-
- if ( blkif->status == CONNECTED )
- {
- blkif->status = DISCONNECTING;
- blkif->disconnect_rspid = rsp_id;
- wmb(); /* Let other CPUs see the status change. */
- free_irq(blkif->irq, blkif);
- blkif_deschedule(blkif);
- blkif_put(blkif);
- return 0; /* Caller should not send response message. */
- }
-
- disconnect->status = BLKIF_BE_STATUS_OKAY;
- return 1;
-}
-
-/*-----[ Control Messages to/from Backend VM ]----------------------------*/
-
-/* Tell the controller to bring up the interface. */
-static void blkif_ptbe_send_interface_connect(void)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
- .length = sizeof(blkif_fe_interface_connect_t),
- };
- blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
- msg->handle = 0;
- msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void blkif_ptbe_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_ptbe_disconnect(void)
-{
- blkif_sring_t *sring;
-
- sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&blktap_be_ring, sring);
- blktap_be_state = BLKIF_STATE_DISCONNECTED;
- DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
- blkif_ptbe_send_interface_connect();
-}
-
-static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
-{
- int err = 0;
-
- blktap_be_evtchn = status->evtchn;
- blktap_be_irq = bind_evtchn_to_irq(blktap_be_evtchn);
-
- err = request_irq(blktap_be_irq, blkif_ptbe_int,
- SA_SAMPLE_RANDOM, "blkif", NULL);
- if ( err ) {
- WPRINTK("blkfront request_irq failed (%d)\n", err);
- return;
- } else {
- /* transition to connected in case we need to do a
- partition probe on a whole disk */
- blktap_be_state = BLKIF_STATE_CONNECTED;
- }
-}
-
-static void unexpected(blkif_fe_interface_status_t *status)
-{
- WPRINTK(" TAP: Unexpected blkif status %s in state %s\n",
- blkif_status_name[status->status],
- blkif_state_name[blktap_be_state]);
-}
-
-static void blkif_ptbe_status(
- blkif_fe_interface_status_t *status)
-{
- if ( status->handle != 0 )
- {
- DPRINTK("Status change on unsupported blkif %d\n",
- status->handle);
- return;
- }
-
- DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
-
- switch ( status->status )
- {
- case BLKIF_INTERFACE_STATUS_CLOSED:
- switch ( blktap_be_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_ptbe_close();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_DISCONNECTED:
- switch ( blktap_be_state )
- {
- case BLKIF_STATE_CLOSED:
- blkif_ptbe_disconnect();
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
- unexpected(status);
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CONNECTED:
- switch ( blktap_be_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- blkif_ptbe_disconnect();
- blkif_ptbe_connect(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- blkif_ptbe_connect(status);
- break;
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_ptbe_connect(status);
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CHANGED:
- switch ( blktap_be_state )
- {
- case BLKIF_STATE_CLOSED:
- case BLKIF_STATE_DISCONNECTED:
- unexpected(status);
- break;
- case BLKIF_STATE_CONNECTED:
- /* vbd_update(); */
- /* tap doesn't really get state changes... */
- unexpected(status);
- break;
- }
- break;
-
- default:
- DPRINTK("Status change to unknown value %d\n", status->status);
- break;
- }
-}
-
-/*-----[ All control messages enter here: ]-------------------------------*/
-
-void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->type )
- {
- case CMSG_BLKIF_FE:
-
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_FE_INTERFACE_STATUS:
- if ( msg->length != sizeof(blkif_fe_interface_status_t) )
- goto parse_error;
- blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
- break;
-
- default:
- goto parse_error;
- }
- break;
-
- case CMSG_BLKIF_BE:
-
- /* send a copy of the message to user if wanted */
-
- if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
- (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
-
- blktap_write_ctrl_ring(msg);
- }
-
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_BE_CREATE:
- if ( msg->length != sizeof(blkif_be_create_t) )
- goto parse_error;
- blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_DESTROY:
- if ( msg->length != sizeof(blkif_be_destroy_t) )
- goto parse_error;
- blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_CONNECT:
- if ( msg->length != sizeof(blkif_be_connect_t) )
- goto parse_error;
- blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
- break;
- case CMSG_BLKIF_BE_DISCONNECT:
- if ( msg->length != sizeof(blkif_be_disconnect_t) )
- goto parse_error;
- if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0],
- msg->id) )
- return;
- break;
-
- /* We just ignore anything to do with vbds for now. */
-
- case CMSG_BLKIF_BE_VBD_CREATE:
- DPRINTK("PT got VBD_CREATE\n");
- ((blkif_be_vbd_create_t *)&msg->msg[0])->status
- = BLKIF_BE_STATUS_OKAY;
- break;
- case CMSG_BLKIF_BE_VBD_DESTROY:
- DPRINTK("PT got VBD_DESTROY\n");
- ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
- = BLKIF_BE_STATUS_OKAY;
- break;
- case CMSG_BLKIF_BE_VBD_GROW:
- DPRINTK("PT got VBD_GROW\n");
- ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
- = BLKIF_BE_STATUS_OKAY;
- break;
- case CMSG_BLKIF_BE_VBD_SHRINK:
- DPRINTK("PT got VBD_SHRINK\n");
- ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
- = BLKIF_BE_STATUS_OKAY;
- break;
- default:
- goto parse_error;
- }
- }
-
- ctrl_if_send_response(msg);
- return;
-
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-/*-----[ Initialization ]-------------------------------------------------*/
-
-void __init blkif_interface_init(void)
-{
- blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
- 0, 0, NULL, NULL);
- memset(blkif_hash, 0, sizeof(blkif_hash));
-
- blktap_be_ring.sring = NULL;
-}
+++ /dev/null
-/******************************************************************************
- * blktap_datapath.c
- *
- * XenLinux virtual block-device tap.
- * Block request routing data path.
- *
- * Copyright (c) 2004, Andrew Warfield
- * -- see full header in blktap.c
- */
-
-#include "blktap.h"
-#include <asm-xen/evtchn.h>
-
-/*-----[ The data paths ]-------------------------------------------------*/
-
-/* Connection to a single backend domain. */
-blkif_front_ring_t blktap_be_ring;
-
-/*-----[ Tracking active requests ]---------------------------------------*/
-
-/* this must be the same as MAX_PENDING_REQS in blkback.c */
-#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
-
-active_req_t active_reqs[MAX_ACTIVE_REQS];
-ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS];
-spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
-ACTIVE_RING_IDX active_prod, active_cons;
-#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
-#define ACTIVE_IDX(_ar) (_ar - active_reqs)
-#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
-
-inline active_req_t *get_active_req(void)
-{
- ACTIVE_RING_IDX idx;
- active_req_t *ar;
- unsigned long flags;
-
- ASSERT(active_cons != active_prod);
-
- spin_lock_irqsave(&active_req_lock, flags);
- idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
- ar = &active_reqs[idx];
- spin_unlock_irqrestore(&active_req_lock, flags);
-
- return ar;
-}
-
-inline void free_active_req(active_req_t *ar)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&active_req_lock, flags);
- active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
- spin_unlock_irqrestore(&active_req_lock, flags);
-}
-
-active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
-{
- return &active_reqs[idx];
-}
-
-void active_reqs_init(void)
-{
- ACTIVE_RING_IDX i;
-
- active_cons = 0;
- active_prod = MAX_ACTIVE_REQS;
- memset(active_reqs, 0, sizeof(active_reqs));
- for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
- active_req_ring[i] = i;
-}
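In other words, active_req_ring is a ring of free slot indices: get_active_req() pops from the consumer end, free_active_req() pushes back at the producer end, and NR_ACTIVE_REQS counts the slots currently in flight. A tiny illustration (sketch only):

    /* after active_reqs_init(): cons == 0, prod == MAX_ACTIVE_REQS,
     * so NR_ACTIVE_REQS == 0                                          */
    active_req_t *ar = get_active_req();   /* cons -> 1, in flight == 1 */
    /* ... request completes ... */
    free_active_req(ar);                   /* prod -> MAX+1, in flight == 0 */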
-
-/* Requests passing through the tap to the backend hijack the id field
- * in the request message. In it we put the AR index _AND_ the fe domid.
- * the domid is used by the backend to map the pages properly.
- */
-
-static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
-{
- return ( (fe_dom << 16) | idx );
-}
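MAKE_ID() is the inverse of the ID_TO_IDX()/ID_TO_DOM() helpers declared in blktap.h; a quick round-trip with illustrative values:

    unsigned long id = MAKE_ID((domid_t)7, (ACTIVE_RING_IDX)42);
    /* id == (7 << 16) | 42 == 0x7002a            */
    /* ID_TO_DOM(id) == 7,  ID_TO_IDX(id) == 42   */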
-
-/*-----[ Ring helpers ]---------------------------------------------------*/
-
-inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
-{
- blkif_response_t *resp_d;
- active_req_t *ar;
-
- ar = &active_reqs[ID_TO_IDX(rsp->id)];
- rsp->id = ar->id;
-
- resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
- blkif->blk_ring.rsp_prod_pvt);
- memcpy(resp_d, rsp, sizeof(blkif_response_t));
- wmb();
- blkif->blk_ring.rsp_prod_pvt++;
-
- blkif_put(ar->blkif);
- free_active_req(ar);
-
- return 0;
-}
-
-inline int write_req_to_be_ring(blkif_request_t *req)
-{
- blkif_request_t *req_d;
-
- if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
- WPRINTK("Tap trying to access an unconnected backend!\n");
- return 0;
- }
-
- req_d = RING_GET_REQUEST(&blktap_be_ring,
- blktap_be_ring.req_prod_pvt);
- memcpy(req_d, req, sizeof(blkif_request_t));
- wmb();
- blktap_be_ring.req_prod_pvt++;
-
- return 0;
-}
-
-void kick_fe_domain(blkif_t *blkif)
-{
- RING_PUSH_RESPONSES(&blkif->blk_ring);
- notify_via_evtchn(blkif->evtchn);
- DPRINTK("notified FE(dom %u)\n", blkif->domid);
-
-}
-
-void kick_be_domain(void)
-{
- if ( blktap_be_state != BLKIF_STATE_CONNECTED )
- return;
-
- wmb(); /* Ensure that the frontend can see the requests. */
- RING_PUSH_REQUESTS(&blktap_be_ring);
- notify_via_evtchn(blktap_be_evtchn);
- DPRINTK("notified BE\n");
-}
-
-/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
-
-/*-----[ Scheduler list maint -from blkback ]--- */
-
-static struct list_head blkio_schedule_list;
-static spinlock_t blkio_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
- return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
- unsigned long flags;
- if ( !__on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if ( __on_blkdev_list(blkif) )
- {
- list_del(&blkif->blkdev_list);
- blkif->blkdev_list.next = NULL;
- blkif_put(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
- unsigned long flags;
- if ( __on_blkdev_list(blkif) ) return;
- spin_lock_irqsave(&blkio_schedule_list_lock, flags);
- if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
- {
- list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
- blkif_get(blkif);
- }
- spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-
-/*-----[ Scheduler functions - from blkback ]--- */
-
-static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
-
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-
-static int blkio_schedule(void *arg)
-{
- DECLARE_WAITQUEUE(wq, current);
-
- blkif_t *blkif;
- struct list_head *ent;
-
- daemonize("xentapd");
-
- for ( ; ; )
- {
- /* Wait for work to do. */
- add_wait_queue(&blkio_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
- list_empty(&blkio_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&blkio_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
- !list_empty(&blkio_schedule_list) )
- {
- ent = blkio_schedule_list.next;
- blkif = list_entry(ent, blkif_t, blkdev_list);
- blkif_get(blkif);
- remove_from_blkdev_list(blkif);
- if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
- add_to_blkdev_list_tail(blkif);
- blkif_put(blkif);
- }
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
- /* Push the batch through to disc. */
- run_task_queue(&tq_disk);
-#endif
- }
-}
-
-static void maybe_trigger_blkio_schedule(void)
-{
- /*
- * Needed so that two processes, who together make the following predicate
- * true, don't both read stale values and evaluate the predicate
- * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
- */
- smp_mb();
-
- if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/
- !list_empty(&blkio_schedule_list) )
- wake_up(&blkio_schedule_wait);
-}
-
-void blkif_deschedule(blkif_t *blkif)
-{
- remove_from_blkdev_list(blkif);
-}
-
-void __init blkdev_schedule_init(void)
-{
- spin_lock_init(&blkio_schedule_list_lock);
- INIT_LIST_HEAD(&blkio_schedule_list);
-
- if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
- BUG();
-}
-
-/*-----[ Interrupt entry from a frontend ]------ */
-
-irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
-{
- blkif_t *blkif = dev_id;
-
- add_to_blkdev_list_tail(blkif);
- maybe_trigger_blkio_schedule();
- return IRQ_HANDLED;
-}
-
-/*-----[ Other Frontend Ring functions ]-------- */
-
-/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
-{
- /* we have pending messages from the real frontend. */
-
- blkif_request_t *req_s;
- RING_IDX i, rp;
- unsigned long flags;
- active_req_t *ar;
- int more_to_do = 0;
- int notify_be = 0, notify_user = 0;
-
- DPRINTK("PT got FE interrupt.\n");
-
- if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
-
- /* lock both rings */
- spin_lock_irqsave(&blkif_io_lock, flags);
-
- rp = blkif->blk_ring.sring->req_prod;
- rmb();
-
- for ( i = blkif->blk_ring.req_cons;
- (i != rp) &&
- !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
- i++ )
- {
-
- if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
- {
- more_to_do = 1;
- break;
- }
-
- req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
- /* This is a new request:
- * Assign an active request record, and remap the id.
- */
- ar = get_active_req();
- ar->id = req_s->id;
- ar->nr_pages = req_s->nr_segments;
- blkif_get(blkif);
- ar->blkif = blkif;
- req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
- /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
-
- /* FE -> BE interposition point is here. */
-
- /* ------------------------------------------------------------- */
- /* BLKIF_OP_PROBE_HACK: */
- /* Signal to the backend that we are a tap domain. */
-
- if (req_s->operation == BLKIF_OP_PROBE) {
- DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
- req_s->frame_and_sects[1] = BLKTAP_COOKIE;
- }
-
- /* ------------------------------------------------------------- */
-
- /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
- if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
- (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
-
- /* Copy the response message to UFERing */
- /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
- /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
-
- DPRINTK("req->UFERing\n");
- blktap_write_fe_ring(req_s);
- notify_user = 1;
- }
-
- /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
- if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
- (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
-
- /* (be included to prevent noise from the fe when it's off) */
- /* copy the request message to the BERing */
-
- DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
- (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
- (unsigned)blktap_be_ring.req_prod_pvt &
- (RING_SIZE(&blktap_be_ring)-1));
-
- write_req_to_be_ring(req_s);
- notify_be = 1;
- }
- }
-
- blkif->blk_ring.req_cons = i;
-
- /* unlock rings */
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- if (notify_user)
- blktap_kick_user();
- if (notify_be)
- kick_be_domain();
-
- return more_to_do;
-}
-
-/*-----[ Data to/from Backend (server) VM ]------------------------------*/
-
-
-irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
- struct pt_regs *ptregs)
-{
- blkif_response_t *resp_s;
- blkif_t *blkif;
- RING_IDX rp, i;
- unsigned long flags;
-
- DPRINTK("PT got BE interrupt.\n");
-
- /* lock both rings */
- spin_lock_irqsave(&blkif_io_lock, flags);
-
- rp = blktap_be_ring.sring->rsp_prod;
- rmb();
-
- for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
- {
- resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
-
- /* BE -> FE interposition point is here. */
-
- blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
-
- /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
- if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
- (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
-
- /* Copy the response message to UBERing */
- /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
- /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
-
- DPRINTK("rsp->UBERing\n");
- blktap_write_be_ring(resp_s);
- blktap_kick_user();
-
- }
-
- /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
- if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
- (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
-
- /* (fe included to prevent random interference from the BE) */
- /* Copy the response message to FERing */
-
- DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
- (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
- (unsigned)blkif->blk_ring.rsp_prod_pvt &
- (RING_SIZE(&blkif->blk_ring)-1));
-
- write_resp_to_fe_ring(blkif, resp_s);
- kick_fe_domain(blkif);
-
- }
- }
-
- blktap_be_ring.rsp_cons = i;
-
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- return IRQ_HANDLED;
-}
-
-/* Debug : print the current ring indices. */
-
-void print_vm_ring_idxs(void)
-{
- int i;
- blkif_t *blkif;
-
- WPRINTK("FE Rings: \n---------\n");
- for ( i = 0; i < 50; i++) {
- blkif = blkif_find_by_handle((domid_t)i, 0);
- if (blkif != NULL) {
- if (blkif->blk_ring.sring != NULL) {
- WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d "
- "| req_prod: %2d, rsp_prod: %2d\n", i,
- blkif->blk_ring.req_cons,
- blkif->blk_ring.rsp_prod_pvt,
- blkif->blk_ring.sring->req_prod,
- blkif->blk_ring.sring->rsp_prod);
- } else {
- WPRINTK("%2d: [no device channel yet]\n", i);
- }
- }
- }
- if (blktap_be_ring.sring != NULL) {
- WPRINTK("BE Ring: \n--------\n");
- WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
- "| req_prod: %2d, rsp_prod: %2d\n",
- blktap_be_ring.rsp_cons,
- blktap_be_ring.req_prod_pvt,
- blktap_be_ring.sring->req_prod,
- blktap_be_ring.sring->rsp_prod);
- }
-}
+++ /dev/null
-/******************************************************************************
- * blktap_userdev.c
- *
- * XenLinux virtual block-device tap.
- * Control interface between the driver and a character device.
- *
- * Copyright (c) 2004, Andrew Warfield
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/miscdevice.h>
-#include <linux/errno.h>
-#include <linux/major.h>
-#include <linux/gfp.h>
-#include <linux/poll.h>
-#include <asm/pgalloc.h>
-#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
-
-#include "blktap.h"
-
-
-unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
-
-/* Only one process may open /dev/xen/blktap at any time. */
-static unsigned long blktap_dev_inuse;
-unsigned long blktap_ring_ok; /* make this ring->state */
-
-/* for poll: */
-static wait_queue_head_t blktap_wait;
-
-/* Where things are inside the device mapping. */
-struct vm_area_struct *blktap_vma;
-unsigned long mmap_vstart;
-unsigned long rings_vstart;
-
-/* Rings up to user space. */
-static blkif_front_ring_t blktap_ufe_ring;
-static blkif_back_ring_t blktap_ube_ring;
-static ctrl_front_ring_t blktap_uctrl_ring;
-
-/* local prototypes */
-static int blktap_read_fe_ring(void);
-static int blktap_read_be_ring(void);
-
-/* -------[ blktap vm ops ]------------------------------------------- */
-
-static struct page *blktap_nopage(struct vm_area_struct *vma,
- unsigned long address,
- int *type)
-{
- /*
- * if the page has not been mapped in by the driver then generate
- * a SIGBUS to the domain.
- */
-
- force_sig(SIGBUS, current);
-
- return 0;
-}
-
-struct vm_operations_struct blktap_vm_ops = {
- nopage: blktap_nopage,
-};
-
-/* -------[ blktap file ops ]----------------------------------------- */
-
-static int blktap_open(struct inode *inode, struct file *filp)
-{
- blkif_sring_t *sring;
- ctrl_sring_t *csring;
-
- if ( test_and_set_bit(0, &blktap_dev_inuse) )
- return -EBUSY;
-
- printk(KERN_ALERT "blktap open.\n");
-
- /* Allocate the ctrl ring. */
- csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
- if (csring == NULL)
- goto fail_nomem;
-
- SetPageReserved(virt_to_page(csring));
-
- SHARED_RING_INIT(csring);
- FRONT_RING_INIT(&blktap_uctrl_ring, csring);
-
-
- /* Allocate the fe ring. */
- sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
- if (sring == NULL)
- goto fail_free_ctrl;
-
- SetPageReserved(virt_to_page(sring));
-
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&blktap_ufe_ring, sring);
-
- /* Allocate the be ring. */
- sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
- if (sring == NULL)
- goto fail_free_fe;
-
- SetPageReserved(virt_to_page(sring));
-
- SHARED_RING_INIT(sring);
- BACK_RING_INIT(&blktap_ube_ring, sring);
-
- DPRINTK(KERN_ALERT "blktap open.\n");
-
- return 0;
-
- fail_free_ctrl:
- free_page( (unsigned long) blktap_uctrl_ring.sring);
-
- fail_free_fe:
- free_page( (unsigned long) blktap_ufe_ring.sring);
-
- fail_nomem:
- return -ENOMEM;
-}
-
-static int blktap_release(struct inode *inode, struct file *filp)
-{
- blktap_dev_inuse = 0;
- blktap_ring_ok = 0;
-
- printk(KERN_ALERT "blktap closed.\n");
-
- /* Free the ring page. */
- ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
- free_page((unsigned long) blktap_uctrl_ring.sring);
-
- ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
- free_page((unsigned long) blktap_ufe_ring.sring);
-
- ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
- free_page((unsigned long) blktap_ube_ring.sring);
-
- return 0;
-}
-
-/* Note on mmap:
- * remap_pfn_range sets VM_IO on vma->vm_flags, which turned out to be a
- * problem when trying to make libaio do direct page access from userspace.
- * The bigger issue seems to be that there is no way to map a foreign page
- * into user space and have the virtual address of that page map sanely
- * down to an mfn.
- * Removing the VM_IO flag results in a loop in get_user_pages, as
- * pfn_valid() always fails on a foreign page.
- */
-static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- int size;
-
- printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
- vma->vm_start, vma->vm_end);
-
- vma->vm_ops = &blktap_vm_ops;
-
- size = vma->vm_end - vma->vm_start;
- if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
- printk(KERN_INFO
- "blktap: you _must_ map exactly %d pages!\n",
- MMAP_PAGES + RING_PAGES);
- return -EAGAIN;
- }
-
- size >>= PAGE_SHIFT;
- printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
-
- rings_vstart = vma->vm_start;
- mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
-
- /* Map the ring pages to the start of the region and reserve it. */
-
- /* not sure if I really need to do this... */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
- if (remap_pfn_range(vma, vma->vm_start,
- __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot)) {
- WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
- }
-
-
- DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
- if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE,
- __pa(blktap_ube_ring.sring) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot)) {
- WPRINTK("be_ring: remap_pfn_range failure!\n");
- }
-
- DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
- if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ),
- __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot)) {
- WPRINTK("fe_ring: remap_pfn_range failure!\n");
- }
-
- blktap_vma = vma;
- blktap_ring_ok = 1;
-
- return 0;
-}
-
-static int blktap_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
-{
- switch(cmd) {
- case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
- return blktap_read_fe_ring();
-
- case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
- return blktap_read_be_ring();
-
- case BLKTAP_IOCTL_SETMODE:
- if (BLKTAP_MODE_VALID(arg)) {
- blktap_mode = arg;
- /* XXX: may need to flush rings here. */
- printk(KERN_INFO "blktap: set mode to %lx\n", arg);
- return 0;
- }
- break;
- case BLKTAP_IOCTL_PRINT_IDXS:
- {
- print_vm_ring_idxs();
- WPRINTK("User Rings: \n-----------\n");
- WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
- "| req_prod: %2d, rsp_prod: %2d\n",
- blktap_ufe_ring.rsp_cons,
- blktap_ufe_ring.req_prod_pvt,
- blktap_ufe_ring.sring->req_prod,
- blktap_ufe_ring.sring->rsp_prod);
- WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
- "| req_prod: %2d, rsp_prod: %2d\n",
- blktap_ube_ring.req_cons,
- blktap_ube_ring.rsp_prod_pvt,
- blktap_ube_ring.sring->req_prod,
- blktap_ube_ring.sring->rsp_prod);
-
- }
- }
- return -ENOIOCTLCMD;
-}
-
-static unsigned int blktap_poll(struct file *file, poll_table *wait)
-{
- poll_wait(file, &blktap_wait, wait);
-
- if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
- RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ||
- RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
-
- RING_PUSH_REQUESTS(&blktap_uctrl_ring);
- RING_PUSH_REQUESTS(&blktap_ufe_ring);
- RING_PUSH_RESPONSES(&blktap_ube_ring);
- return POLLIN | POLLRDNORM;
- }
-
- return 0;
-}
-
-void blktap_kick_user(void)
-{
- /* blktap_ring->req_prod = blktap_req_prod; */
- wake_up_interruptible(&blktap_wait);
-}
-
-static struct file_operations blktap_fops = {
- owner: THIS_MODULE,
- poll: blktap_poll,
- ioctl: blktap_ioctl,
- open: blktap_open,
- release: blktap_release,
- mmap: blktap_mmap,
-};
-
-/*-----[ Data to/from user space ]----------------------------------------*/
-
-
-int blktap_write_fe_ring(blkif_request_t *req)
-{
- blkif_request_t *target;
- int error, i;
-
- /*
- * This is called to pass a request from the real frontend domain's
- * blkif ring to the character device.
- */
-
- if ( ! blktap_ring_ok ) {
- DPRINTK("blktap: ufe_ring not ready for a request!\n");
- return 0;
- }
-
- if ( RING_FULL(&blktap_ufe_ring) ) {
- DPRINTK("blktap: fe_ring is full, can't add.\n");
- return 0;
- }
-
- target = RING_GET_REQUEST(&blktap_ufe_ring,
- blktap_ufe_ring.req_prod_pvt);
- memcpy(target, req, sizeof(*req));
-
- /* Attempt to map the foreign pages directly in to the application */
- for (i=0; i<target->nr_segments; i++) {
-
- error = direct_remap_area_pages(blktap_vma->vm_mm,
- MMAP_VADDR(ID_TO_IDX(req->id), i),
- target->frame_and_sects[i] & PAGE_MASK,
- PAGE_SIZE,
- blktap_vma->vm_page_prot,
- ID_TO_DOM(req->id));
- if ( error != 0 ) {
- printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
- /* the request is now dropped on the floor. */
- return 0;
- }
- }
-
- blktap_ufe_ring.req_prod_pvt++;
-
- return 0;
-}
-
-int blktap_write_be_ring(blkif_response_t *rsp)
-{
- blkif_response_t *target;
-
- /*
- * This is called to pass a request from the real backend domain's
- * blkif ring to the character device.
- */
-
- if ( ! blktap_ring_ok ) {
- DPRINTK("blktap: be_ring not ready for a request!\n");
- return 0;
- }
-
- /* No test for fullness in the response direction. */
-
- target = RING_GET_RESPONSE(&blktap_ube_ring,
- blktap_ube_ring.rsp_prod_pvt);
- memcpy(target, rsp, sizeof(*rsp));
-
- /* no mapping -- pages were mapped in blktap_write_fe_ring() */
-
- blktap_ube_ring.rsp_prod_pvt++;
-
- return 0;
-}
-
-static void blktap_fast_flush_area(int idx, int nr_pages)
-{
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
- int i;
-
- for ( i = 0; i < nr_pages; i++ )
- {
- mcl[i].op = __HYPERVISOR_update_va_mapping;
- mcl[i].args[0] = MMAP_VADDR(idx, i);
- mcl[i].args[1] = 0;
- mcl[i].args[2] = 0;
- }
-
- mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
- if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
- BUG();
-}
-
-static int blktap_read_fe_ring(void)
-{
- /* This is called to read responses from the UFE ring. */
-
- RING_IDX i, rp;
- blkif_response_t *resp_s;
- blkif_t *blkif;
- active_req_t *ar;
-
- DPRINTK("blktap_read_fe_ring()\n");
-
- /* if we are forwarding from UFERring to FERing */
- if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
-
- /* for each outstanding message on the UFEring */
- rp = blktap_ufe_ring.sring->rsp_prod;
- rmb();
-
- for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
- {
- resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
-
- DPRINTK("resp->fe_ring\n");
- ar = lookup_active_req(ID_TO_IDX(resp_s->id));
- blkif = ar->blkif;
- blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
- write_resp_to_fe_ring(blkif, resp_s);
- kick_fe_domain(blkif);
- }
-
- blktap_ufe_ring.rsp_cons = i;
- }
- return 0;
-}
-
-static int blktap_read_be_ring(void)
-{
- /* This is called to read requests from the UBE ring. */
-
- RING_IDX i, rp;
- blkif_request_t *req_s;
-
- DPRINTK("blktap_read_be_ring()\n");
-
- /* if we are forwarding from UFERring to FERing */
- if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
-
- /* for each outstanding message on the UFEring */
- rp = blktap_ube_ring.sring->req_prod;
- rmb();
- for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
- {
- req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
-
- DPRINTK("req->be_ring\n");
- write_req_to_be_ring(req_s);
- kick_be_domain();
- }
-
- blktap_ube_ring.req_cons = i;
- }
-
- return 0;
-}
-
-int blktap_write_ctrl_ring(ctrl_msg_t *msg)
-{
- ctrl_msg_t *target;
-
- if ( ! blktap_ring_ok ) {
- DPRINTK("blktap: be_ring not ready for a request!\n");
- return 0;
- }
-
- /* No test for fullness in the response direction. */
-
- target = RING_GET_REQUEST(&blktap_uctrl_ring,
- blktap_uctrl_ring.req_prod_pvt);
- memcpy(target, msg, sizeof(*msg));
-
- blktap_uctrl_ring.req_prod_pvt++;
-
- /* currently treat the ring as unidirectional. */
- blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
-
- return 0;
-
-}
-
-/* -------[ blktap module setup ]------------------------------------- */
-
-static struct miscdevice blktap_miscdev = {
- .minor = BLKTAP_MINOR,
- .name = "blktap",
- .fops = &blktap_fops,
- .devfs_name = "misc/blktap",
-};
-
-int blktap_init(void)
-{
- int err;
-
- err = misc_register(&blktap_miscdev);
- if ( err != 0 )
- {
- printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
- return err;
- }
-
- init_waitqueue_head(&blktap_wait);
-
-
- return 0;
-}
+++ /dev/null
-
-#ifndef __USBIF__BACKEND__COMMON_H__
-#define __USBIF__BACKEND__COMMON_H__
-
-#include <linux/config.h>
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/rbtree.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <asm/io.h>
-#include <asm/setup.h>
-#include <asm/pgalloc.h>
-#include <asm-xen/ctrl_if.h>
-#include <asm-xen/hypervisor.h>
-
-#include <asm-xen/xen-public/io/usbif.h>
-
-#if 0
-#define ASSERT(_p) \
- if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
- __FILE__ , __LINE__ , ## _a )
-#else
-#define ASSERT(_p) ((void)0)
-#define DPRINTK(_f, _a...) ((void)0)
-#endif
-
-typedef struct usbif_priv_st usbif_priv_t;
-
-struct usbif_priv_st {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned long shmem_frame;
- unsigned int evtchn;
- int irq;
- /* Comms Information */
- usbif_back_ring_t usb_ring;
- /* Private fields. */
- enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
- /*
- * DISCONNECT response is deferred until pending requests are ack'ed.
- * We therefore need to store the id from the original request.
- */
- u8 disconnect_rspid;
- usbif_priv_t *hash_next;
- struct list_head usbif_list;
- spinlock_t usb_ring_lock;
- atomic_t refcnt;
-
- struct work_struct work;
-};
-
-void usbif_create(usbif_be_create_t *create);
-void usbif_destroy(usbif_be_destroy_t *destroy);
-void usbif_connect(usbif_be_connect_t *connect);
-int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id);
-void usbif_disconnect_complete(usbif_priv_t *up);
-
-void usbif_release_port(usbif_be_release_port_t *msg);
-int usbif_claim_port(usbif_be_claim_port_t *msg);
-void usbif_release_ports(usbif_priv_t *up);
-
-usbif_priv_t *usbif_find(domid_t domid);
-#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
-#define usbif_put(_b) \
- do { \
- if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- usbif_disconnect_complete(_b); \
- } while (0)
-
-
-void usbif_interface_init(void);
-void usbif_ctrlif_init(void);
-
-void usbif_deschedule(usbif_priv_t *up);
-void remove_from_usbif_list(usbif_priv_t *up);
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs);
-
-#endif /* __USBIF__BACKEND__COMMON_H__ */
+++ /dev/null
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/control.c
- *
- * Routines for interfacing with the control plane.
- *
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype);
-
- switch ( msg->subtype )
- {
- case CMSG_USBIF_BE_CREATE:
- if ( msg->length != sizeof(usbif_be_create_t) )
- goto parse_error;
- usbif_create((usbif_be_create_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_DESTROY:
- if ( msg->length != sizeof(usbif_be_destroy_t) )
- goto parse_error;
- usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_CONNECT:
- if ( msg->length != sizeof(usbif_be_connect_t) )
- goto parse_error;
- usbif_connect((usbif_be_connect_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_DISCONNECT:
- if ( msg->length != sizeof(usbif_be_disconnect_t) )
- goto parse_error;
- if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) )
- return; /* Sending the response is deferred until later. */
- break;
- case CMSG_USBIF_BE_CLAIM_PORT:
- if ( msg->length != sizeof(usbif_be_claim_port_t) )
- goto parse_error;
- usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]);
- break;
- case CMSG_USBIF_BE_RELEASE_PORT:
- if ( msg->length != sizeof(usbif_be_release_port_t) )
- goto parse_error;
- usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]);
- break;
- default:
- goto parse_error;
- }
-
- ctrl_if_send_response(msg);
- return;
-
- parse_error:
- DPRINTK("Parse error while reading message subtype %d, len %d\n",
- msg->subtype, msg->length);
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-void usbif_ctrlif_init(void)
-{
- ctrl_msg_t cmsg;
- usbif_be_driver_status_changed_t st;
-
- (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_USBIF_BE;
- cmsg.subtype = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED;
- cmsg.length = sizeof(usbif_be_driver_status_changed_t);
- st.status = USBIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
+++ /dev/null
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/interface.c
- *
- * USB device interface management.
- *
- * by Mark Williamson, Copyright (c) 2004
- */
-
-
-/* Based on arch/xen/drivers/blkif/backend/interface.c
- * (Block-device interface management.)
- * Copyright (c) 2004, Keir Fraser
- */
-
-#include "common.h"
-
-#define USBIF_HASHSZ 1024
-#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1))
-
-static kmem_cache_t *usbif_priv_cachep;
-static usbif_priv_t *usbif_priv_hash[USBIF_HASHSZ];
-
-usbif_priv_t *usbif_find(domid_t domid)
-{
- usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)];
- while ( (up != NULL ) && ( up->domid != domid ) )
- up = up->hash_next;
- return up;
-}
-
-static void __usbif_disconnect_complete(void *arg)
-{
- usbif_priv_t *usbif = (usbif_priv_t *)arg;
- ctrl_msg_t cmsg;
- usbif_be_disconnect_t disc;
-
- /*
- * These can't be done in usbif_disconnect() because at that point there
- * may be outstanding requests at the device whose asynchronous responses
- * must still be notified to the remote driver.
- */
- unbind_evtchn_from_irq(usbif->evtchn);
- vfree(usbif->usb_ring.sring);
-
- /* Construct the deferred response message. */
- cmsg.type = CMSG_USBIF_BE;
- cmsg.subtype = CMSG_USBIF_BE_DISCONNECT;
- cmsg.id = usbif->disconnect_rspid;
- cmsg.length = sizeof(usbif_be_disconnect_t);
- disc.domid = usbif->domid;
- disc.status = USBIF_BE_STATUS_OKAY;
- memcpy(cmsg.msg, &disc, sizeof(disc));
-
- /*
- * Make sure message is constructed /before/ status change, because
- * after the status change the 'usbif' structure could be deallocated at
- * any time. Also make sure we send the response /after/ status change,
- * as otherwise a subsequent CONNECT request could spuriously fail if
- * another CPU doesn't see the status change yet.
- */
- mb();
- if ( usbif->status != DISCONNECTING )
- BUG();
- usbif->status = DISCONNECTED;
- mb();
-
- /* Send the successful response. */
- ctrl_if_send_response(&cmsg);
-}
-
-void usbif_disconnect_complete(usbif_priv_t *up)
-{
- INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
- schedule_work(&up->work);
-}
-
-void usbif_create(usbif_be_create_t *create)
-{
- domid_t domid = create->domid;
- usbif_priv_t **pup, *up;
-
- if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
- {
- DPRINTK("Could not create usbif: out of memory\n");
- create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- memset(up, 0, sizeof(*up));
- up->domid = domid;
- up->status = DISCONNECTED;
- spin_lock_init(&up->usb_ring_lock);
- atomic_set(&up->refcnt, 0);
-
- pup = &usbif_priv_hash[USBIF_HASH(domid)];
- while ( *pup != NULL )
- {
- if ( (*pup)->domid == domid )
- {
- create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
- kmem_cache_free(usbif_priv_cachep, up);
- return;
- }
- pup = &(*pup)->hash_next;
- }
-
- up->hash_next = *pup;
- *pup = up;
-
- create->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_destroy(usbif_be_destroy_t *destroy)
-{
- domid_t domid = destroy->domid;
- usbif_priv_t **pup, *up;
-
- pup = &usbif_priv_hash[USBIF_HASH(domid)];
- while ( (up = *pup) != NULL )
- {
- if ( up->domid == domid )
- {
- if ( up->status != DISCONNECTED )
- goto still_connected;
- goto destroy;
- }
- pup = &up->hash_next;
- }
-
- destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
-
- still_connected:
- destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
-
- destroy:
- *pup = up->hash_next;
- usbif_release_ports(up);
- kmem_cache_free(usbif_priv_cachep, up);
- destroy->status = USBIF_BE_STATUS_OKAY;
-}
-
-void usbif_connect(usbif_be_connect_t *connect)
-{
- domid_t domid = connect->domid;
- unsigned int evtchn = connect->evtchn;
- unsigned long shmem_frame = connect->shmem_frame;
- struct vm_struct *vma;
- pgprot_t prot;
- int error;
- usbif_priv_t *up;
- usbif_sring_t *sring;
-
- up = usbif_find(domid);
- if ( unlikely(up == NULL) )
- {
- DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n",
- connect->domid);
- connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- {
- connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
- error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
- shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
- prot, domid);
- if ( error != 0 )
- {
- if ( error == -ENOMEM )
- connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT )
- connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
- else
- connect->status = USBIF_BE_STATUS_ERROR;
- vfree(vma->addr);
- return;
- }
-
- if ( up->status != DISCONNECTED )
- {
- connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
- vfree(vma->addr);
- return;
- }
-
- sring = (usbif_sring_t *)vma->addr;
- SHARED_RING_INIT(sring);
- BACK_RING_INIT(&up->usb_ring, sring);
-
- up->evtchn = evtchn;
- up->irq = bind_evtchn_to_irq(evtchn);
- up->shmem_frame = shmem_frame;
- up->status = CONNECTED;
- usbif_get(up);
-
- request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
-
- connect->status = USBIF_BE_STATUS_OKAY;
-}
-
-/* Remove URBs for this interface before destroying it. */
-void usbif_deschedule(usbif_priv_t *up)
-{
- remove_from_usbif_list(up);
-}
-
-int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
-{
- domid_t domid = disconnect->domid;
- usbif_priv_t *up;
-
- up = usbif_find(domid);
- if ( unlikely(up == NULL) )
- {
- DPRINTK("usbif_disconnect attempted for non-existent usbif"
- " (%u)\n", disconnect->domid);
- disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return 1; /* Caller will send response error message. */
- }
-
- if ( up->status == CONNECTED )
- {
- up->status = DISCONNECTING;
- up->disconnect_rspid = rsp_id;
- wmb(); /* Let other CPUs see the status change. */
- free_irq(up->irq, up);
- usbif_deschedule(up);
- usbif_put(up);
- return 0; /* Caller should not send response message. */
- }
-
- disconnect->status = USBIF_BE_STATUS_OKAY;
- return 1;
-}
-
-void __init usbif_interface_init(void)
-{
- usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
- sizeof(usbif_priv_t),
- 0, 0, NULL, NULL);
- memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
-}
+++ /dev/null
-/******************************************************************************
- * arch/xen/drivers/usbif/backend/main.c
- *
- * Backend for the Xen virtual USB driver - provides an abstraction of a
- * USB host controller to the corresponding frontend driver.
- *
- * by Mark Williamson
- * Copyright (c) 2004 Intel Research Cambridge
- * Copyright (c) 2004, 2005 Mark Williamson
- *
- * Based on arch/xen/drivers/blkif/backend/main.c
- * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
- */
-
-#include "common.h"
-
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/tqueue.h>
-
-/*
- * This is rather arbitrary.
- */
-#define MAX_PENDING_REQS 4
-#define BATCH_PER_DOMAIN 1
-
-static unsigned long mmap_vstart;
-
-/* Needs to be sufficiently large that we can map the (large) buffers
- * the USB mass storage driver wants. */
-#define MMAP_PAGES_PER_REQUEST \
- (128)
-#define MMAP_PAGES \
- (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-
-#define MMAP_VADDR(_req,_seg) \
- (mmap_vstart + \
- ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
- ((_seg) * PAGE_SIZE))
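
The MMAP_VADDR() macro above carves one contiguous virtual region into MAX_PENDING_REQS slots of MMAP_PAGES_PER_REQUEST pages each. A small worked example of that arithmetic, assuming a purely hypothetical base address of 0xC8000000 and 4 KiB pages (in the driver, mmap_vstart comes from allocate_empty_lowmem_region()):

#include <stdio.h>

/* Illustrative constants; in the driver mmap_vstart is allocated at runtime. */
#define PAGE_SHIFT              12
#define PAGE_SIZE               (1UL << PAGE_SHIFT)
#define MMAP_PAGES_PER_REQUEST  128

#define MMAP_VADDR(start, req, seg) \
    ((start) + ((req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + ((seg) * PAGE_SIZE))

int main(void)
{
    unsigned long mmap_vstart = 0xC8000000UL;    /* hypothetical base address */

    /* Page 3 of the buffer belonging to pending-request slot 2:
     * 0xC8000000 + 2*128*4096 + 3*4096 = 0xC8103000. */
    printf("vaddr = 0x%lx\n", MMAP_VADDR(mmap_vstart, 2, 3));
    return 0;
}
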
-
-
-static spinlock_t owned_ports_lock;
-LIST_HEAD(owned_ports);
-
-/* A list of these structures is used to track ownership of physical USB
- * ports. */
-typedef struct
-{
- usbif_priv_t *usbif_priv;
- char path[16];
- int guest_port;
- int enabled;
- struct list_head list;
- unsigned long guest_address; /* The USB device address that has been
- * assigned by the guest. */
- int dev_present; /* Is there a device present? */
- struct usb_device * dev;
- unsigned long ifaces; /* What interfaces are present on this device? */
-} owned_port_t;
-
-
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. When the request is complete, the specified
- * domain has a response queued for it, with the saved 'id' passed back.
- */
-typedef struct {
- usbif_priv_t *usbif_priv;
- unsigned long id;
- int nr_pages;
- unsigned short operation;
- int status;
-} pending_req_t;
-
-/*
- * We can't allocate pending_req's in order, since they may complete out of
- * order. We therefore maintain an allocation ring. This ring also indicates
- * when enough work has been passed down -- at that point the allocation ring
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock;
-
-/* NB. We use a different index type to differentiate from shared usb rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
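
pending_ring/pending_prod/pending_cons above form a free-index ring: slot numbers for pending_reqs[] are handed out at the consumer end, returned (possibly out of order) at the producer end, and NR_PENDING_REQS gives the number currently in use. A minimal single-threaded sketch of the idiom with the same power-of-two masking (no locking, unlike the driver's pend_prod_lock):

#include <stdio.h>

#define MAX_PENDING_REQS 4                       /* must be a power of two */
#define MASK_PEND_IDX(i) ((i) & (MAX_PENDING_REQS - 1))

static unsigned char pending_ring[MAX_PENDING_REQS];
static unsigned int pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

static int alloc_pending(void)                   /* take a free slot index */
{
    if (NR_PENDING_REQS == MAX_PENDING_REQS)
        return -1;                               /* every slot is in use */
    return pending_ring[MASK_PEND_IDX(pending_cons++)];
}

static void free_pending(int idx)                /* return a slot, in any order */
{
    pending_ring[MASK_PEND_IDX(pending_prod++)] = (unsigned char)idx;
}

int main(void)
{
    int i, a, b;

    pending_prod = MAX_PENDING_REQS;             /* initially all slots free */
    for (i = 0; i < MAX_PENDING_REQS; i++)
        pending_ring[i] = (unsigned char)i;

    a = alloc_pending();
    b = alloc_pending();
    free_pending(b);                             /* 'b' completes before 'a' */
    free_pending(a);
    printf("slots in use: %u\n", (unsigned)NR_PENDING_REQS);   /* prints 0 */
    return 0;
}
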
-
-static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do);
-static void make_response(usbif_priv_t *usbif, unsigned long id,
- unsigned short op, int st, int inband,
- unsigned long actual_length);
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port);
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req);
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid);
-static owned_port_t *usbif_find_port(char *);
-
-/******************************************************************
- * PRIVATE DEBUG FUNCTIONS
- */
-
-#undef DEBUG
-#ifdef DEBUG
-
-static void dump_port(owned_port_t *p)
-{
- printk(KERN_DEBUG "owned_port_t @ %p\n"
- " usbif_priv @ %p\n"
- " path: %s\n"
- " guest_port: %d\n"
- " guest_address: %ld\n"
- " dev_present: %d\n"
- " dev @ %p\n"
- " ifaces: 0x%lx\n",
- p, p->usbif_priv, p->path, p->guest_port, p->guest_address,
- p->dev_present, p->dev, p->ifaces);
-}
-
-
-static void dump_request(usbif_request_t *req)
-{
- printk(KERN_DEBUG "id = 0x%lx\n"
- "devnum %d\n"
- "endpoint 0x%x\n"
- "direction %d\n"
- "speed %d\n"
- "pipe_type 0x%x\n"
- "transfer_buffer 0x%lx\n"
- "length 0x%lx\n"
- "transfer_flags 0x%lx\n"
- "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n"
- "iso_schedule = 0x%lx\n"
- "num_iso %ld\n",
- req->id, req->devnum, req->endpoint, req->direction, req->speed,
- req->pipe_type, req->transfer_buffer, req->length,
- req->transfer_flags, req->setup[0], req->setup[1], req->setup[2],
- req->setup[3], req->setup[4], req->setup[5], req->setup[6],
- req->setup[7], req->iso_schedule, req->num_iso);
-}
-
-static void dump_urb(struct urb *urb)
-{
- printk(KERN_DEBUG "dumping urb @ %p\n", urb);
-
-#define DUMP_URB_FIELD(name, format) \
- printk(KERN_DEBUG " " # name " " format "\n", urb-> name)
-
- DUMP_URB_FIELD(pipe, "0x%x");
- DUMP_URB_FIELD(status, "%d");
- DUMP_URB_FIELD(transfer_flags, "0x%x");
- DUMP_URB_FIELD(transfer_buffer, "%p");
- DUMP_URB_FIELD(transfer_buffer_length, "%d");
- DUMP_URB_FIELD(actual_length, "%d");
-}
-
-static void dump_response(usbif_response_t *resp)
-{
- printk(KERN_DEBUG "usbback: Sending response:\n"
- " id = 0x%x\n"
- " op = %d\n"
- " status = %d\n"
- " data = %d\n"
- " length = %d\n",
- resp->id, resp->op, resp->status, resp->data, resp->length);
-}
-
-#else /* DEBUG */
-
-#define dump_port(blah) ((void)0)
-#define dump_request(blah) ((void)0)
-#define dump_urb(blah) ((void)0)
-#define dump_response(blah) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************
- * MEMORY MANAGEMENT
- */
-
-static void fast_flush_area(int idx, int nr_pages)
-{
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
- int i;
-
- for ( i = 0; i < nr_pages; i++ )
- {
- mcl[i].op = __HYPERVISOR_update_va_mapping;
- mcl[i].args[0] = MMAP_VADDR(idx, i);
- mcl[i].args[1] = 0;
- mcl[i].args[2] = 0;
- }
-
- mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
- if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
- BUG();
-}
-
-
-/******************************************************************
- * USB INTERFACE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head usbio_schedule_list;
-static spinlock_t usbio_schedule_list_lock;
-
-static int __on_usbif_list(usbif_priv_t *up)
-{
- return up->usbif_list.next != NULL;
-}
-
-void remove_from_usbif_list(usbif_priv_t *up)
-{
- unsigned long flags;
- if ( !__on_usbif_list(up) ) return;
- spin_lock_irqsave(&usbio_schedule_list_lock, flags);
- if ( __on_usbif_list(up) )
- {
- list_del(&up->usbif_list);
- up->usbif_list.next = NULL;
- usbif_put(up);
- }
- spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-static void add_to_usbif_list_tail(usbif_priv_t *up)
-{
- unsigned long flags;
- if ( __on_usbif_list(up) ) return;
- spin_lock_irqsave(&usbio_schedule_list_lock, flags);
- if ( !__on_usbif_list(up) && (up->status == CONNECTED) )
- {
- list_add_tail(&up->usbif_list, &usbio_schedule_list);
- usbif_get(up);
- }
- spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
-}
-
-void free_pending(int pending_idx)
-{
- unsigned long flags;
-
- /* Free the pending request. */
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
- spin_unlock_irqrestore(&pend_prod_lock, flags);
-}
-
-/******************************************************************
- * COMPLETION CALLBACK -- Called as urb->complete()
- */
-
-static void maybe_trigger_usbio_schedule(void);
-
-static void __end_usb_io_op(struct urb *purb)
-{
- pending_req_t *pending_req;
- int pending_idx;
-
- pending_req = purb->context;
-
- pending_idx = pending_req - pending_reqs;
-
- ASSERT(purb->actual_length <= purb->transfer_buffer_length);
- ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE);
-
- /* An error fails the entire request. */
- if ( purb->status )
- {
- printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status);
- }
-
- if ( usb_pipetype(purb->pipe) == 0 )
- {
- int i;
- usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1);
-
- /* If we're dealing with an iso pipe, we need to copy back the schedule. */
- for ( i = 0; i < purb->number_of_packets; i++ )
- {
- sched[i].length = purb->iso_frame_desc[i].actual_length;
- ASSERT(sched[i].buffer_offset ==
- purb->iso_frame_desc[i].offset);
- sched[i].status = purb->iso_frame_desc[i].status;
- }
- }
-
- fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages);
-
- kfree(purb->setup_packet);
-
- make_response(pending_req->usbif_priv, pending_req->id,
- pending_req->operation, pending_req->status, 0, purb->actual_length);
- usbif_put(pending_req->usbif_priv);
-
- usb_free_urb(purb);
-
- free_pending(pending_idx);
-
- rmb();
-
- /* Check for anything still waiting in the rings, having freed a request... */
- maybe_trigger_usbio_schedule();
-}
-
-/******************************************************************
- * SCHEDULER FUNCTIONS
- */
-
-static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait);
-
-static int usbio_schedule(void *arg)
-{
- DECLARE_WAITQUEUE(wq, current);
-
- usbif_priv_t *up;
- struct list_head *ent;
-
- daemonize();
-
- for ( ; ; )
- {
- /* Wait for work to do. */
- add_wait_queue(&usbio_schedule_wait, &wq);
- set_current_state(TASK_INTERRUPTIBLE);
- if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
- list_empty(&usbio_schedule_list) )
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&usbio_schedule_wait, &wq);
-
- /* Queue up a batch of requests. */
- while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
- !list_empty(&usbio_schedule_list) )
- {
- ent = usbio_schedule_list.next;
- up = list_entry(ent, usbif_priv_t, usbif_list);
- usbif_get(up);
- remove_from_usbif_list(up);
- if ( do_usb_io_op(up, BATCH_PER_DOMAIN) )
- add_to_usbif_list_tail(up);
- usbif_put(up);
- }
- }
-}
-
-static void maybe_trigger_usbio_schedule(void)
-{
- /*
- * Needed so that two processes, who together make the following predicate
- * true, don't both read stale values and evaluate the predicate
- * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
- */
- smp_mb();
-
- if ( !list_empty(&usbio_schedule_list) )
- wake_up(&usbio_schedule_wait);
-}
-
-
-/******************************************************************************
- * NOTIFICATION FROM GUEST OS.
- */
-
-irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs)
-{
- usbif_priv_t *up = dev_id;
-
- smp_mb();
-
- add_to_usbif_list_tail(up);
-
- /* Will in fact /always/ trigger an io schedule in this case. */
- maybe_trigger_usbio_schedule();
-
- return IRQ_HANDLED;
-}
-
-
-
-/******************************************************************
- * DOWNWARD CALLS -- These interface with the usb-device layer proper.
- */
-
-static int do_usb_io_op(usbif_priv_t *up, int max_to_do)
-{
- usbif_back_ring_t *usb_ring = &up->usb_ring;
- usbif_request_t *req;
- RING_IDX i, rp;
- int more_to_do = 0;
-
- rp = usb_ring->sring->req_prod;
- rmb(); /* Ensure we see queued requests up to 'rp'. */
-
- /* Take items off the comms ring, taking care not to overflow. */
- for ( i = usb_ring->req_cons;
- (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
- i++ )
- {
- if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
- {
- more_to_do = 1;
- break;
- }
-
- req = RING_GET_REQUEST(usb_ring, i);
-
- switch ( req->operation )
- {
- case USBIF_OP_PROBE:
- dispatch_usb_probe(up, req->id, req->port);
- break;
-
- case USBIF_OP_IO:
- /* Assemble an appropriate URB. */
- dispatch_usb_io(up, req);
- break;
-
- case USBIF_OP_RESET:
- dispatch_usb_reset(up, req->port);
- break;
-
- default:
- DPRINTK("error: unknown USB io operation [%d]\n",
- req->operation);
- make_response(up, req->id, req->operation, -EINVAL, 0, 0);
- break;
- }
- }
-
- usb_ring->req_cons = i;
-
- return more_to_do;
-}
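
do_usb_io_op() above follows the usual Xen shared-ring consumer shape: snapshot the producer index once, issue a read barrier, then advance req_cons towards that snapshot, stopping early when the batch limit or the pending-request pool runs out. A simplified single-process sketch of that shape using plain arrays instead of the usbif_back_ring_t macros (the barrier is shown only as a comment, since a single thread does not need it):

#include <stdio.h>

#define RING_SIZE 8                              /* power of two */
#define MASK(i)   ((i) & (RING_SIZE - 1))

struct request { int op; };

static struct request ring[RING_SIZE];
static unsigned int req_prod, req_cons;          /* producer / consumer indices */

static void dispatch(struct request *r) { printf("dispatch op %d\n", r->op); }

/* Consume at most 'max_to_do' requests; return 1 if work remains. */
static int consume(int max_to_do)
{
    unsigned int i, rp = req_prod;               /* snapshot the producer index */
    /* rmb(); -- in the driver, ensures request bodies are visible before use */

    for (i = req_cons; i != rp; i++) {
        if (max_to_do-- == 0) {
            req_cons = i;
            return 1;                            /* batch limit hit: more to do */
        }
        dispatch(&ring[MASK(i)]);
    }
    req_cons = i;
    return 0;
}

int main(void)
{
    int n;

    for (n = 0; n < 5; n++)                      /* producer side: queue 5 requests */
        ring[MASK(req_prod++)].op = n;

    while (consume(2))                           /* drain in batches of two */
        ;
    return 0;
}
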
-
-static owned_port_t *find_guest_port(usbif_priv_t *up, int port)
-{
- unsigned long flags;
- struct list_head *l;
-
- spin_lock_irqsave(&owned_ports_lock, flags);
- list_for_each(l, &owned_ports)
- {
- owned_port_t *p = list_entry(l, owned_port_t, list);
- if(p->usbif_priv == up && p->guest_port == port)
- {
- spin_unlock_irqrestore(&owned_ports_lock, flags);
- return p;
- }
- }
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-
- return NULL;
-}
-
-static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid)
-{
- owned_port_t *port = find_guest_port(up, portid);
- int ret = 0;
-
-
- /* Allowing the guest to actually reset the device causes more problems
- * than it's worth. We just fake it out in software but we will do a real
- * reset when the interface is destroyed. */
-
- dump_port(port);
-
- port->guest_address = 0;
- /* If there's an attached device then the port is now enabled. */
- if ( port->dev_present )
- port->enabled = 1;
- else
- port->enabled = 0;
-
- make_response(up, 0, USBIF_OP_RESET, ret, 0, 0);
-}
-
-static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid)
-{
- owned_port_t *port = find_guest_port(up, portid);
- int ret;
-
- if ( port != NULL )
- ret = port->dev_present;
- else
- {
- ret = -EINVAL;
- printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request "
- "(port %ld)\n", portid);
- }
-
- /* Probe result is sent back in-band. Probes don't have an associated id
- * right now... */
- make_response(up, id, USBIF_OP_PROBE, ret, portid, 0);
-}
-
-/**
- * check_iso_schedule - safety check the isochronous schedule for an URB
- * @purb : the URB in question
- */
-static int check_iso_schedule(struct urb *purb)
-{
- int i;
- unsigned long total_length = 0;
-
- for ( i = 0; i < purb->number_of_packets; i++ )
- {
- struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i];
-
- if ( desc->offset >= purb->transfer_buffer_length
- || ( desc->offset + desc->length) > purb->transfer_buffer_length )
- return -EINVAL;
-
- total_length += desc->length;
-
- if ( total_length > purb->transfer_buffer_length )
- return -EINVAL;
- }
-
- return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req);
-
-static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req)
-{
- unsigned long buffer_mach;
- int i = 0, offset = 0,
- pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
- pending_req_t *pending_req;
- unsigned long remap_prot;
- multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
- struct urb *purb = NULL;
- owned_port_t *port;
- unsigned char *setup;
-
- dump_request(req);
-
- if ( NR_PENDING_REQS == MAX_PENDING_REQS )
- {
- printk(KERN_WARNING "usbback: Max requests already queued. "
- "Giving up!\n");
-
- return;
- }
-
- port = find_port_for_request(up, req);
-
- if ( port == NULL )
- {
- printk(KERN_WARNING "No such device! (%d)\n", req->devnum);
- dump_request(req);
-
- make_response(up, req->id, req->operation, -ENODEV, 0, 0);
- return;
- }
- else if ( !port->dev_present )
- {
- /* In normal operation, we'll only get here if a device is unplugged
- * and the frontend hasn't noticed yet. */
- make_response(up, req->id, req->operation, -ENODEV, 0, 0);
- return;
- }
-
-
- setup = kmalloc(8, GFP_KERNEL);
-
- if ( setup == NULL )
- goto no_mem;
-
- /* Copy request out for safety. */
- memcpy(setup, req->setup, 8);
-
- if( setup[0] == 0x0 && setup[1] == 0x5)
- {
- /* To virtualise the USB address space, we need to intercept
- * set_address messages and emulate. From the USB specification:
- * bmRequestType = 0x0;
- * bRequest = SET_ADDRESS (i.e. 0x5)
- * wValue = device address
- * wIndex = 0
- * wLength = 0
- * data = None
- */
- /* Read the requested device address (little-endian wValue) from the setup packet. */
- port->guest_address = le16_to_cpu(*(u16 *)(setup + 2));
- /* Make a successful response. That was easy! */
-
- make_response(up, req->id, req->operation, 0, 0, 0);
-
- kfree(setup);
- return;
- }
- else if ( setup[0] == 0x0 && setup[1] == 0x9 )
- {
- /* The host kernel needs to know what device configuration is in use
- * because various error checks get confused otherwise. We just do
- * configuration settings here, under controlled conditions.
- */
-
- /* Ignore configuration setting and hope that the host kernel
- did it right. */
- /* usb_set_configuration(port->dev, setup[2]); */
-
- make_response(up, req->id, req->operation, 0, 0, 0);
-
- kfree(setup);
- return;
- }
- else if ( setup[0] == 0x1 && setup[1] == 0xB )
- {
- /* The host kernel needs to know what device interface is in use
- * because various error checks get confused otherwise. We just do
- * configuration settings here, under controlled conditions.
- */
- usb_set_interface(port->dev, (setup[4] | setup[5] << 8),
- (setup[2] | setup[3] << 8) );
-
- make_response(up, req->id, req->operation, 0, 0, 0);
-
- kfree(setup);
- return;
- }
-
- if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK)
- + req->length )
- > MMAP_PAGES_PER_REQUEST * PAGE_SIZE )
- {
- printk(KERN_WARNING "usbback: request of %lu bytes too large\n",
- req->length);
- make_response(up, req->id, req->operation, -EINVAL, 0, 0);
- kfree(setup);
- return;
- }
-
- buffer_mach = req->transfer_buffer;
-
- if( buffer_mach == 0 )
- goto no_remap;
-
- ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST);
- ASSERT(buffer_mach);
-
- /* Always map writeable for now. */
- remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
-
- for ( i = 0, offset = 0; offset < req->length;
- i++, offset += PAGE_SIZE )
- {
- mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
- mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
- mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot;
- mcl[i].args[2] = 0;
- mcl[i].args[3] = up->domid;
-
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
-
- ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
- == buffer_mach + i << PAGE_SHIFT);
- }
-
- if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
- {
- /* Map in ISO schedule, if necessary. */
- mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
- mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
- mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot;
- mcl[i].args[2] = 0;
- mcl[i].args[3] = up->domid;
-
- phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
-
- i++;
- }
-
- if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) )
- BUG();
-
- {
- int j;
- for ( j = 0; j < i; j++ )
- {
- if ( unlikely(mcl[j].args[5] != 0) )
- {
- printk(KERN_WARNING
- "invalid buffer %d -- could not remap it\n", j);
- fast_flush_area(pending_idx, i);
- goto bad_descriptor;
- }
- }
- }
-
- no_remap:
-
- ASSERT(i <= MMAP_PAGES_PER_REQUEST);
- ASSERT(i * PAGE_SIZE >= req->length);
-
- /* We have to do this because some things might complete out of order. */
- pending_req = &pending_reqs[pending_idx];
- pending_req->usbif_priv= up;
- pending_req->id = req->id;
- pending_req->operation = req->operation;
- pending_req->nr_pages = i;
-
- pending_cons++;
-
- usbif_get(up);
-
- /* Fill out an actual request for the USB layer. */
- purb = usb_alloc_urb(req->num_iso);
-
- if ( purb == NULL )
- {
- usbif_put(up);
- free_pending(pending_idx);
- goto no_mem;
- }
-
- purb->dev = port->dev;
- purb->context = pending_req;
- purb->transfer_buffer =
- (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK));
- if(buffer_mach == 0)
- purb->transfer_buffer = NULL;
- purb->complete = __end_usb_io_op;
- purb->transfer_buffer_length = req->length;
- purb->transfer_flags = req->transfer_flags;
-
- purb->pipe = 0;
- purb->pipe |= req->direction << 7;
- purb->pipe |= port->dev->devnum << 8;
- purb->pipe |= req->speed << 26;
- purb->pipe |= req->pipe_type << 30;
- purb->pipe |= req->endpoint << 15;
-
- purb->number_of_packets = req->num_iso;
-
- if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE )
- goto urb_error;
-
- /* Make sure there's always some kind of timeout. */
- purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000
- : 1000;
-
- purb->setup_packet = setup;
-
- if ( req->pipe_type == 0 ) /* ISO */
- {
- int j;
- usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1);
-
- /* If we're dealing with an iso pipe, we need to copy in a schedule. */
- for ( j = 0; j < purb->number_of_packets; j++ )
- {
- purb->iso_frame_desc[j].length = iso_sched[j].length;
- purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset;
- iso_sched[j].status = 0;
- }
- }
-
- if ( check_iso_schedule(purb) != 0 )
- goto urb_error;
-
- if ( usb_submit_urb(purb) != 0 )
- goto urb_error;
-
- return;
-
- urb_error:
- dump_urb(purb);
- usbif_put(up);
- free_pending(pending_idx);
-
- bad_descriptor:
- kfree ( setup );
- if ( purb != NULL )
- usb_free_urb(purb);
- make_response(up, req->id, req->operation, -EINVAL, 0, 0);
- return;
-
- no_mem:
- if ( setup != NULL )
- kfree(setup);
- make_response(up, req->id, req->operation, -ENOMEM, 0, 0);
- return;
-}
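
The special cases near the top of dispatch_usb_io() above all key off the first two bytes of the 8-byte control setup packet (bmRequestType, bRequest) and, for SET_ADDRESS, the little-endian wValue field. A standalone sketch of that classification; the request codes are the standard USB chapter-9 values, but the handling shown is only illustrative, not the driver's:

#include <stdio.h>

/* Standard USB request codes (USB specification, chapter 9). */
#define USB_REQ_SET_ADDRESS        0x05
#define USB_REQ_SET_CONFIGURATION  0x09
#define USB_REQ_SET_INTERFACE      0x0B

static unsigned int get_le16(const unsigned char *p)
{
    return p[0] | (p[1] << 8);                   /* wValue/wIndex are little-endian */
}

static void classify(const unsigned char setup[8])
{
    unsigned char bmRequestType = setup[0];
    unsigned char bRequest      = setup[1];

    if (bmRequestType == 0x00 && bRequest == USB_REQ_SET_ADDRESS)
        printf("SET_ADDRESS: guest wants address %u (emulate, don't forward)\n",
               get_le16(setup + 2));             /* wValue = new device address */
    else if (bmRequestType == 0x00 && bRequest == USB_REQ_SET_CONFIGURATION)
        printf("SET_CONFIGURATION %u (acknowledge without touching hardware)\n",
               get_le16(setup + 2));
    else if (bmRequestType == 0x01 && bRequest == USB_REQ_SET_INTERFACE)
        printf("SET_INTERFACE: alt setting %u on interface %u (apply host-side)\n",
               get_le16(setup + 2), get_le16(setup + 4));
    else
        printf("pass the request through to the USB stack\n");
}

int main(void)
{
    const unsigned char set_address[8] = { 0x00, 0x05, 0x07, 0x00, 0, 0, 0, 0 };
    classify(set_address);                       /* guest asks for address 7 */
    return 0;
}
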
-
-
-
-/******************************************************************
- * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
- */
-
-
-static void make_response(usbif_priv_t *up, unsigned long id,
- unsigned short op, int st, int inband,
- unsigned long length)
-{
- usbif_response_t *resp;
- unsigned long flags;
- usbif_back_ring_t *usb_ring = &up->usb_ring;
-
- /* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&up->usb_ring_lock, flags);
- resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
- resp->id = id;
- resp->operation = op;
- resp->status = st;
- resp->data = inband;
- resp->length = length;
- wmb(); /* Ensure other side can see the response fields. */
-
- dump_response(resp);
-
- usb_ring->rsp_prod_pvt++;
- RING_PUSH_RESPONSES(usb_ring);
- spin_unlock_irqrestore(&up->usb_ring_lock, flags);
-
- /* Kick the relevant domain. */
- notify_via_evtchn(up->evtchn);
-}
-
-/**
- * usbif_claim_port - claim devices on a port on behalf of guest
- *
- * Once completed, this will ensure that any device attached to that
- * port is claimed by this driver for use by the guest.
- */
-int usbif_claim_port(usbif_be_claim_port_t *msg)
-{
- owned_port_t *o_p;
-
- /* Sanity... */
- if ( usbif_find_port(msg->path) != NULL )
- {
- printk(KERN_WARNING "usbback: Attempted to claim USB port "
- "we already own!\n");
- return -EINVAL;
- }
-
- /* No need for a slab cache - this should be infrequent. */
- o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL);
-
- if ( o_p == NULL )
- return -ENOMEM;
-
- o_p->enabled = 0;
- o_p->usbif_priv = usbif_find(msg->domid);
- o_p->guest_port = msg->usbif_port;
- o_p->dev_present = 0;
- o_p->guest_address = 0; /* Default address. */
-
- strcpy(o_p->path, msg->path);
-
- spin_lock_irq(&owned_ports_lock);
-
- list_add(&o_p->list, &owned_ports);
-
- spin_unlock_irq(&owned_ports_lock);
-
- printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path,
- msg->domid, msg->usbif_port);
-
- /* Force a reprobe for unclaimed devices. */
- usb_scan_devices();
-
- return 0;
-}
-
-owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req)
-{
- unsigned long flags;
- struct list_head *port;
-
- /* I'm assuming this is not called from IRQ context - correct? I think
- * it's probably only called in response to control messages or plug events
- * in the USB hub kernel thread, so should be OK. */
- spin_lock_irqsave(&owned_ports_lock, flags);
- list_for_each(port, &owned_ports)
- {
- owned_port_t *p = list_entry(port, owned_port_t, list);
- if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled )
- {
- dump_port(p);
-
- spin_unlock_irqrestore(&owned_ports_lock, flags);
- return p;
- }
- }
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-
- return NULL;
-}
-
-owned_port_t *__usbif_find_port(char *path)
-{
- struct list_head *port;
-
- list_for_each(port, &owned_ports)
- {
- owned_port_t *p = list_entry(port, owned_port_t, list);
- if(!strcmp(path, p->path))
- {
- return p;
- }
- }
-
- return NULL;
-}
-
-owned_port_t *usbif_find_port(char *path)
-{
- owned_port_t *ret;
- unsigned long flags;
-
- spin_lock_irqsave(&owned_ports_lock, flags);
- ret = __usbif_find_port(path);
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-
- return ret;
-}
-
-
-static void *probe(struct usb_device *dev, unsigned iface,
- const struct usb_device_id *id)
-{
- owned_port_t *p;
-
- /* We don't care what the device is - if we own the port, we want it. We
- * don't deal with device-specifics in this driver, so we don't care what
- * the device actually is ;-) */
- if ( ( p = usbif_find_port(dev->devpath) ) != NULL )
- {
- printk(KERN_INFO "usbback: claimed device attached to owned port\n");
-
- p->dev_present = 1;
- p->dev = dev;
- set_bit(iface, &p->ifaces);
-
- return p->usbif_priv;
- }
- else
- printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n",
- dev->devpath);
-
-
- return NULL;
-}
-
-static void disconnect(struct usb_device *dev, void *usbif)
-{
- /* Note the device is removed so we can tell the guest when it probes. */
- owned_port_t *port = usbif_find_port(dev->devpath);
- port->dev_present = 0;
- port->dev = NULL;
- port->ifaces = 0;
-}
-
-
-struct usb_driver driver =
-{
- .owner = THIS_MODULE,
- .name = "Xen USB Backend",
- .probe = probe,
- .disconnect = disconnect,
- .id_table = NULL,
-};
-
-/* __usbif_release_port - internal mechanics for releasing a port */
-void __usbif_release_port(owned_port_t *p)
-{
- int i;
-
- for ( i = 0; p->ifaces != 0; i++)
- if ( p->ifaces & 1 << i )
- {
- usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i));
- clear_bit(i, &p->ifaces);
- }
- list_del(&p->list);
-
- /* Reset the real device. We don't simulate disconnect / probe for other
- * drivers in this kernel because we assume the device is completely under
- * the control of ourselves (i.e. the guest!). This should ensure that the
- * device is in a sane state for the next customer ;-) */
-
- /* MAW NB: we're not resetting the real device here. This looks perfectly
- * valid to me but it causes memory corruption. We seem to get away with not
- * resetting for now, although it'd be nice to have this tracked down. */
-/* if ( p->dev != NULL) */
-/* usb_reset_device(p->dev); */
-
- kfree(p);
-}
-
-
-/**
- * usbif_release_port - stop claiming devices on a port on behalf of guest
- */
-void usbif_release_port(usbif_be_release_port_t *msg)
-{
- owned_port_t *p;
-
- spin_lock_irq(&owned_ports_lock);
- p = __usbif_find_port(msg->path);
- __usbif_release_port(p);
- spin_unlock_irq(&owned_ports_lock);
-}
-
-void usbif_release_ports(usbif_priv_t *up)
-{
- struct list_head *port, *tmp;
- unsigned long flags;
-
- spin_lock_irqsave(&owned_ports_lock, flags);
- list_for_each_safe(port, tmp, &owned_ports)
- {
- owned_port_t *p = list_entry(port, owned_port_t, list);
- if ( p->usbif_priv == up )
- __usbif_release_port(p);
- }
- spin_unlock_irqrestore(&owned_ports_lock, flags);
-}
-
-static int __init usbif_init(void)
-{
- int i;
-
- if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
- !(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
- return 0;
-
- if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
- BUG();
-
- pending_cons = 0;
- pending_prod = MAX_PENDING_REQS;
- memset(pending_reqs, 0, sizeof(pending_reqs));
- for ( i = 0; i < MAX_PENDING_REQS; i++ )
- pending_ring[i] = i;
-
- spin_lock_init(&pend_prod_lock);
-
- spin_lock_init(&owned_ports_lock);
- INIT_LIST_HEAD(&owned_ports);
-
- spin_lock_init(&usbio_schedule_list_lock);
- INIT_LIST_HEAD(&usbio_schedule_list);
-
- if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
- BUG();
-
- usbif_interface_init();
-
- usbif_ctrlif_init();
-
- usb_register(&driver);
-
- printk(KERN_INFO "Xen USB Backend Initialised");
-
- return 0;
-}
-
-__initcall(usbif_init);
+++ /dev/null
-/*
- * Xen Virtual USB Frontend Driver
- *
- * This file contains the first version of the Xen virtual USB hub
- * that I've managed not to delete by mistake (3rd time lucky!).
- *
- * Based on Linux's uhci.c, original copyright notices are displayed
- * below. Portions also (c) 2004 Intel Research Cambridge
- * and (c) 2004, 2005 Mark Williamson
- *
- * Contact <mark.williamson@cl.cam.ac.uk> or
- * <xen-devel@lists.sourceforge.net> regarding this code.
- *
- * Still to be (maybe) implemented:
- * - migration / backend restart support?
- * - support for building / using as a module
- */
-
-/*
- * Universal Host Controller Interface driver for USB.
- *
- * Maintainer: Johannes Erdfelt <johannes@erdfelt.com>
- *
- * (C) Copyright 1999 Linus Torvalds
- * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@erdfelt.com
- * (C) Copyright 1999 Randy Dunlap
- * (C) Copyright 1999 Georg Acher, acher@in.tum.de
- * (C) Copyright 1999 Deti Fliegl, deti@fliegl.de
- * (C) Copyright 1999 Thomas Sailer, sailer@ife.ee.ethz.ch
- * (C) Copyright 1999 Roman Weissgaerber, weissg@vienna.at
- * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
- * support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
- * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
- *
- * Intel documents this fairly well, and as far as I know there
- * are no royalties or anything like that, but even so there are
- * people who decided that they want to do the same thing in a
- * completely different way.
- *
- * WARNING! The USB documentation is downright evil. Most of it
- * is just crap, written by a committee. You're better off ignoring
- * most of it, the important stuff is:
- * - the low-level protocol (fairly simple but lots of small details)
- * - working around the horridness of the rest
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#ifdef CONFIG_USB_DEBUG
-#define DEBUG
-#else
-#undef DEBUG
-#endif
-#include <linux/usb.h>
-
-#include <asm/irq.h>
-#include <asm/system.h>
-
-#include "xhci.h"
-
-#include "../../../../../drivers/usb/hcd.h"
-
-#include <asm-xen/xen-public/io/usbif.h>
-#include <asm/ctrl_if.h>
-#include <asm/xen-public/io/domain_controller.h>
-
-/*
- * Version Information
- */
-#define DRIVER_VERSION "v1.0"
-#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
- "Randy Dunlap, Georg Acher, Deti Fliegl, " \
- "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
-#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
-
-/*
- * debug = 0, no debugging messages
- * debug = 1, dump failed URB's except for stalls
- * debug = 2, dump all failed URB's (including stalls)
- */
-#ifdef DEBUG
-static int debug = 1;
-#else
-static int debug = 0;
-#endif
-MODULE_PARM(debug, "i");
-MODULE_PARM_DESC(debug, "Debug level");
-static char *errbuf;
-#define ERRBUF_LEN (PAGE_SIZE * 8)
-
-static int rh_submit_urb(struct urb *urb);
-static int rh_unlink_urb(struct urb *urb);
-static int xhci_unlink_urb(struct urb *urb);
-static void xhci_call_completion(struct urb *urb);
-static void xhci_drain_ring(void);
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
-static void xhci_finish_completion(void);
-
-#define MAX_URB_LOOP 2048 /* Maximum number of linked URB's */
-
-static kmem_cache_t *xhci_up_cachep; /* urb_priv cache */
-static struct xhci *xhci; /* XHCI structure for the interface */
-
-/******************************************************************************
- * DEBUGGING
- */
-
-#ifdef DEBUG
-
-static void dump_urb(struct urb *urb)
-{
- printk(KERN_DEBUG "dumping urb @ %p\n"
- " hcpriv = %p\n"
- " next = %p\n"
- " dev = %p\n"
- " pipe = 0x%lx\n"
- " status = %d\n"
- " transfer_flags = 0x%lx\n"
- " transfer_buffer = %p\n"
- " transfer_buffer_length = %d\n"
- " actual_length = %d\n"
- " bandwidth = %d\n"
- " setup_packet = %p\n",
- urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
- urb->transfer_flags, urb->transfer_buffer,
- urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
- urb->setup_packet);
- if ( urb->setup_packet != NULL )
- printk(KERN_DEBUG
- "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
- urb->setup_packet[0], urb->setup_packet[1],
- urb->setup_packet[2], urb->setup_packet[3],
- urb->setup_packet[4], urb->setup_packet[5],
- urb->setup_packet[6], urb->setup_packet[7]);
- printk(KERN_DEBUG "complete = %p\n"
- "interval = %d\n", urb->complete, urb->interval);
-
-}
-
-static void xhci_show_resp(usbif_response_t *r)
-{
- printk(KERN_DEBUG "dumping response @ %p\n"
- " id=0x%lx\n"
- " op=0x%x\n"
- " data=0x%x\n"
- " status=0x%x\n"
- " length=0x%lx\n",
- r->id, r->operation, r->data, r->status, r->length);
-}
-
-#define DPRINTK(...) printk(KERN_DEBUG __VA_ARGS__)
-
-#else /* DEBUG */
-
-#define dump_urb(blah) ((void)0)
-#define xhci_show_resp(blah) ((void)0)
-#define DPRINTK(blah,...) ((void)0)
-
-#endif /* DEBUG */
-
-/******************************************************************************
- * RING REQUEST HANDLING
- */
-
-/**
- * xhci_construct_isoc - add isochronous information to a request
- */
-static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb)
-{
- usbif_iso_t *schedule;
- int i;
- struct urb_priv *urb_priv = urb->hcpriv;
-
- req->num_iso = urb->number_of_packets;
- schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL);
-
- if ( schedule == NULL )
- return -ENOMEM;
-
- for ( i = 0; i < req->num_iso; i++ )
- {
- schedule[i].buffer_offset = urb->iso_frame_desc[i].offset;
- schedule[i].length = urb->iso_frame_desc[i].length;
- }
-
- urb_priv->schedule = schedule;
- req->iso_schedule = virt_to_machine(schedule);
-
- return 0;
-}
-
-/**
- * xhci_queue_req - construct and queue request for an URB
- */
-static int xhci_queue_req(struct urb *urb)
-{
- usbif_request_t *req;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
- printk(KERN_DEBUG
- "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
- usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
- usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-
-
- if ( RING_FULL(usb_ring) )
- {
- printk(KERN_WARNING
- "xhci_queue_req(): USB ring full, not queuing request\n");
- return -ENOBUFS;
- }
-
- /* Stick something in the shared communications ring. */
- req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
- req->operation = USBIF_OP_IO;
- req->port = 0; /* We don't care what the port is. */
- req->id = (unsigned long) urb->hcpriv;
- req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
- req->devnum = usb_pipedevice(urb->pipe);
- req->direction = usb_pipein(urb->pipe);
- req->speed = usb_pipeslow(urb->pipe);
- req->pipe_type = usb_pipetype(urb->pipe);
- req->length = urb->transfer_buffer_length;
- req->transfer_flags = urb->transfer_flags;
- req->endpoint = usb_pipeendpoint(urb->pipe);
- req->speed = usb_pipeslow(urb->pipe);
- req->timeout = urb->timeout * (1000 / HZ);
-
- if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
- {
- int ret = xhci_construct_isoc(req, urb);
- if ( ret != 0 )
- return ret;
- }
-
- if(urb->setup_packet != NULL)
- memcpy(req->setup, urb->setup_packet, 8);
- else
- memset(req->setup, 0, 8);
-
- usb_ring->req_prod_pvt++;
- RING_PUSH_REQUESTS(usb_ring);
-
- notify_via_evtchn(xhci->evtchn);
-
- DPRINTK("Queued request for an URB.\n");
- dump_urb(urb);
-
- return -EINPROGRESS;
-}
-
-/**
- * xhci_queue_probe - queue a probe request for a particular port
- */
-static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
-{
- usbif_request_t *req;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
-#if DEBUG
- printk(KERN_DEBUG
- "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
- "resp_cons = %d\n", usbif->req_prod,
- virt_to_machine(&usbif->req_prod),
- usbif->resp_prod, xhci->usb_resp_cons);
-#endif
-
- if ( RING_FULL(usb_ring) )
- {
- printk(KERN_WARNING
- "xhci_queue_probe(): ring full, not queuing request\n");
- return NULL;
- }
-
- /* Stick something in the shared communications ring. */
- req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
- memset(req, 0, sizeof(*req));
-
- req->operation = USBIF_OP_PROBE;
- req->port = port;
-
- usb_ring->req_prod_pvt++;
- RING_PUSH_REQUESTS(usb_ring);
-
- notify_via_evtchn(xhci->evtchn);
-
- return req;
-}
-
-/**
- * xhci_port_reset - queue a reset request for a particular port
- */
-static int xhci_port_reset(usbif_vdev_t port)
-{
- usbif_request_t *req;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
-
- /* We only reset one port at a time, so we only need one variable per
- * hub. */
- xhci->awaiting_reset = 1;
-
- /* Stick something in the shared communications ring. */
- req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
-
- memset(req, 0, sizeof(*req));
-
- req->operation = USBIF_OP_RESET;
- req->port = port;
-
- usb_ring->req_prod_pvt++;
- RING_PUSH_REQUESTS(usb_ring);
-
- notify_via_evtchn(xhci->evtchn);
-
- while ( xhci->awaiting_reset > 0 )
- {
- mdelay(1);
- xhci_drain_ring();
- }
-
- xhci->rh.ports[port].pe = 1;
- xhci->rh.ports[port].pe_chg = 1;
-
- return xhci->awaiting_reset;
-}
-
-
-/******************************************************************************
- * RING RESPONSE HANDLING
- */
-
-static void receive_usb_reset(usbif_response_t *resp)
-{
- xhci->awaiting_reset = resp->status;
- rmb();
-
-}
-
-static void receive_usb_probe(usbif_response_t *resp)
-{
- spin_lock(&xhci->rh.port_state_lock);
-
- if ( resp->status > 0 )
- {
- if ( resp->status == 1 )
- {
- /* If there's a device there and there wasn't one before there must
- * have been a connection status change. */
- if( xhci->rh.ports[resp->data].cs == 0 )
- {
- xhci->rh.ports[resp->data].cs = 1;
- xhci->rh.ports[resp->data].ccs = 1;
- xhci->rh.ports[resp->data].cs_chg = 1;
- }
- }
- else
- printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
- "for port %d\n", resp->status, resp->data);
- }
- else if ( resp->status < 0)
- printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
- resp->status);
-
- spin_unlock(&xhci->rh.port_state_lock);
-}
-
-static void receive_usb_io(usbif_response_t *resp)
-{
- struct urb_priv *urbp = (struct urb_priv *)resp->id;
- struct urb *urb = urbp->urb;
-
- urb->actual_length = resp->length;
- urbp->in_progress = 0;
-
- if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
- {
- int i;
-
- /* Copy ISO schedule results back in. */
- for ( i = 0; i < urb->number_of_packets; i++ )
- {
- urb->iso_frame_desc[i].status
- = urbp->schedule[i].status;
- urb->iso_frame_desc[i].actual_length
- = urbp->schedule[i].length;
- }
- free_page((unsigned long)urbp->schedule);
- }
-
- /* Only set status if it's not been changed since submission. It might
- * have been changed if the URB has been unlinked asynchronously, for
- * instance. */
- if ( urb->status == -EINPROGRESS )
- urbp->status = urb->status = resp->status;
-}
-
-/**
- * xhci_drain_ring - drain responses from the ring, calling handlers
- *
- * This may be called from interrupt context when an event is received from the
- * backend domain, or sometimes in process context whilst waiting for a port
- * reset or URB completion.
- */
-static void xhci_drain_ring(void)
-{
- struct list_head *tmp, *head;
- usbif_front_ring_t *usb_ring = &xhci->usb_ring;
- usbif_response_t *resp;
- RING_IDX i, rp;
-
- /* Walk the ring here to get responses, updating URBs to show what
- * completed. */
-
- rp = usb_ring->sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- /* Take items off the comms ring, taking care not to overflow. */
- for ( i = usb_ring->rsp_cons; i != rp; i++ )
- {
- resp = RING_GET_RESPONSE(usb_ring, i);
-
- /* May need to deal with batching and with putting a ceiling on
- the number dispatched for performance and anti-dos reasons */
-
- xhci_show_resp(resp);
-
- switch ( resp->operation )
- {
- case USBIF_OP_PROBE:
- receive_usb_probe(resp);
- break;
-
- case USBIF_OP_IO:
- receive_usb_io(resp);
- break;
-
- case USBIF_OP_RESET:
- receive_usb_reset(resp);
- break;
-
- default:
- printk(KERN_WARNING
- "error: unknown USB io operation response [%d]\n",
- resp->operation);
- break;
- }
- }
-
- usb_ring->rsp_cons = i;
-
- /* Walk the list of pending URB's to see which ones completed and do
- * callbacks, etc. */
- spin_lock(&xhci->urb_list_lock);
- head = &xhci->urb_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *urb = list_entry(tmp, struct urb, urb_list);
-
- tmp = tmp->next;
-
- /* Checks the status and does all of the magic necessary */
- xhci_transfer_result(xhci, urb);
- }
- spin_unlock(&xhci->urb_list_lock);
-
- xhci_finish_completion();
-}
-
-
-static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs)
-{
- xhci_drain_ring();
-}
-
-/******************************************************************************
- * HOST CONTROLLER FUNCTIONALITY
- */
-
-/**
- * no-op implementation of private device alloc / free routines
- */
-static int xhci_do_nothing_dev(struct usb_device *dev)
-{
- return 0;
-}
-
-static inline void xhci_add_complete(struct urb *urb)
-{
- struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
- unsigned long flags;
-
- spin_lock_irqsave(&xhci->complete_list_lock, flags);
- list_add_tail(&urbp->complete_list, &xhci->complete_list);
- spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-/* When this returns, the owner of the URB may free its
- * storage.
- *
- * We spin and wait for the URB to complete before returning.
- *
- * Call with urb->lock acquired.
- */
-static void xhci_delete_urb(struct urb *urb)
-{
- struct urb_priv *urbp;
-
- urbp = urb->hcpriv;
-
- /* If there's no urb_priv structure for this URB then it can't have
- * been submitted at all. */
- if ( urbp == NULL )
- return;
-
- /* For now we just spin until the URB completes. It shouldn't take too
- * long and we don't expect to have to do this very often. */
- while ( urb->status == -EINPROGRESS )
- {
- xhci_drain_ring();
- mdelay(1);
- }
-
- /* Now we know that further transfers to the buffer won't
- * occur, so we can safely return. */
-}
-
-static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb)
-{
- struct urb_priv *urbp;
-
- urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC);
- if (!urbp) {
- err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n");
- return NULL;
- }
-
- memset((void *)urbp, 0, sizeof(*urbp));
-
- urbp->inserttime = jiffies;
- urbp->urb = urb;
- urbp->dev = urb->dev;
-
- INIT_LIST_HEAD(&urbp->complete_list);
-
- urb->hcpriv = urbp;
-
- return urbp;
-}
-
-/*
- * MUST be called with urb->lock acquired
- */
-/* When is this called? Do we need to stop the transfer (as we
- * currently do)? */
-static void xhci_destroy_urb_priv(struct urb *urb)
-{
- struct urb_priv *urbp;
-
- urbp = (struct urb_priv *)urb->hcpriv;
- if (!urbp)
- return;
-
- if (!list_empty(&urb->urb_list))
- warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb);
-
- if (!list_empty(&urbp->complete_list))
- warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb);
-
- kmem_cache_free(xhci_up_cachep, urb->hcpriv);
-
- urb->hcpriv = NULL;
-}
-
-/**
- * Try to find URBs in progress on the same pipe to the same device.
- *
- * MUST be called with xhci->urb_list_lock acquired
- */
-static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb)
-{
- struct list_head *tmp, *head;
-
- /* We don't match Isoc transfers since they are special */
- if (usb_pipeisoc(urb->pipe))
- return NULL;
-
- head = &xhci->urb_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *u = list_entry(tmp, struct urb, urb_list);
-
- tmp = tmp->next;
-
- if (u->dev == urb->dev && u->pipe == urb->pipe &&
- u->status == -EINPROGRESS)
- return u;
- }
-
- return NULL;
-}
-
-static int xhci_submit_urb(struct urb *urb)
-{
- int ret = -EINVAL;
- unsigned long flags;
- struct urb *eurb;
- int bustime;
-
- DPRINTK("URB submitted to XHCI driver.\n");
- dump_urb(urb);
-
- if (!urb)
- return -EINVAL;
-
- if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) {
- warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb);
- return -ENODEV;
- }
-
- if ( urb->dev->devpath == NULL )
- BUG();
-
- usb_inc_dev_use(urb->dev);
-
- spin_lock_irqsave(&xhci->urb_list_lock, flags);
- spin_lock(&urb->lock);
-
- if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET ||
- urb->status == -ECONNABORTED) {
- dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status);
- /* Since we can have problems on the out path */
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
- usb_dec_dev_use(urb->dev);
-
- return ret;
- }
-
- INIT_LIST_HEAD(&urb->urb_list);
- if (!xhci_alloc_urb_priv(urb)) {
- ret = -ENOMEM;
-
- goto out;
- }
-
- ( (struct urb_priv *)urb->hcpriv )->in_progress = 1;
-
- eurb = xhci_find_urb_ep(xhci, urb);
- if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) {
- ret = -ENXIO;
-
- goto out;
- }
-
- /* Short circuit the virtual root hub */
- if (urb->dev == xhci->rh.dev) {
- ret = rh_submit_urb(urb);
-
- goto out;
- }
-
- switch (usb_pipetype(urb->pipe)) {
- case PIPE_CONTROL:
- case PIPE_BULK:
- ret = xhci_queue_req(urb);
- break;
-
- case PIPE_INTERRUPT:
- if (urb->bandwidth == 0) { /* not yet checked/allocated */
- bustime = usb_check_bandwidth(urb->dev, urb);
- if (bustime < 0)
- ret = bustime;
- else {
- ret = xhci_queue_req(urb);
- if (ret == -EINPROGRESS)
- usb_claim_bandwidth(urb->dev, urb,
- bustime, 0);
- }
- } else /* bandwidth is already set */
- ret = xhci_queue_req(urb);
- break;
-
- case PIPE_ISOCHRONOUS:
- if (urb->bandwidth == 0) { /* not yet checked/allocated */
- if (urb->number_of_packets <= 0) {
- ret = -EINVAL;
- break;
- }
- bustime = usb_check_bandwidth(urb->dev, urb);
- if (bustime < 0) {
- ret = bustime;
- break;
- }
-
- ret = xhci_queue_req(urb);
- if (ret == -EINPROGRESS)
- usb_claim_bandwidth(urb->dev, urb, bustime, 1);
- } else /* bandwidth is already set */
- ret = xhci_queue_req(urb);
- break;
- }
-out:
- urb->status = ret;
-
- if (ret == -EINPROGRESS) {
- /* We use _tail to make find_urb_ep more efficient */
- list_add_tail(&urb->urb_list, &xhci->urb_list);
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- return 0;
- }
-
- xhci_delete_urb(urb);
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- /* Only call completion if it was successful */
- if (!ret)
- xhci_call_completion(urb);
-
- return ret;
-}
-
-/*
- * Return the result of a transfer
- *
- * MUST be called with urb_list_lock acquired
- */
-static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
-{
- int ret = 0;
- unsigned long flags;
- struct urb_priv *urbp;
-
- /* The root hub is special */
- if (urb->dev == xhci->rh.dev)
- return;
-
- spin_lock_irqsave(&urb->lock, flags);
-
- urbp = (struct urb_priv *)urb->hcpriv;
-
- if ( ( (struct urb_priv *)urb->hcpriv )->in_progress )
- ret = -EINPROGRESS;
-
- if (urb->actual_length < urb->transfer_buffer_length) {
- if (urb->transfer_flags & USB_DISABLE_SPD) {
- ret = -EREMOTEIO;
- }
- }
-
- if (urb->status == -EPIPE)
- {
- ret = urb->status;
- /* endpoint has stalled - mark it halted */
- usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
- usb_pipeout(urb->pipe));
- }
-
- if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
- (ret != 0 && debug > 1)) {
- /* Some debugging code */
- dbg("xhci_result_interrupt/bulk() failed with status %x",
- ret);
- }
-
- if (ret == -EINPROGRESS)
- goto out;
-
- switch (usb_pipetype(urb->pipe)) {
- case PIPE_CONTROL:
- case PIPE_BULK:
- case PIPE_ISOCHRONOUS:
- /* Release bandwidth for Interrupt or Isoc. transfers */
- /* Spinlock needed ? */
- if (urb->bandwidth)
- usb_release_bandwidth(urb->dev, urb, 1);
- xhci_delete_urb(urb);
- break;
- case PIPE_INTERRUPT:
- /* Interrupts are an exception */
- if (urb->interval)
- goto out_complete;
-
- /* Release bandwidth for Interrupt or Isoc. transfers */
- /* Spinlock needed ? */
- if (urb->bandwidth)
- usb_release_bandwidth(urb->dev, urb, 0);
- xhci_delete_urb(urb);
- break;
- default:
- info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
- usb_pipetype(urb->pipe), urb);
- }
-
- /* Remove it from xhci->urb_list */
- list_del_init(&urb->urb_list);
-
-out_complete:
- xhci_add_complete(urb);
-
-out:
- spin_unlock_irqrestore(&urb->lock, flags);
-}
-
-static int xhci_unlink_urb(struct urb *urb)
-{
- unsigned long flags;
-	struct urb_priv *urbp;
-
-	if (!urb)
-		return -EINVAL;
-
-	if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
-		return -ENODEV;
-
-	urbp = urb->hcpriv;
-
- spin_lock_irqsave(&xhci->urb_list_lock, flags);
- spin_lock(&urb->lock);
-
- /* Release bandwidth for Interrupt or Isoc. transfers */
- /* Spinlock needed ? */
- if (urb->bandwidth) {
- switch (usb_pipetype(urb->pipe)) {
- case PIPE_INTERRUPT:
- usb_release_bandwidth(urb->dev, urb, 0);
- break;
- case PIPE_ISOCHRONOUS:
- usb_release_bandwidth(urb->dev, urb, 1);
- break;
- default:
- break;
- }
- }
-
- if (urb->status != -EINPROGRESS) {
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
- return 0;
- }
-
- list_del_init(&urb->urb_list);
-
- /* Short circuit the virtual root hub */
- if (urb->dev == xhci->rh.dev) {
- rh_unlink_urb(urb);
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- xhci_call_completion(urb);
- } else {
- if (urb->transfer_flags & USB_ASYNC_UNLINK) {
-			/* We don't currently attempt to cancel URBs
- * that have been queued in the ring. We handle async
- * unlinked URBs when they complete. */
- urbp->status = urb->status = -ECONNABORTED;
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
- } else {
- urb->status = -ENOENT;
-
- spin_unlock(&urb->lock);
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- if (in_interrupt()) { /* wait at least 1 frame */
- static int errorcount = 10;
-
- if (errorcount--)
- dbg("xhci_unlink_urb called from interrupt for urb %p", urb);
- udelay(1000);
- } else
- schedule_timeout(1+1*HZ/1000);
-
- xhci_delete_urb(urb);
-
- xhci_call_completion(urb);
- }
- }
-
- return 0;
-}
-
-static void xhci_call_completion(struct urb *urb)
-{
- struct urb_priv *urbp;
- struct usb_device *dev = urb->dev;
- int is_ring = 0, killed, resubmit_interrupt, status;
- struct urb *nurb;
- unsigned long flags;
-
- spin_lock_irqsave(&urb->lock, flags);
-
- urbp = (struct urb_priv *)urb->hcpriv;
- if (!urbp || !urb->dev) {
- spin_unlock_irqrestore(&urb->lock, flags);
- return;
- }
-
- killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
- urb->status == -ECONNRESET);
- resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
- urb->interval);
-
- nurb = urb->next;
- if (nurb && !killed) {
- int count = 0;
-
- while (nurb && nurb != urb && count < MAX_URB_LOOP) {
- if (nurb->status == -ENOENT ||
- nurb->status == -ECONNABORTED ||
- nurb->status == -ECONNRESET) {
- killed = 1;
- break;
- }
-
- nurb = nurb->next;
- count++;
- }
-
- if (count == MAX_URB_LOOP)
- err("xhci_call_completion: too many linked URB's, loop? (first loop)");
-
- /* Check to see if chain is a ring */
- is_ring = (nurb == urb);
- }
-
- status = urbp->status;
- if (!resubmit_interrupt || killed)
- /* We don't need urb_priv anymore */
- xhci_destroy_urb_priv(urb);
-
- if (!killed)
- urb->status = status;
-
- spin_unlock_irqrestore(&urb->lock, flags);
-
- if (urb->complete)
- urb->complete(urb);
-
- if (resubmit_interrupt)
- /* Recheck the status. The completion handler may have */
- /* unlinked the resubmitting interrupt URB */
- killed = (urb->status == -ENOENT ||
- urb->status == -ECONNABORTED ||
- urb->status == -ECONNRESET);
-
- if (resubmit_interrupt && !killed) {
- if ( urb->dev != xhci->rh.dev )
- xhci_queue_req(urb); /* XXX What if this fails? */
- /* Don't need to resubmit URBs for the virtual root dev. */
- } else {
- if (is_ring && !killed) {
- urb->dev = dev;
- xhci_submit_urb(urb);
- } else {
- /* We decrement the usage count after we're done */
- /* with everything */
- usb_dec_dev_use(dev);
- }
- }
-}
-
-static void xhci_finish_completion(void)
-{
- struct list_head *tmp, *head;
- unsigned long flags;
-
- spin_lock_irqsave(&xhci->complete_list_lock, flags);
- head = &xhci->complete_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
- complete_list);
- struct urb *urb = urbp->urb;
-
- list_del_init(&urbp->complete_list);
- spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-
- xhci_call_completion(urb);
-
- spin_lock_irqsave(&xhci->complete_list_lock, flags);
- head = &xhci->complete_list;
- tmp = head->next;
- }
- spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
-}
-
-static struct usb_operations xhci_device_operations = {
- .allocate = xhci_do_nothing_dev,
- .deallocate = xhci_do_nothing_dev,
- /* It doesn't look like any drivers actually care what the frame number
- * is at the moment! If necessary, we could approximate the current
-	 * frame number by passing it from the backend in response messages. */
- .get_frame_number = NULL,
- .submit_urb = xhci_submit_urb,
- .unlink_urb = xhci_unlink_urb
-};
-
-/******************************************************************************
- * VIRTUAL ROOT HUB EMULATION
- */
-
-static __u8 root_hub_dev_des[] =
-{
- 0x12, /* __u8 bLength; */
- 0x01, /* __u8 bDescriptorType; Device */
- 0x00, /* __u16 bcdUSB; v1.0 */
- 0x01,
- 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */
- 0x00, /* __u8 bDeviceSubClass; */
- 0x00, /* __u8 bDeviceProtocol; */
- 0x08, /* __u8 bMaxPacketSize0; 8 Bytes */
- 0x00, /* __u16 idVendor; */
- 0x00,
- 0x00, /* __u16 idProduct; */
- 0x00,
- 0x00, /* __u16 bcdDevice; */
- 0x00,
- 0x00, /* __u8 iManufacturer; */
- 0x02, /* __u8 iProduct; */
- 0x01, /* __u8 iSerialNumber; */
- 0x01 /* __u8 bNumConfigurations; */
-};
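The multi-byte fields in these hand-built descriptors are little-endian, which is why bcdUSB 1.00 appears above as the byte pair 0x00, 0x01. A minimal userspace sketch of that split (emit_le16 is a made-up helper for illustration, not part of the driver):

#include <stdio.h>
#include <stdint.h>

/* Split a 16-bit descriptor field into the two bytes used in the arrays
 * above (USB descriptors are little-endian on the wire). */
static void emit_le16(uint16_t v, uint8_t out[2])
{
	out[0] = v & 0xff;		/* low byte first */
	out[1] = (v >> 8) & 0xff;	/* high byte second */
}

int main(void)
{
	uint8_t b[2];

	emit_le16(0x0100, b);		/* bcdUSB 1.00 */
	printf("bcdUSB bytes: 0x%02x 0x%02x\n", b[0], b[1]);	/* 0x00 0x01 */
	return 0;
}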
-
-
-/* Configuration descriptor */
-static __u8 root_hub_config_des[] =
-{
- 0x09, /* __u8 bLength; */
- 0x02, /* __u8 bDescriptorType; Configuration */
- 0x19, /* __u16 wTotalLength; */
- 0x00,
- 0x01, /* __u8 bNumInterfaces; */
- 0x01, /* __u8 bConfigurationValue; */
- 0x00, /* __u8 iConfiguration; */
- 0x40, /* __u8 bmAttributes;
- Bit 7: Bus-powered, 6: Self-powered,
- Bit 5 Remote-wakeup, 4..0: resvd */
- 0x00, /* __u8 MaxPower; */
-
- /* interface */
- 0x09, /* __u8 if_bLength; */
- 0x04, /* __u8 if_bDescriptorType; Interface */
- 0x00, /* __u8 if_bInterfaceNumber; */
- 0x00, /* __u8 if_bAlternateSetting; */
- 0x01, /* __u8 if_bNumEndpoints; */
- 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */
- 0x00, /* __u8 if_bInterfaceSubClass; */
- 0x00, /* __u8 if_bInterfaceProtocol; */
- 0x00, /* __u8 if_iInterface; */
-
- /* endpoint */
- 0x07, /* __u8 ep_bLength; */
- 0x05, /* __u8 ep_bDescriptorType; Endpoint */
- 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */
- 0x03, /* __u8 ep_bmAttributes; Interrupt */
- 0x08, /* __u16 ep_wMaxPacketSize; 8 Bytes */
- 0x00,
- 0xff /* __u8 ep_bInterval; 255 ms */
-};
-
-static __u8 root_hub_hub_des[] =
-{
- 0x09, /* __u8 bLength; */
- 0x29, /* __u8 bDescriptorType; Hub-descriptor */
- 0x02, /* __u8 bNbrPorts; */
- 0x00, /* __u16 wHubCharacteristics; */
- 0x00,
- 0x01, /* __u8 bPwrOn2pwrGood; 2ms */
- 0x00, /* __u8 bHubContrCurrent; 0 mA */
- 0x00, /* __u8 DeviceRemovable; *** 7 Ports max *** */
- 0xff /* __u8 PortPwrCtrlMask; *** 7 ports max *** */
-};
-
-/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */
-static int rh_send_irq(struct urb *urb)
-{
- struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
- xhci_port_t *ports = xhci->rh.ports;
- unsigned long flags;
- int i, len = 1;
- __u16 data = 0;
-
- spin_lock_irqsave(&urb->lock, flags);
- for (i = 0; i < xhci->rh.numports; i++) {
- /* Set a bit if anything at all has changed on the port, as per
- * USB spec 11.12 */
- data |= (ports[i].cs_chg || ports[i].pe_chg )
- ? (1 << (i + 1))
- : 0;
-
- len = (i + 1) / 8 + 1;
- }
-
- *(__u16 *) urb->transfer_buffer = cpu_to_le16(data);
- urb->actual_length = len;
- urbp->status = 0;
-
- spin_unlock_irqrestore(&urb->lock, flags);
-
- if ((data > 0) && (xhci->rh.send != 0)) {
- dbg("root-hub INT complete: data: %x", data);
- xhci_call_completion(urb);
- }
-
- return 0;
-}
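rh_send_irq encodes the hub's interrupt-endpoint payload as a bitmap in which bit 0 is reserved for the hub itself and bit (i + 1) flags a change on port i, per the spec section cited in the comment. A small userspace sketch of the same encoding (all names invented for illustration):

#include <stdio.h>
#include <stdint.h>

#define NUM_PORTS 2

/* Same rule as rh_send_irq: bit 0 is the hub, bit (i + 1) is port i. */
static uint16_t encode_port_changes(const int changed[NUM_PORTS])
{
	uint16_t data = 0;
	int i;

	for (i = 0; i < NUM_PORTS; i++)
		if (changed[i])
			data |= 1 << (i + 1);
	return data;
}

int main(void)
{
	int changed[NUM_PORTS] = { 1, 0 };	/* only port 0 changed */
	int len = NUM_PORTS / 8 + 1;	/* matches the driver's (i + 1) / 8 + 1 */

	printf("status bitmap: 0x%04x\n", encode_port_changes(changed)); /* 0x0002 */
	printf("payload length: %d byte(s)\n", len);
	return 0;
}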
-
-/* Virtual Root Hub INTs are polled by this timer every "interval" ms */
-static int rh_init_int_timer(struct urb *urb);
-
-static void rh_int_timer_do(unsigned long ptr)
-{
- struct urb *urb = (struct urb *)ptr;
- struct list_head list, *tmp, *head;
- unsigned long flags;
- int i;
-
- for ( i = 0; i < xhci->rh.numports; i++)
- xhci_queue_probe(i);
-
- if (xhci->rh.send)
- rh_send_irq(urb);
-
- INIT_LIST_HEAD(&list);
-
- spin_lock_irqsave(&xhci->urb_list_lock, flags);
- head = &xhci->urb_list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *u = list_entry(tmp, struct urb, urb_list);
- struct urb_priv *up = (struct urb_priv *)u->hcpriv;
-
- tmp = tmp->next;
-
- spin_lock(&u->lock);
-
- /* Check if the URB timed out */
- if (u->timeout && time_after_eq(jiffies,
- up->inserttime + u->timeout)) {
- list_del(&u->urb_list);
- list_add_tail(&u->urb_list, &list);
- }
-
- spin_unlock(&u->lock);
- }
- spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
-
- head = &list;
- tmp = head->next;
- while (tmp != head) {
- struct urb *u = list_entry(tmp, struct urb, urb_list);
-
- tmp = tmp->next;
-
- u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED;
- xhci_unlink_urb(u);
- }
-
- rh_init_int_timer(urb);
-}
-
-/* Root Hub INTs are polled by this timer */
-static int rh_init_int_timer(struct urb *urb)
-{
- xhci->rh.interval = urb->interval;
- init_timer(&xhci->rh.rh_int_timer);
- xhci->rh.rh_int_timer.function = rh_int_timer_do;
- xhci->rh.rh_int_timer.data = (unsigned long)urb;
- xhci->rh.rh_int_timer.expires = jiffies
- + (HZ * (urb->interval < 30 ? 30 : urb->interval)) / 1000;
- add_timer(&xhci->rh.rh_int_timer);
-
- return 0;
-}
-
-#define OK(x) len = (x); break
-
-/* Root Hub Control Pipe */
-static int rh_submit_urb(struct urb *urb)
-{
- unsigned int pipe = urb->pipe;
- struct usb_ctrlrequest *cmd =
- (struct usb_ctrlrequest *)urb->setup_packet;
- void *data = urb->transfer_buffer;
- int leni = urb->transfer_buffer_length;
- int len = 0;
- xhci_port_t *status;
- int stat = 0;
- int i;
- int retstatus;
- unsigned long flags;
-
- __u16 cstatus;
- __u16 bmRType_bReq;
- __u16 wValue;
- __u16 wIndex;
- __u16 wLength;
-
- if (usb_pipetype(pipe) == PIPE_INTERRUPT) {
- xhci->rh.urb = urb;
- xhci->rh.send = 1;
- xhci->rh.interval = urb->interval;
- rh_init_int_timer(urb);
-
- return -EINPROGRESS;
- }
-
- bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8;
- wValue = le16_to_cpu(cmd->wValue);
- wIndex = le16_to_cpu(cmd->wIndex);
- wLength = le16_to_cpu(cmd->wLength);
-
- for (i = 0; i < 8; i++)
- xhci->rh.c_p_r[i] = 0;
-
- status = &xhci->rh.ports[wIndex - 1];
-
- spin_lock_irqsave(&xhci->rh.port_state_lock, flags);
-
- switch (bmRType_bReq) {
- /* Request Destination:
- without flags: Device,
- RH_INTERFACE: interface,
- RH_ENDPOINT: endpoint,
- RH_CLASS means HUB here,
- RH_OTHER | RH_CLASS almost ever means HUB_PORT here
- */
-
- case RH_GET_STATUS:
- *(__u16 *)data = cpu_to_le16(1);
- OK(2);
- case RH_GET_STATUS | RH_INTERFACE:
- *(__u16 *)data = cpu_to_le16(0);
- OK(2);
- case RH_GET_STATUS | RH_ENDPOINT:
- *(__u16 *)data = cpu_to_le16(0);
- OK(2);
- case RH_GET_STATUS | RH_CLASS:
- *(__u32 *)data = cpu_to_le32(0);
- OK(4); /* hub power */
- case RH_GET_STATUS | RH_OTHER | RH_CLASS:
- cstatus = (status->cs_chg) |
- (status->pe_chg << 1) |
- (xhci->rh.c_p_r[wIndex - 1] << 4);
- retstatus = (status->ccs) |
- (status->pe << 1) |
- (status->susp << 2) |
- (status->pr << 8) |
- (1 << 8) | /* power on */
- (status->lsda << 9);
- *(__u16 *)data = cpu_to_le16(retstatus);
- *(__u16 *)(data + 2) = cpu_to_le16(cstatus);
- OK(4);
- case RH_CLEAR_FEATURE | RH_ENDPOINT:
- switch (wValue) {
- case RH_ENDPOINT_STALL:
- OK(0);
- }
- break;
- case RH_CLEAR_FEATURE | RH_CLASS:
- switch (wValue) {
- case RH_C_HUB_OVER_CURRENT:
- OK(0); /* hub power over current */
- }
- break;
- case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS:
- switch (wValue) {
- case RH_PORT_ENABLE:
- status->pe = 0;
- OK(0);
- case RH_PORT_SUSPEND:
- status->susp = 0;
- OK(0);
- case RH_PORT_POWER:
- OK(0); /* port power */
- case RH_C_PORT_CONNECTION:
- status->cs_chg = 0;
- OK(0);
- case RH_C_PORT_ENABLE:
- status->pe_chg = 0;
- OK(0);
- case RH_C_PORT_SUSPEND:
- /*** WR_RH_PORTSTAT(RH_PS_PSSC); */
- OK(0);
- case RH_C_PORT_OVER_CURRENT:
- OK(0); /* port power over current */
- case RH_C_PORT_RESET:
- xhci->rh.c_p_r[wIndex - 1] = 0;
- OK(0);
- }
- break;
- case RH_SET_FEATURE | RH_OTHER | RH_CLASS:
- switch (wValue) {
- case RH_PORT_SUSPEND:
- status->susp = 1;
- OK(0);
- case RH_PORT_RESET:
- {
- int ret;
- xhci->rh.c_p_r[wIndex - 1] = 1;
- status->pr = 0;
- status->pe = 1;
- ret = xhci_port_reset(wIndex - 1);
-			/* XXX MAW: should probably cancel queued transfers during reset... */
- if ( ret == 0 ) { OK(0); }
- else { return ret; }
- }
- break;
- case RH_PORT_POWER:
- OK(0); /* port power ** */
- case RH_PORT_ENABLE:
- status->pe = 1;
- OK(0);
- }
- break;
- case RH_SET_ADDRESS:
- xhci->rh.devnum = wValue;
- OK(0);
- case RH_GET_DESCRIPTOR:
- switch ((wValue & 0xff00) >> 8) {
- case 0x01: /* device descriptor */
- len = min_t(unsigned int, leni,
- min_t(unsigned int,
- sizeof(root_hub_dev_des), wLength));
- memcpy(data, root_hub_dev_des, len);
- OK(len);
- case 0x02: /* configuration descriptor */
- len = min_t(unsigned int, leni,
- min_t(unsigned int,
- sizeof(root_hub_config_des), wLength));
- memcpy (data, root_hub_config_des, len);
- OK(len);
- case 0x03: /* string descriptors */
- len = usb_root_hub_string (wValue & 0xff,
- 0, "XHCI-alt",
- data, wLength);
- if (len > 0) {
- OK(min_t(int, leni, len));
- } else
- stat = -EPIPE;
- }
- break;
- case RH_GET_DESCRIPTOR | RH_CLASS:
- root_hub_hub_des[2] = xhci->rh.numports;
- len = min_t(unsigned int, leni,
- min_t(unsigned int, sizeof(root_hub_hub_des), wLength));
- memcpy(data, root_hub_hub_des, len);
- OK(len);
- case RH_GET_CONFIGURATION:
- *(__u8 *)data = 0x01;
- OK(1);
- case RH_SET_CONFIGURATION:
- OK(0);
- case RH_GET_INTERFACE | RH_INTERFACE:
- *(__u8 *)data = 0x00;
- OK(1);
- case RH_SET_INTERFACE | RH_INTERFACE:
- OK(0);
- default:
- stat = -EPIPE;
- }
-
- spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags);
-
- urb->actual_length = len;
-
- return stat;
-}
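The switch above keys on bRequestType in the low byte and bRequest in the high byte, the same packing used by the RH_* constants defined in xhci.h. A short sketch (hypothetical macro and literal values) showing how a hub-class SetPortFeature(PORT_RESET) request lands in the RH_SET_FEATURE | RH_OTHER | RH_CLASS case:

#include <stdio.h>
#include <stdint.h>

/* Same packing as rh_submit_urb: request type low, request code high. */
#define RH_RTYPE_BREQ(bmRequestType, bRequest) \
	((uint16_t)((bmRequestType) | ((bRequest) << 8)))

int main(void)
{
	/* SetPortFeature to a port: class request, recipient "other",
	 * host-to-device (bmRequestType 0x23); bRequest SET_FEATURE is 0x03. */
	uint16_t key = RH_RTYPE_BREQ(0x23, 0x03);

	/* 0x23 = RH_OTHER (0x03) | RH_CLASS (0x20), 0x0300 = RH_SET_FEATURE,
	 * so the switch key is 0x0323. */
	printf("switch key: 0x%04x\n", key);
	return 0;
}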
-
-/*
- * MUST be called with urb->lock acquired
- */
-static int rh_unlink_urb(struct urb *urb)
-{
- if (xhci->rh.urb == urb) {
- urb->status = -ENOENT;
- xhci->rh.send = 0;
- xhci->rh.urb = NULL;
- del_timer(&xhci->rh.rh_int_timer);
- }
- return 0;
-}
-
-/******************************************************************************
- * CONTROL PLANE FUNCTIONALITY
- */
-
-/**
- * alloc_xhci - initialise a new virtual root hub for a new USB device channel
- */
-static int alloc_xhci(void)
-{
- int retval;
- struct usb_bus *bus;
-
- retval = -EBUSY;
-
- xhci = kmalloc(sizeof(*xhci), GFP_KERNEL);
- if (!xhci) {
- err("couldn't allocate xhci structure");
- retval = -ENOMEM;
- goto err_alloc_xhci;
- }
-
- xhci->state = USBIF_STATE_CLOSED;
-
- spin_lock_init(&xhci->urb_list_lock);
- INIT_LIST_HEAD(&xhci->urb_list);
-
- spin_lock_init(&xhci->complete_list_lock);
- INIT_LIST_HEAD(&xhci->complete_list);
-
- spin_lock_init(&xhci->frame_list_lock);
-
- bus = usb_alloc_bus(&xhci_device_operations);
-
- if (!bus) {
- err("unable to allocate bus");
- goto err_alloc_bus;
- }
-
- xhci->bus = bus;
- bus->bus_name = "XHCI";
- bus->hcpriv = xhci;
-
- usb_register_bus(xhci->bus);
-
- /* Initialize the root hub */
-
- xhci->rh.numports = 0;
-
- xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus);
- if (!xhci->rh.dev) {
- err("unable to allocate root hub");
- goto err_alloc_root_hub;
- }
-
- xhci->state = 0;
-
- return 0;
-
-/*
- * error exits:
- */
-err_alloc_root_hub:
- usb_deregister_bus(xhci->bus);
- usb_free_bus(xhci->bus);
- xhci->bus = NULL;
-
-err_alloc_bus:
- kfree(xhci);
-
-err_alloc_xhci:
- return retval;
-}
-
-/**
- * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message
- */
-static void usbif_status_change(usbif_fe_interface_status_changed_t *status)
-{
- ctrl_msg_t cmsg;
- usbif_fe_interface_connect_t up;
- long rc;
- usbif_sring_t *sring;
-
- switch ( status->status )
- {
- case USBIF_INTERFACE_STATUS_DESTROYED:
- printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n",
- xhci->state);
- break;
-
- case USBIF_INTERFACE_STATUS_DISCONNECTED:
- if ( xhci->state != USBIF_STATE_CLOSED )
- {
- printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message"
- " in state %d\n", xhci->state);
- break;
- /* Not bothering to do recovery here for now. Keep things
- * simple. */
- }
-
- /* Move from CLOSED to DISCONNECTED state. */
- sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&xhci->usb_ring, sring);
- xhci->state = USBIF_STATE_DISCONNECTED;
-
- /* Construct an interface-CONNECT message for the domain controller. */
- cmsg.type = CMSG_USBIF_FE;
- cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT;
- cmsg.length = sizeof(usbif_fe_interface_connect_t);
- up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
- memcpy(cmsg.msg, &up, sizeof(up));
-
- /* Tell the controller to bring up the interface. */
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
- break;
-
- case USBIF_INTERFACE_STATUS_CONNECTED:
- if ( xhci->state == USBIF_STATE_CLOSED )
- {
- printk(KERN_WARNING "Unexpected usbif-CONNECTED message"
- " in state %d\n", xhci->state);
- break;
- }
-
- xhci->evtchn = status->evtchn;
- xhci->irq = bind_evtchn_to_irq(xhci->evtchn);
- xhci->bandwidth = status->bandwidth;
- xhci->rh.numports = status->num_ports;
-
- xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL);
- memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports);
-
- usb_connect(xhci->rh.dev);
-
- if (usb_new_device(xhci->rh.dev) != 0) {
- err("unable to start root hub");
- }
-
- /* Allocate the appropriate USB bandwidth here... Need to
- * somehow know what the total available is thought to be so we
- * can calculate the reservation correctly. */
- usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
- 1000 - xhci->bandwidth, 0);
-
- if ( (rc = request_irq(xhci->irq, xhci_interrupt,
- SA_SAMPLE_RANDOM, "usbif", xhci)) )
- printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
-
- DPRINTK(KERN_INFO __FILE__
- ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n",
- xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
- xhci->evtchn, xhci->irq);
-
- xhci->state = USBIF_STATE_CONNECTED;
-
- break;
-
- default:
- printk(KERN_WARNING "Status change to unknown value %d\n",
- status->status);
- break;
- }
-}
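After the DISCONNECTED transition above has run SHARED_RING_INIT/FRONT_RING_INIT on the shared page, requests would be produced on xhci->usb_ring with the generic Xen ring macros. A rough sketch of that production step, assuming the standard RING_GET_REQUEST/RING_PUSH_REQUESTS macros from io/ring.h; the request's id field is a placeholder for illustration, not the real usbif_request_t layout:

static void example_queue_one_request(struct xhci *xhci, unsigned long id)
{
	usbif_request_t *req;

	/* Claim the next private slot on the front ring. */
	req = RING_GET_REQUEST(&xhci->usb_ring, xhci->usb_ring.req_prod_pvt);
	req->id = id;				/* field name assumed */
	xhci->usb_ring.req_prod_pvt++;

	RING_PUSH_REQUESTS(&xhci->usb_ring);	/* publish to the backend */
	notify_via_evtchn(xhci->evtchn);	/* kick it over the event channel */
}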
-
-/**
- * usbif_ctrlif_rx - demux control messages by subtype
- */
-static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
- if ( msg->length != sizeof(usbif_fe_interface_status_changed_t) )
- goto parse_error;
- usbif_status_change((usbif_fe_interface_status_changed_t *)
- &msg->msg[0]);
- break;
-
- /* New interface...? */
- default:
- goto parse_error;
- }
-
- ctrl_if_send_response(msg);
- return;
-
- parse_error:
- msg->length = 0;
- ctrl_if_send_response(msg);
-}
-
-
-static int __init xhci_hcd_init(void)
-{
- int retval = -ENOMEM, i;
- usbif_fe_interface_status_changed_t st;
- control_msg_t cmsg;
-
- if ( (xen_start_info.flags & SIF_INITDOMAIN)
- || (xen_start_info.flags & SIF_USB_BE_DOMAIN) )
- return 0;
-
- info(DRIVER_DESC " " DRIVER_VERSION);
-
- if (debug) {
- errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL);
- if (!errbuf)
- goto errbuf_failed;
- }
-
- xhci_up_cachep = kmem_cache_create("xhci_urb_priv",
- sizeof(struct urb_priv), 0, 0, NULL, NULL);
- if (!xhci_up_cachep)
- goto up_failed;
-
- /* Let the domain controller know we're here. For now we wait until
- * connection, as for the block and net drivers. This is only strictly
- * necessary if we're going to boot off a USB device. */
- printk(KERN_INFO "Initialising Xen virtual USB hub\n");
-
- (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
-
- alloc_xhci();
-
- /* Send a driver-UP notification to the domain controller. */
- cmsg.type = CMSG_USBIF_FE;
- cmsg.subtype = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED;
- cmsg.length = sizeof(usbif_fe_driver_status_changed_t);
- st.status = USBIF_DRIVER_STATUS_UP;
- memcpy(cmsg.msg, &st, sizeof(st));
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-
- /*
- * We should read 'nr_interfaces' from response message and wait
- * for notifications before proceeding. For now we assume that we
- * will be notified of exactly one interface.
- */
- for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- if (xhci->state != USBIF_STATE_CONNECTED)
- printk(KERN_WARNING "Timeout connecting USB frontend driver!\n");
-
- return 0;
-
-up_failed:
- if (errbuf)
- kfree(errbuf);
-
-errbuf_failed:
- return retval;
-}
-
-module_init(xhci_hcd_init);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-
+++ /dev/null
-/******************************************************************************
- * xhci.h
- *
- * Private definitions for the Xen Virtual USB Controller. Based on
- * drivers/usb/host/uhci.h from Linux. Copyright for the imported content is
- * retained by the original authors.
- *
- * Modifications are:
- * Copyright (C) 2004 Intel Research Cambridge
- * Copyright (C) 2004, 2005 Mark Williamson
- */
-
-#ifndef __LINUX_XHCI_H
-#define __LINUX_XHCI_H
-
-#include <linux/list.h>
-#include <linux/usb.h>
-#include <asm-xen/xen-public/io/usbif.h>
-#include <linux/spinlock.h>
-
-/* xhci_port_t - current known state of a virtual hub port */
-typedef struct {
- unsigned int cs :1; /* Connection status. do we really need this /and/ ccs? */
- unsigned int cs_chg :1; /* Connection status change. */
- unsigned int pe :1; /* Port enable. */
- unsigned int pe_chg :1; /* Port enable change. */
- unsigned int ccs :1; /* Current connect status. */
- unsigned int susp :1; /* Suspended. */
- unsigned int lsda :1; /* Low speed device attached. */
- unsigned int pr :1; /* Port reset. */
-} xhci_port_t;
-
-/* struct virt_root_hub - state related to the virtual root hub */
-struct virt_root_hub {
- struct usb_device *dev;
- int devnum; /* Address of Root Hub endpoint */
- struct urb *urb;
- void *int_addr;
- int send;
- int interval;
- int numports;
- int c_p_r[8];
- struct timer_list rh_int_timer;
- spinlock_t port_state_lock;
- xhci_port_t *ports;
-};
-
-/* struct xhci - contains the state associated with a single USB interface */
-struct xhci {
-
-#ifdef CONFIG_PROC_FS
- /* procfs */
- int num;
- struct proc_dir_entry *proc_entry;
-#endif
-
- int evtchn; /* Interdom channel to backend */
- int irq; /* Bound to evtchn */
- enum { USBIF_STATE_CONNECTED = 2,
- USBIF_STATE_DISCONNECTED = 1,
- USBIF_STATE_CLOSED = 0
- } state; /* State of this USB interface */
- unsigned long bandwidth;
-
- struct usb_bus *bus;
-
- /* Main list of URB's currently controlled by this HC */
- spinlock_t urb_list_lock;
- struct list_head urb_list; /* P: xhci->urb_list_lock */
-
- /* List of URB's awaiting completion callback */
- spinlock_t complete_list_lock;
- struct list_head complete_list; /* P: xhci->complete_list_lock */
-
- struct virt_root_hub rh; /* private data of the virtual root hub */
-
- usbif_front_ring_t usb_ring;
-
- int awaiting_reset;
-};
-
-/* per-URB private data structure for the host controller */
-struct urb_priv {
- struct urb *urb;
- usbif_iso_t *schedule;
- struct usb_device *dev;
-
- int in_progress : 1; /* QH was queued (not linked in) */
- int short_control_packet : 1; /* If we get a short packet during */
- /* a control transfer, retrigger */
- /* the status phase */
-
- int status; /* Final status */
-
- unsigned long inserttime; /* In jiffies */
-
- struct list_head complete_list; /* P: xhci->complete_list_lock */
-};
-
-/*
- * Locking in xhci.c
- *
- * spinlocks are used extensively to protect the many lists and data
- * structures we have. It's not that pretty, but it's necessary. We
- * need to be done with all of the locks (except complete_list_lock) when
- * we call urb->complete. I've tried to make it simple enough so I don't
- * have to spend hours racking my brain trying to figure out if the
- * locking is safe.
- *
- * Here's the safe locking order to prevent deadlocks:
- *
- * #1 xhci->urb_list_lock
- * #2 urb->lock
- * #3 xhci->urb_remove_list_lock
- * #4 xhci->complete_list_lock
- *
- * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
- * at the lowest level FIRST and NEVER grab locks at the same level at the
- * same time.
- *
- * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
- */
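A minimal illustration of the documented order as xhci_submit_urb and xhci_unlink_urb apply it: lock #1 (urb_list_lock) is taken before lock #2 (urb->lock) and released last (the helper name here is hypothetical):

/* Hypothetical helper showing the safe order: #1 before #2, release in
 * reverse order, all locks dropped before urb->complete would be called. */
static void example_locked_operation(struct xhci *xhci, struct urb *urb)
{
	unsigned long flags;

	spin_lock_irqsave(&xhci->urb_list_lock, flags);	/* #1 first */
	spin_lock(&urb->lock);				/* #2 second */

	/* ... touch xhci->urb_list and per-URB state here ... */

	spin_unlock(&urb->lock);
	spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
}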
-
-/* -------------------------------------------------------------------------
- Virtual Root HUB
- ------------------------------------------------------------------------- */
-/* destination of request */
-#define RH_DEVICE 0x00
-#define RH_INTERFACE 0x01
-#define RH_ENDPOINT 0x02
-#define RH_OTHER 0x03
-
-#define RH_CLASS 0x20
-#define RH_VENDOR 0x40
-
-/* Requests: bRequest << 8 | bmRequestType */
-#define RH_GET_STATUS 0x0080
-#define RH_CLEAR_FEATURE 0x0100
-#define RH_SET_FEATURE 0x0300
-#define RH_SET_ADDRESS 0x0500
-#define RH_GET_DESCRIPTOR 0x0680
-#define RH_SET_DESCRIPTOR 0x0700
-#define RH_GET_CONFIGURATION 0x0880
-#define RH_SET_CONFIGURATION 0x0900
-#define RH_GET_STATE 0x0280
-#define RH_GET_INTERFACE 0x0A80
-#define RH_SET_INTERFACE 0x0B00
-#define RH_SYNC_FRAME 0x0C80
-/* Our Vendor Specific Request */
-#define RH_SET_EP 0x2000
-
-/* Hub port features */
-#define RH_PORT_CONNECTION 0x00
-#define RH_PORT_ENABLE 0x01
-#define RH_PORT_SUSPEND 0x02
-#define RH_PORT_OVER_CURRENT 0x03
-#define RH_PORT_RESET 0x04
-#define RH_PORT_POWER 0x08
-#define RH_PORT_LOW_SPEED 0x09
-#define RH_C_PORT_CONNECTION 0x10
-#define RH_C_PORT_ENABLE 0x11
-#define RH_C_PORT_SUSPEND 0x12
-#define RH_C_PORT_OVER_CURRENT 0x13
-#define RH_C_PORT_RESET 0x14
-
-/* Hub features */
-#define RH_C_HUB_LOCAL_POWER 0x00
-#define RH_C_HUB_OVER_CURRENT 0x01
-#define RH_DEVICE_REMOTE_WAKEUP 0x00
-#define RH_ENDPOINT_STALL 0x01
-
-/* Our Vendor Specific feature */
-#define RH_REMOVE_EP 0x00
-
-#define RH_ACK 0x01
-#define RH_REQ_ERR -1
-#define RH_NACK 0x00
-
-#endif
-
+++ /dev/null
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#if 1
- printk("smpboot_clear_io_apic_irqs\n");
-#else
- io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-#if 1
- printk("smpboot_setup_warm_reset_vector\n");
-#else
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
- Dprintk("3.\n");
-#endif
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
- /*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- *((volatile long *) phys_to_virt(0x467)) = 0;
-}
-
-static inline void smpboot_setup_io_apic(void)
-{
-#if 1
- printk("smpboot_setup_io_apic\n");
-#else
- /*
- * Here we can be sure that there is an IO-APIC in the system. Let's
- * go and set it up:
- */
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
-#endif
-}
-
-
-#define smp_found_config (HYPERVISOR_shared_info->n_vcpu > 1)
+++ /dev/null
-#ifndef __ASM_SPINLOCK_H
-#define __ASM_SPINLOCK_H
-
-#include <asm/atomic.h>
-#include <asm/rwlock.h>
-#include <asm/page.h>
-#include <linux/config.h>
-#include <linux/compiler.h>
-
-asmlinkage int printk(const char * fmt, ...)
- __attribute__ ((format (printf, 1, 2)));
-
-/*
- * Your basic SMP spinlocks, allowing only a single CPU anywhere
- */
-
-typedef struct {
- volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned magic;
-#endif
-} spinlock_t;
-
-#define SPINLOCK_MAGIC 0xdead4ead
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
-#else
-#define SPINLOCK_MAGIC_INIT /* */
-#endif
-
-#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
-
-#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
-
-/*
- * Simple spin lock operations. There are two variants, one clears IRQ's
- * on the local processor, one does not.
- *
- * We make no fairness assumptions. They have a cost.
- */
-
-#define spin_is_locked(x) (*(volatile signed char *)(&(x)->lock) <= 0)
-#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
-
-#define spin_lock_string \
- "\n1:\t" \
- "lock ; decb %0\n\t" \
- "jns 3f\n" \
- "2:\t" \
- "rep;nop\n\t" \
- "cmpb $0,%0\n\t" \
- "jle 2b\n\t" \
- "jmp 1b\n" \
- "3:\n\t"
-
-#define spin_lock_string_flags \
- "\n1:\t" \
- "lock ; decb %0\n\t" \
- "jns 4f\n\t" \
- "2:\t" \
- "testl $0x200, %1\n\t" \
- "jz 3f\n\t" \
- "#sti\n\t" \
- "3:\t" \
- "rep;nop\n\t" \
- "cmpb $0, %0\n\t" \
- "jle 3b\n\t" \
- "#cli\n\t" \
- "jmp 1b\n" \
- "4:\n\t"
-
-/*
- * This works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE)
- * (PPro errata 66, 92)
- */
-
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-#define spin_unlock_string \
- "movb $1,%0" \
- :"=m" (lock->lock) : : "memory"
-
-
-static inline void _raw_spin_unlock(spinlock_t *lock)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
- BUG_ON(lock->magic != SPINLOCK_MAGIC);
- BUG_ON(!spin_is_locked(lock));
-#endif
- __asm__ __volatile__(
- spin_unlock_string
- );
-}
-
-#else
-
-#define spin_unlock_string \
- "xchgb %b0, %1" \
- :"=q" (oldval), "=m" (lock->lock) \
- :"0" (oldval) : "memory"
-
-static inline void _raw_spin_unlock(spinlock_t *lock)
-{
- char oldval = 1;
-#ifdef CONFIG_DEBUG_SPINLOCK
- BUG_ON(lock->magic != SPINLOCK_MAGIC);
- BUG_ON(!spin_is_locked(lock));
-#endif
- __asm__ __volatile__(
- spin_unlock_string
- );
-}
-
-#endif
-
-static inline int _raw_spin_trylock(spinlock_t *lock)
-{
- char oldval;
- __asm__ __volatile__(
- "xchgb %b0,%1"
- :"=q" (oldval), "=m" (lock->lock)
- :"0" (0) : "memory");
- return oldval > 0;
-}
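The assembly above implements a byte lock: the byte holds 1 when free, the locked decb/xchgb drives it to 0 or below to take it, and unlock simply stores 1 back. A userspace restatement of the trylock/unlock protocol using GCC atomic builtins instead of inline asm (illustrative only):

#include <stdio.h>

/* 1 = unlocked, 0 or negative = locked, mirroring the asm above. */
typedef struct { volatile signed char lock; } demo_spinlock_t;

static int demo_trylock(demo_spinlock_t *l)
{
	/* xchgb: atomically swap 0 in; acquired iff the old value was positive. */
	signed char old = __sync_lock_test_and_set(&l->lock, 0);
	return old > 0;
}

static void demo_unlock(demo_spinlock_t *l)
{
	l->lock = 1;	/* movb $1: mark unlocked again */
}

int main(void)
{
	demo_spinlock_t l = { 1 };

	printf("first trylock:  %d\n", demo_trylock(&l));	/* 1: acquired */
	printf("second trylock: %d\n", demo_trylock(&l));	/* 0: contended */
	demo_unlock(&l);
	printf("after unlock:   %d\n", demo_trylock(&l));	/* 1: acquired */
	return 0;
}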
-
-static inline void _raw_spin_lock(spinlock_t *lock)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
- if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
- printk("eip: %p\n", __builtin_return_address(0));
- BUG();
- }
-#endif
- __asm__ __volatile__(
- spin_lock_string
- :"=m" (lock->lock) : : "memory");
-}
-
-static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
- if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
- printk("eip: %p\n", __builtin_return_address(0));
- BUG();
- }
-#endif
- __asm__ __volatile__(
- spin_lock_string_flags
- :"=m" (lock->lock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
- volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned magic;
-#endif
-} rwlock_t;
-
-#define RWLOCK_MAGIC 0xdeaf1eed
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
-#else
-#define RWLOCK_MAGIC_INIT /* */
-#endif
-
-#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
-
-#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
-
-#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
-
-/*
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- *
- * The inline assembly is non-obvious. Think about it.
- *
- * Changed to use the same technique as rw semaphores. See
- * semaphore.h for details. -ben
- */
-/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
-
-static inline void _raw_read_lock(rwlock_t *rw)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
- BUG_ON(rw->magic != RWLOCK_MAGIC);
-#endif
- __build_read_lock(rw, "__read_lock_failed");
-}
-
-static inline void _raw_write_lock(rwlock_t *rw)
-{
-#ifdef CONFIG_DEBUG_SPINLOCK
- BUG_ON(rw->magic != RWLOCK_MAGIC);
-#endif
- __build_write_lock(rw, "__write_lock_failed");
-}
-
-#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
-
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
-
-#endif /* __ASM_SPINLOCK_H */
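The read-write lock above is a single counter initialised to RW_LOCK_BIAS; readers subtract 1 and writers subtract the whole bias, so a write lock can only be taken when the counter still holds the full bias (no readers or writer inside). A small arithmetic sketch of that invariant, with the i386 bias value assumed to be 0x01000000:

#include <stdio.h>

#define DEMO_RW_LOCK_BIAS 0x01000000	/* value assumed; see asm/rwlock.h */

int main(void)
{
	int count = DEMO_RW_LOCK_BIAS;	/* unlocked */

	count -= 1;			/* one reader enters */
	printf("writer can enter with a reader held: %s\n",
	       (count - DEMO_RW_LOCK_BIAS == 0) ? "yes" : "no");	/* no */

	count += 1;			/* reader leaves */
	count -= DEMO_RW_LOCK_BIAS;	/* writer subtracts the bias, as
					 * _raw_write_trylock does */
	printf("writer acquired when idle: %s\n",
	       (count == 0) ? "yes" : "no");				/* yes */
	return 0;
}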
--- /dev/null
+/*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the lowest level x86-specific interrupt
+ * entry, irq-stacks and irq statistics code. All the remaining
+ * irq logic is done by the generic kernel/irq/ code and
+ * by the x86-specific irq controller code. (e.g. i8259.c and
+ * io_apic.c.)
+ */
+
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#ifndef CONFIG_X86_LOCAL_APIC
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+#endif
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * per-CPU IRQ handling contexts (thread information and stack)
+ */
+union irq_ctx {
+ struct thread_info tinfo;
+ u32 stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS];
+static union irq_ctx *softirq_ctx[NR_CPUS];
+#endif
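Each irq_ctx overlays a thread_info at the bottom of a THREAD_SIZE-aligned stack, which is why do_IRQ can start the new frame at irqctx + sizeof(*irqctx), and why its overflow check can mask %esp with THREAD_SIZE - 1 to get the offset inside the stack; masking with ~(THREAD_SIZE - 1) instead recovers the owning thread_info, as current_thread_info() is conventionally derived on i386. A userspace sketch of that layout arithmetic (all names invented):

#include <stdio.h>
#include <stdint.h>

#define DEMO_THREAD_SIZE 4096	/* 4KSTACKS: one page per stack */

struct demo_thread_info { void *task; unsigned long previous_esp; };

union demo_irq_ctx {
	struct demo_thread_info tinfo;
	uint32_t stack[DEMO_THREAD_SIZE / sizeof(uint32_t)];
};

int main(void)
{
	static union demo_irq_ctx ctx
		__attribute__((aligned(DEMO_THREAD_SIZE)));

	/* The stack grows down from the end of the union... */
	uint32_t *isp = (uint32_t *)((char *)&ctx + sizeof(ctx));

	/* ...and masking any in-stack address with ~(THREAD_SIZE - 1) lands
	 * back on the thread_info that sits at the bottom of it. */
	uintptr_t some_sp = (uintptr_t)(isp - 10);
	struct demo_thread_info *ti =
		(struct demo_thread_info *)(some_sp & ~(DEMO_THREAD_SIZE - 1));

	printf("thread_info recovered correctly: %s\n",
	       (ti == &ctx.tinfo) ? "yes" : "no");
	return 0;
}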
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+fastcall unsigned int do_IRQ(struct pt_regs *regs)
+{
+ /* high bits used in ret_from_ code */
+ int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
+#ifdef CONFIG_4KSTACKS
+ union irq_ctx *curctx, *irqctx;
+ u32 *isp;
+#endif
+
+ irq_enter();
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+ {
+ long esp;
+
+ __asm__ __volatile__("andl %%esp,%0" :
+ "=r" (esp) : "0" (THREAD_SIZE - 1));
+ if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ esp - sizeof(struct thread_info));
+ dump_stack();
+ }
+ }
+#endif
+
+#ifdef CONFIG_4KSTACKS
+
+ curctx = (union irq_ctx *) current_thread_info();
+ irqctx = hardirq_ctx[smp_processor_id()];
+
+ /*
+ * this is where we switch to the IRQ stack. However, if we are
+ * already using the IRQ stack (because we interrupted a hardirq
+ * handler) we can't do that and just have to keep using the
+ * current stack (which is the irq stack already after all)
+ */
+ if (curctx != irqctx) {
+ int arg1, arg2, ebx;
+
+ /* build the stack frame on the IRQ stack */
+ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+ irqctx->tinfo.task = curctx->tinfo.task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
+
+ asm volatile(
+ " xchgl %%ebx,%%esp \n"
+ " call __do_IRQ \n"
+ " movl %%ebx,%%esp \n"
+ : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+ : "0" (irq), "1" (regs), "2" (isp)
+ : "memory", "cc", "ecx"
+ );
+ } else
+#endif
+ __do_IRQ(irq, regs);
+
+ irq_exit();
+
+ return 1;
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/*
+ * These should really be __section__(".bss.page_aligned") as well, but
+ * gcc 3.0 and earlier don't handle that correctly.
+ */
+static char softirq_stack[NR_CPUS * THREAD_SIZE]
+ __attribute__((__aligned__(THREAD_SIZE)));
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE]
+ __attribute__((__aligned__(THREAD_SIZE)));
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+ union irq_ctx *irqctx;
+
+ if (hardirq_ctx[cpu])
+ return;
+
+ irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
+ irqctx->tinfo.task = NULL;
+ irqctx->tinfo.exec_domain = NULL;
+ irqctx->tinfo.cpu = cpu;
+ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
+
+ hardirq_ctx[cpu] = irqctx;
+
+ irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
+ irqctx->tinfo.task = NULL;
+ irqctx->tinfo.exec_domain = NULL;
+ irqctx->tinfo.cpu = cpu;
+ irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET;
+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
+
+ softirq_ctx[cpu] = irqctx;
+
+ printk("CPU %u irqstacks, hard=%p soft=%p\n",
+ cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curctx;
+ union irq_ctx *irqctx;
+ u32 *isp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curctx = current_thread_info();
+ irqctx = softirq_ctx[smp_processor_id()];
+ irqctx->tinfo.task = curctx->task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
+
+ /* build the stack frame on the softirq stack */
+ isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+
+ asm volatile(
+ " xchgl %%ebx,%%esp \n"
+ " call __do_softirq \n"
+ " movl %%ebx,%%esp \n"
+ : "=b"(isp)
+ : "0"(isp)
+ : "memory", "cc", "edx", "ecx", "eax"
+ );
+ }
+
+ local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(do_softirq);
+#endif
+
+/*
+ * Interrupt statistics:
+ */
+
+atomic_t irq_err_count;
+
+/*
+ * /proc/interrupts printing:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int i = *(loff_t *) v, j;
+ struct irqaction * action;
+ unsigned long flags;
+
+ if (i == 0) {
+ seq_printf(p, " ");
+ for (j=0; j<NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "CPU%d ",j);
+ seq_putc(p, '\n');
+ }
+
+ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+ goto skip;
+ seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+#endif
+ seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+
+ seq_putc(p, '\n');
+skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ } else if (i == NR_IRQS) {
+ seq_printf(p, "NMI: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ", nmi_count(j));
+ seq_putc(p, '\n');
+#ifdef CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "LOC: ");
+ for (j = 0; j < NR_CPUS; j++)
+ if (cpu_online(j))
+ seq_printf(p, "%10u ",
+ irq_stat[j].apic_timer_irqs);
+ seq_putc(p, '\n');
+#endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+ }
+ return 0;
+}
--- /dev/null
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#if 0
+#include <mach_apic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+/*
+ * Some notes on x86 processor bugs affecting SMP operation:
+ *
+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ * The Linux implications for SMP are handled as follows:
+ *
+ * Pentium III / [Xeon]
+ * None of the E1AP-E3AP errata are visible to the user.
+ *
+ * E1AP. see PII A1AP
+ * E2AP. see PII A2AP
+ * E3AP. see PII A3AP
+ *
+ * Pentium II / [Xeon]
+ * None of the A1AP-A3AP errata are visible to the user.
+ *
+ * A1AP. see PPro 1AP
+ * A2AP. see PPro 2AP
+ * A3AP. see PPro 7AP
+ *
+ * Pentium Pro
+ * None of 1AP-9AP errata are visible to the normal user,
+ * except occasional delivery of 'spurious interrupt' as trap #15.
+ * This is very rare and a non-problem.
+ *
+ * 1AP. Linux maps APIC as non-cacheable
+ * 2AP. worked around in hardware
+ * 3AP. fixed in C0 and above steppings microcode update.
+ * Linux does not use excessive STARTUP_IPIs.
+ * 4AP. worked around in hardware
+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
+ * 'noapic' mode has vector 0xf filled out properly.
+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
+ *	7AP.	We do not assume writes to the LVT deasserting IRQs
+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
+ * 9AP. We do not use mixed mode
+ *
+ * Pentium
+ * There is a marginal case where REP MOVS on 100MHz SMP
+ * machines with B stepping processors can fail. XXX should provide
+ * an L1cache=Writethrough or L1cache=off option.
+ *
+ * B stepping CPUs may hang. There are hardware work arounds
+ * for this. We warn about it in case your board doesn't have the work
+ *	arounds. Basically that's so I can tell anyone with a B stepping
+ * CPU and SMP problems "tough".
+ *
+ * Specific items [From Pentium Processor Specification Update]
+ *
+ * 1AP. Linux doesn't use remote read
+ * 2AP. Linux doesn't trust APIC errors
+ * 3AP. We work around this
+ * 4AP. Linux never generated 3 interrupts of the same priority
+ * to cause a lost local interrupt.
+ * 5AP. Remote read is never used
+ * 6AP. not affected - worked around in hardware
+ * 7AP. not affected - worked around in hardware
+ * 8AP. worked around in hardware - we get explicit CS errors if not
+ * 9AP. only 'noapic' mode affected. Might generate spurious
+ * interrupts, we log only the first one and count the
+ * rest silently.
+ * 10AP. not affected - worked around in hardware
+ * 11AP. Linux reads the APIC between writes to avoid this, as per
+ * the documentation. Make sure you preserve this as it affects
+ * the C stepping chips too.
+ * 12AP. not affected - worked around in hardware
+ * 13AP. not affected - worked around in hardware
+ * 14AP. we always deassert INIT during bootup
+ * 15AP. not affected - worked around in hardware
+ * 16AP. not affected - worked around in hardware
+ * 17AP. not affected - worked around in hardware
+ * 18AP. not affected - worked around in hardware
+ * 19AP. not affected - worked around in BIOS
+ *
+ * If this sounds worrying believe me these bugs are either ___RARE___,
+ * or are signal timing bugs worked around in hardware and there's
+ * about nothing of note with C stepping upwards.
+ */
+
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+ return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+ return SET_APIC_DEST_FIELD(mask);
+}
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+ unsigned int evtchn;
+
+ evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+ // printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
+ if (evtchn) {
+#if 0
+ shared_info_t *s = HYPERVISOR_shared_info;
+ while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
+ synch_test_bit(evtchn, &s->evtchn_mask[0]))
+ ;
+#endif
+ notify_via_evtchn(evtchn);
+ } else
+		printk("send_IPI to unbound port %d/%d\n",
+ cpu, vector);
+}
+
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ int cpu;
+
+ switch (shortcut) {
+ case APIC_DEST_SELF:
+ __send_IPI_one(smp_processor_id(), vector);
+ break;
+ case APIC_DEST_ALLBUT:
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ if (cpu_isset(cpu, cpu_online_map)) {
+ __send_IPI_one(cpu, vector);
+ }
+ }
+ break;
+ default:
+ printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+ vector);
+ break;
+ }
+}
+
+void fastcall send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void send_IPI_mask_bitmask(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned int cpu;
+
+ local_irq_save(flags);
+
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (cpu_isset(cpu, mask)) {
+ __send_IPI_one(cpu, vector);
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+{
+
+ send_IPI_mask_bitmask(mask, vector);
+}
+
+#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
+
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (Its not allowed anyway).
+ *
+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static cpumask_t flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+#define FLUSH_ALL 0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+ BUG();
+ cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+ load_cr3(swapper_pg_dir);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ *	the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ *	for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ * Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ * Now the other cpus will send tlb flush ipis.
+ * 1a5) change cr3.
+ * 1b) thread switch without mm change
+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ * flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ * runs in kernel space, the cpu could load tlb entries for user space
+ * pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+ unsigned long cpu;
+
+ cpu = get_cpu();
+
+ if (!cpu_isset(cpu, flush_cpumask))
+ goto out;
+ /*
+ * This was a BUG() but until someone can quote me the
+ * line from the intel manual that guarantees an IPI to
+ * multiple CPUs is retried _only_ on the erroring CPUs
+	 * it's staying as a return
+ *
+ * BUG();
+ */
+
+ if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+ if (flush_va == FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
+ } else
+ leave_mm(cpu);
+ }
+ smp_mb__before_clear_bit();
+ cpu_clear(cpu, flush_cpumask);
+ smp_mb__after_clear_bit();
+out:
+ put_cpu_no_resched();
+
+ return IRQ_HANDLED;
+}
+
+static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+ unsigned long va)
+{
+ cpumask_t tmp;
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+ * - we do not send IPIs to not-yet booted CPUs.
+ * - current CPU must not be in mask
+ * - mask must exist :)
+ */
+ BUG_ON(cpus_empty(cpumask));
+
+ cpus_and(tmp, cpumask, cpu_online_map);
+ BUG_ON(!cpus_equal(cpumask, tmp));
+ BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+ BUG_ON(!mm);
+
+ /*
+	 * I'm not happy about this global shared spinlock in the
+ * MM hot path, but we'll see how contended it is.
+ * Temporarily this turns IRQs off, so that lockups are
+ * detected by the NMI watchdog.
+ */
+ spin_lock(&tlbstate_lock);
+
+ flush_mm = mm;
+ flush_va = va;
+#if NR_CPUS <= BITS_PER_LONG
+ atomic_set_mask(cpumask, &flush_cpumask);
+#else
+ {
+ int k;
+ unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
+ unsigned long *cpu_mask = (unsigned long *)&cpumask;
+ for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
+ atomic_set_mask(cpu_mask[k], &flush_mask[k]);
+ }
+#endif
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+ while (!cpus_empty(flush_cpumask))
+ /* nothing. lockup detection does not belong here */
+ mb();
+
+ flush_mm = NULL;
+ flush_va = 0;
+ spin_unlock(&tlbstate_lock);
+}
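flush_cpumask doubles as the completion barrier for the flush IPI: the initiator sets one bit per target under tlbstate_lock, sends the IPI, and spins until each target has cleared its own bit in smp_invalidate_interrupt. A reduced userspace model of that handshake, with one pthread standing in for the remote CPU (build with -pthread; every name below is made up):

#include <pthread.h>
#include <stdio.h>

static volatile unsigned long demo_flush_mask;

static void *demo_target_cpu(void *arg)
{
	unsigned long me = (unsigned long)arg;

	/* Wait for "the IPI": our bit being set by the initiator. */
	while (!(__sync_fetch_and_or(&demo_flush_mask, 0) & (1UL << me)))
		;
	/* ...a local TLB flush or leave_mm() would happen here... */
	__sync_fetch_and_and(&demo_flush_mask, ~(1UL << me));	/* acknowledge */
	return NULL;
}

int main(void)
{
	pthread_t t;
	unsigned long target = 1;

	pthread_create(&t, NULL, demo_target_cpu, (void *)target);

	__sync_fetch_and_or(&demo_flush_mask, 1UL << target);	/* mark target */
	while (__sync_fetch_and_or(&demo_flush_mask, 0))	/* wait for ack */
		;
	pthread_join(t, NULL);
	printf("all targets acknowledged the flush\n");
	return 0;
}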
+
+void flush_tlb_current_task(void)
+{
+ struct mm_struct *mm = current->mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ local_flush_tlb();
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+ preempt_enable();
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if (current->mm)
+ local_flush_tlb();
+ else
+ leave_mm(smp_processor_id());
+ }
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+
+ preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if(current->mm)
+ __flush_tlb_one(va);
+ else
+ leave_mm(smp_processor_id());
+ }
+
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, va);
+
+ preempt_enable();
+}
+
+static void do_flush_tlb_all(void* info)
+{
+ unsigned long cpu = smp_processor_id();
+
+ __flush_tlb_all();
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+ leave_mm(cpu);
+}
+
+void flush_tlb_all(void)
+{
+ on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>>, or have already executed it.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+{
+ struct call_data_struct data;
+ int cpus = num_online_cpus()-1;
+
+ if (!cpus)
+ return 0;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ spin_lock(&call_lock);
+ call_data = &data;
+ mb();
+
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ barrier();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ barrier();
+ spin_unlock(&call_lock);
+
+ return 0;
+}
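Per the comment above, the callback must be fast and non-blocking and the call must be made with interrupts enabled, outside interrupt context; note also that the local CPU is not included. A hypothetical usage sketch (neither function below is part of this patch):

/* Hypothetical example of driving smp_call_function(): run a short,
 * non-blocking callback on every other online CPU and wait for it. */
static void example_poke_cpu(void *info)
{
	unsigned int *counter = info;

	/* Runs in interrupt context on each remote CPU: keep it short and
	 * never sleep here. */
	(void)counter;
}

static void example_poke_all_cpus(void)
{
	unsigned int counter = 0;

	/* Interrupts must be enabled and we must not be in IRQ context. */
	smp_call_function(example_poke_cpu, &counter, 0, 1 /* wait */);

	/* The local CPU is not included; run the callback here as well. */
	example_poke_cpu(&counter);
}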
+
+static void stop_this_cpu (void * dummy)
+{
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ local_irq_disable();
+#if 1
+ xxprint("stop_this_cpu disable_local_APIC\n");
+#else
+ disable_local_APIC();
+#endif
+ if (cpu_data[smp_processor_id()].hlt_works_ok)
+ for(;;) __asm__("hlt");
+ for (;;);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 1, 0);
+
+ local_irq_disable();
+#if 1
+ xxprint("smp_send_stop disable_local_APIC\n");
+#else
+ disable_local_APIC();
+#endif
+ local_irq_enable();
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+
+ return IRQ_HANDLED;
+}
+
+#include <linux/kallsyms.h>
+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ mb();
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ irq_enter();
+ (*func)(info);
+ irq_exit();
+
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
+
+ return IRQ_HANDLED;
+}
+
--- /dev/null
+/*
+ * x86 SMP booting functions
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ * Much of the core SMP work is based on previous work by Thomas Radke, to
+ * whom a great many thanks are extended.
+ *
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
+ * Original development of Linux SMP code supported by Caldera.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ *
+ * Fixes
+ * Felix Koop : NR_CPUS used properly
+ * Jose Renau : Handle single CPU case.
+ * Alan Cox : By repeated request 8) - Total BogoMIPS report.
+ * Greg Wright : Fix for kernel stacks panic.
+ * Erich Boleyn : MP v1.4 and additional changes.
+ * Matthias Sattler : Changes for 2.1 kernel map.
+ * Michel Lespinasse : Changes for 2.1 kernel map.
+ * Michael Chastain : Change trampoline.S to gnu as.
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine
+ * Ingo Molnar : Added APIC timers, based on code
+ * from Jose Renau
+ * Ingo Molnar : various cleanups and rewrites
+ * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Martin J. Bligh : Added support for multi-quad systems
+ * Dave Jones : Report invalid combinations of Athlon CPUs.
+* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+#include <asm/arch_hooks.h>
+
+#if 1
+#define Dprintk(args...)
+#else
+#include <mach_apic.h>
+#endif
+#include <mach_wakecpu.h>
+#include <smpboot_hooks.h>
+
+/* Set if we find a B stepping CPU */
+static int __initdata smp_b_stepping;
+
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+EXPORT_SYMBOL(phys_proc_id);
+
+/* bitmap of online cpus */
+cpumask_t cpu_online_map;
+
+cpumask_t cpu_callin_map;
+cpumask_t cpu_callout_map;
+static cpumask_t smp_commenced_mask;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+u8 x86_cpu_to_apicid[NR_CPUS] =
+ { [0 ... NR_CPUS-1] = 0xff };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+#if 0
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end [];
+static unsigned char *trampoline_base;
+static int trampoline_exec;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
+}
+#endif
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+#if 1
+ int cpu;
+
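+ /*
+ * Under Xen no real-mode trampoline is needed; instead reserve one
+ * low page per CPU to hold that CPU's GDT (filled in later by
+ * do_boot_cpu()).
+ */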
+ for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ cpu_gdt_descr[cpu].address = (unsigned long)
+ alloc_bootmem_low_pages(PAGE_SIZE);
+ /* XXX free unused pages later */
+ }
+#else
+ trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ if (__pa(trampoline_base) >= 0x9F000)
+ BUG();
+ /*
+ * Make the SMP trampoline executable:
+ */
+ trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
+#endif
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+static void __init smp_store_cpu_info(int id)
+{
+ struct cpuinfo_x86 *c = cpu_data + id;
+
+ *c = boot_cpu_data;
+ if (id!=0)
+ identify_cpu(c);
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
+
+ /*
+ * Certain Athlons might work (for various values of 'work') in SMP
+ * but they are not certified as MP capable.
+ */
+ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+
+ /* Athlon 660/661 is valid. */
+ if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
+ goto valid_k7;
+
+ /* Duron 670 is valid */
+ if ((c->x86_model==7) && (c->x86_mask==0))
+ goto valid_k7;
+
+ /*
+ * Athlon 662, Duron 671, and Athlon > model 7 have the capability bit.
+ * It's worth noting that the A5 stepping (662) of some Athlon XPs
+ * has the MP bit set.
+ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
+ */
+ if (((c->x86_model==6) && (c->x86_mask>=2)) ||
+ ((c->x86_model==7) && (c->x86_mask>=1)) ||
+ (c->x86_model> 7))
+ if (cpu_has_mp)
+ goto valid_k7;
+
+ /* If we get here, it's not a certified SMP capable AMD system. */
+ tainted |= TAINT_UNSAFE_SMP;
+ }
+
+valid_k7:
+ ;
+}
+
+#if 0
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSC's synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+static void __init synchronize_tsc_bp (void)
+{
+ int i;
+ unsigned long long t0;
+ unsigned long long sum, avg;
+ long long delta;
+ unsigned long one_usec;
+ int buggy = 0;
+
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
+
+ /* convert from kcyc/sec to cyc/usec */
+ one_usec = cpu_khz / 1000;
+
+ atomic_set(&tsc_start_flag, 1);
+ wmb();
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronized and
+ * the BP and APs set their cycle counters to zero all at
+ * once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+ for (i = 0; i < NR_LOOPS; i++) {
+ /*
+ * all APs synchronize but they loop on '== num_cpus'
+ */
+ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_stop, 0);
+ wmb();
+ /*
+ * this lets the APs save their current TSC:
+ */
+ atomic_inc(&tsc_count_start);
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ /*
+ * We clear the TSC in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ /*
+ * Wait for all APs to leave the synchronization point:
+ */
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
+ mb();
+ atomic_set(&tsc_count_start, 0);
+ wmb();
+ atomic_inc(&tsc_count_stop);
+ }
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_isset(i, cpu_callout_map)) {
+ t0 = tsc_values[i];
+ sum += t0;
+ }
+ }
+ avg = sum;
+ do_div(avg, num_booting_cpus());
+
+ sum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ delta = tsc_values[i] - avg;
+ if (delta < 0)
+ delta = -delta;
+ /*
+ * We report bigger than 2 microseconds clock differences.
+ */
+ if (delta > 2*one_usec) {
+ long realdelta;
+ if (!buggy) {
+ buggy = 1;
+ printk("\n");
+ }
+ realdelta = delta;
+ do_div(realdelta, one_usec);
+ if (tsc_values[i] < avg)
+ realdelta = -realdelta;
+
+ printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+ }
+
+ sum += delta;
+ }
+ if (!buggy)
+ printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+ int i;
+
+ /*
+ * Not every cpu is online at the time
+ * this gets called, so we first wait for the BP to
+ * finish SMP initialization:
+ */
+ while (!atomic_read(&tsc_start_flag)) mb();
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&tsc_count_start);
+ while (atomic_read(&tsc_count_start) != num_booting_cpus())
+ mb();
+
+ rdtscll(tsc_values[smp_processor_id()]);
+ if (i == NR_LOOPS-1)
+ write_tsc(0, 0);
+
+ atomic_inc(&tsc_count_stop);
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ }
+}
+#undef NR_LOOPS
+#endif
+
+extern void calibrate_delay(void);
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+ int cpuid, phys_id;
+ unsigned long timeout;
+
+#if 0
+ /*
+ * If waken up by an INIT in an 82489DX configuration
+ * we may get here before an INIT-deassert IPI reaches
+ * our local APIC. We have to wait for the IPI or we'll
+ * lock up on an APIC access.
+ */
+ wait_for_init_deassert(&init_deasserted);
+#endif
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ phys_id = smp_processor_id();
+ cpuid = smp_processor_id();
+ if (cpu_isset(cpuid, cpu_callin_map)) {
+ printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ phys_id, cpuid);
+ BUG();
+ }
+ Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+ /*
+ * STARTUP IPIs are fragile beasts, as they might sometimes
+ * trigger some glue motherboard logic. We keep the APIC bus
+ * completely silent for 1 second; this overestimates the time
+ * the boot CPU spends sending its (up to 2) STARTUP IPIs
+ * by a factor of two, which should be enough.
+ */
+
+ /*
+ * Waiting 2s total for startup (udelay is not yet working)
+ */
+ timeout = jiffies + 2*HZ;
+ while (time_before(jiffies, timeout)) {
+ /*
+ * Has the boot CPU finished its STARTUP sequence?
+ */
+ if (cpu_isset(cpuid, cpu_callout_map))
+ break;
+ rep_nop();
+ }
+
+ if (!time_before(jiffies, timeout)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ BUG();
+ }
+
+#if 0
+ /*
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
+ */
+
+ Dprintk("CALLIN, before setup_local_APIC().\n");
+ smp_callin_clear_local_apic();
+ setup_local_APIC();
+#endif
+ map_cpu_to_logical_apicid();
+
+ /*
+ * Get our bogomips.
+ */
+ calibrate_delay();
+ Dprintk("Stack at about %p\n",&cpuid);
+
+ /*
+ * Save our processor parameters
+ */
+ smp_store_cpu_info(cpuid);
+
+#if 0
+ disable_APIC_timer();
+#endif
+
+ /*
+ * Allow the master to continue.
+ */
+ cpu_set(cpuid, cpu_callin_map);
+
+#if 0
+ /*
+ * Synchronize the TSC with the BP
+ */
+ if (cpu_has_tsc && cpu_khz)
+ synchronize_tsc_ap();
+#endif
+}
+
+int cpucount;
+
+
+static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction local_irq_debug = {
+ local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
+ NULL, NULL
+};
+
+void local_setup_debug(void)
+{
+ (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
+}
+
+
+extern void local_setup_timer(void);
+
+/*
+ * Activate a secondary processor.
+ */
+static int __init start_secondary(void *unused)
+{
+ /*
+ * Don't put anything before smp_callin(); SMP
+ * booting is so fragile that we want to limit the
+ * work done here to the bare minimum.
+ */
+ cpu_init();
+ smp_callin();
+ while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+ rep_nop();
+ local_setup_timer();
+ local_setup_debug(); /* XXX */
+ smp_intr_init();
+ local_irq_enable();
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ local_flush_tlb();
+ cpu_set(smp_processor_id(), cpu_online_map);
+
+ /* We can take interrupts now: we're officially "up". */
+ local_irq_enable();
+
+ wmb();
+ if (0) {
+ char *msg2 = "delay2\n";
+ int timeout;
+ for (timeout = 0; timeout < 50000; timeout++) {
+ udelay(1000);
+ if (timeout == 2000) {
+ (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
+ timeout = 0;
+ }
+ }
+ }
+ cpu_idle();
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+ /*
+ * We don't actually need to load the full TSS,
+ * basically just the stack pointer and the eip.
+ */
+
+ asm volatile(
+ "movl %0,%%esp\n\t"
+ "jmp *%1"
+ :
+ :"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+extern struct {
+ void * esp;
+ unsigned short ss;
+} stack_start;
+
+#ifdef CONFIG_NUMA
+
+/* which logical CPUs are on which nodes */
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+ { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+/* which node each logical CPU is on */
+int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_2_node);
+
+/* set up a mapping between cpu and node. */
+static inline void map_cpu_to_node(int cpu, int node)
+{
+ printk("Mapping cpu %d to node %d\n", cpu, node);
+ cpu_set(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = node;
+}
+
+/* undo a mapping between cpu and node. */
+static inline void unmap_cpu_to_node(int cpu)
+{
+ int node;
+
+ printk("Unmapping cpu %d from all nodes\n", cpu);
+ for (node = 0; node < MAX_NUMNODES; node ++)
+ cpu_clear(cpu, node_2_cpu_mask[node]);
+ cpu_2_node[cpu] = 0;
+}
+#else /* !CONFIG_NUMA */
+
+#define map_cpu_to_node(cpu, node) ({})
+#define unmap_cpu_to_node(cpu) ({})
+
+#endif /* CONFIG_NUMA */
+
+u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
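+/* There is no physical local APIC under Xen; use the CPU number as the
+ * logical APIC ID. */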
+void map_cpu_to_logical_apicid(void)
+{
+ int cpu = smp_processor_id();
+ int apicid = smp_processor_id();
+
+ cpu_2_logical_apicid[cpu] = apicid;
+ map_cpu_to_node(cpu, apicid_to_node(apicid));
+}
+
+void unmap_cpu_to_logical_apicid(int cpu)
+{
+ cpu_2_logical_apicid[cpu] = BAD_APICID;
+ unmap_cpu_to_node(cpu);
+}
+
+#if APIC_DEBUG
+static inline void __inquire_remote_apic(int apicid)
+{
+ int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ char *names[] = { "ID", "VERSION", "SPIV" };
+ int timeout, status;
+
+ printk("Inquiring remote APIC #%d...\n", apicid);
+
+ for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ printk("... APIC #%d %s: ", apicid, names[i]);
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+ apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+ timeout = 0;
+ do {
+ udelay(100);
+ status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+ } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+ switch (status) {
+ case APIC_ICR_RR_VALID:
+ status = apic_read(APIC_RRR);
+ printk("%08x\n", status);
+ break;
+ default:
+ printk("failed\n");
+ }
+ }
+}
+#endif
+
+#if 0
+#ifdef WAKE_SECONDARY_VIA_NMI
+/*
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
+static int __init
+wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int timeout, maxlvt;
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ maxlvt = get_maxlvt();
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ Dprintk("NMI sent.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_NMI */
+
+#ifdef WAKE_SECONDARY_VIA_INIT
+static int __init
+wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+{
+ unsigned long send_status = 0, accept_status = 0;
+ int maxlvt, timeout, num_starts, j;
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ }
+
+ Dprintk("Asserting INIT.\n");
+
+ /*
+ * Turn INIT on target chip
+ */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /*
+ * Send IPI
+ */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+ | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ mdelay(10);
+
+ Dprintk("Deasserting INIT.\n");
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Send IPI */
+ apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ atomic_set(&init_deasserted, 1);
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+ */
+ if (APIC_INTEGRATED(apic_version[phys_apicid]))
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+ Dprintk("#startup loops: %d.\n", num_starts);
+
+ maxlvt = get_maxlvt();
+
+ for (j = 1; j <= num_starts; j++) {
+ Dprintk("Sending STARTUP #%d.\n",j);
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
+ Dprintk("After apic_write.\n");
+
+ /*
+ * STARTUP IPI
+ */
+
+ /* Target chip */
+ apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+ /* Boot on the stack */
+ /* Kick the second */
+ apic_write_around(APIC_ICR, APIC_DM_STARTUP
+ | (start_eip >> 12));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(300);
+
+ Dprintk("Startup point 1.\n");
+
+ Dprintk("Waiting for send to finish...\n");
+ timeout = 0;
+ do {
+ Dprintk("+");
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
+ udelay(200);
+ /*
+ * Due to the Pentium erratum 3AP.
+ */
+ if (maxlvt > 3) {
+ apic_read_around(APIC_SPIV);
+ apic_write(APIC_ESR, 0);
+ }
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ if (send_status || accept_status)
+ break;
+ }
+ Dprintk("After Startup.\n");
+
+ if (send_status)
+ printk("APIC never delivered???\n");
+ if (accept_status)
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ return (send_status | accept_status);
+}
+#endif /* WAKE_SECONDARY_VIA_INIT */
+#endif
+
+extern cpumask_t cpu_initialized;
+
+static int __init do_boot_cpu(int apicid)
+/*
+ * NOTE - on most systems this is a PHYSICAL APIC ID, but on multiquad
+ * (i.e. clustered APIC addressing mode) systems this is a LOGICAL APIC ID.
+ * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
+ */
+{
+ struct task_struct *idle;
+ unsigned long boot_error;
+ int timeout, cpu;
+ unsigned long start_eip;
+#if 0
+ unsigned short nmi_high = 0, nmi_low = 0;
+#endif
+ full_execution_context_t ctxt;
+ extern void startup_32_smp(void);
+ extern void hypervisor_callback(void);
+ extern void failsafe_callback(void);
+ extern int smp_trap_init(trap_info_t *);
+ int i;
+
+ cpu = ++cpucount;
+ /*
+ * We can't use kernel_thread since we must avoid
+ * rescheduling the child.
+ */
+ idle = fork_idle(cpu);
+ if (IS_ERR(idle))
+ panic("failed fork for CPU %d", cpu);
+ idle->thread.eip = (unsigned long) start_secondary;
+ /* start_eip had better be page-aligned! */
+ start_eip = (unsigned long)startup_32_smp;
+
+ /* So we see what's up */
+ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+ /* Stack for startup_32 can be just as for start_secondary onwards */
+ stack_start.esp = (void *) idle->thread.esp;
+
+ irq_ctx_init(cpu);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ atomic_set(&init_deasserted, 0);
+
+#if 1
+ if (cpu_gdt_descr[0].size > PAGE_SIZE)
+ BUG();
+ cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+ memcpy((void *)cpu_gdt_descr[cpu].address,
+ (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+ memset((char *)cpu_gdt_descr[cpu].address +
+ FIRST_RESERVED_GDT_ENTRY * 8, 0,
+ NR_RESERVED_GDT_ENTRIES * 8);
+
+ memset(&ctxt, 0, sizeof(ctxt));
+
+ ctxt.cpu_ctxt.ds = __USER_DS;
+ ctxt.cpu_ctxt.es = __USER_DS;
+ ctxt.cpu_ctxt.fs = 0;
+ ctxt.cpu_ctxt.gs = 0;
+ ctxt.cpu_ctxt.ss = __KERNEL_DS;
+ ctxt.cpu_ctxt.cs = __KERNEL_CS;
+ ctxt.cpu_ctxt.eip = start_eip;
+ ctxt.cpu_ctxt.esp = idle->thread.esp;
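+ /* Initial EFLAGS for the vCPU: IF (bit 9) set, IOPL (bits 12-13)
+ taken from the idle thread's io_pl. */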
+ ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+ /* FPU is set up to default initial state. */
+ memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+ /* Virtual IDT is empty at start-of-day. */
+ for ( i = 0; i < 256; i++ )
+ {
+ ctxt.trap_ctxt[i].vector = i;
+ ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ }
+ ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
+
+ /* No LDT. */
+ ctxt.ldt_ents = 0;
+
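+ /*
+ * Register the new CPU's GDT with Xen: each backing page must be made
+ * read-only and is passed to the hypervisor as a machine frame number.
+ */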
+ {
+ unsigned long va;
+ int f;
+
+ for (va = cpu_gdt_descr[cpu].address, f = 0;
+ va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+ va += PAGE_SIZE, f++) {
+ ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ make_page_readonly((void *)va);
+ }
+ ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
+ flush_page_update_queue();
+ }
+
+ /* Ring 1 stack is the initial stack. */
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_esp = idle->thread.esp;
+
+ /* Callback handlers. */
+ ctxt.event_callback_cs = __KERNEL_CS;
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_cs = __KERNEL_CS;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+ ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
+
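+ /* Hand the completed context to Xen to bring the new virtual CPU online. */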
+ boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_logical_apicid(cpu);
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ }
+
+#else
+ Dprintk("Setting warm reset code and vector.\n");
+
+ store_NMI_vector(&nmi_high, &nmi_low);
+
+ smpboot_setup_warm_reset_vector(start_eip);
+
+ /*
+ * Starting actual IPI sequence...
+ */
+ boot_error = wakeup_secondary_cpu(apicid, start_eip);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ if (*((volatile unsigned char *)trampoline_base)
+ == 0xA5)
+ /* trampoline started but...? */
+ printk("Stuck ??\n");
+ else
+ /* trampoline code not run */
+ printk("Not responding.\n");
+ inquire_remote_apic(apicid);
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ unmap_cpu_to_logical_apicid(cpu);
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ }
+
+ /* mark "stuck" area as not stuck */
+ *((volatile unsigned long *)trampoline_base) = 0;
+#endif
+
+ return boot_error;
+}
+
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+ unsigned long cachesize; /* kB */
+ unsigned long bandwidth = 350; /* MB/s */
+ /*
+ * Rough estimation for SMP scheduling, this is the number of
+ * cycles it takes for a fully memory-limited process to flush
+ * the SMP-local cache.
+ *
+ * (For a P5 this pretty much means we will choose another idle
+ * CPU almost always at wakeup time (this is due to the small
+ * L1 cache), on PIIs it's around 50-100 usecs, depending on
+ * the cache size)
+ */
+
+ if (!cpu_khz) {
+ /*
+ * this basically disables processor-affinity
+ * scheduling on SMP without a TSC.
+ */
+ cacheflush_time = 0;
+ return;
+ } else {
+ cachesize = boot_cpu_data.x86_cache_size;
+ if (cachesize == -1) {
+ cachesize = 16; /* Pentiums, 2x8kB cache */
+ bandwidth = 100;
+ }
+
+ cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+ }
+
+ cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
+
+ printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+ (long)cacheflush_time/(cpu_khz/1000),
+ ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+ printk("task migration cache decay timeout: %ld msecs.\n",
+ cache_decay_ticks);
+}
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+#if 0
+static int boot_cpu_logical_apicid;
+#endif
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+
+static void __init smp_boot_cpus(unsigned int max_cpus)
+{
+ int cpu, kicked;
+ unsigned long bogosum = 0;
+#if 0
+ int apicid, bit;
+#endif
+
+ /*
+ * Setup boot CPU information
+ */
+ smp_store_cpu_info(0); /* Final full version of the data */
+ printk("CPU%d: ", 0);
+ print_cpu_info(&cpu_data[0]);
+
+#if 0
+ boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+#else
+ // boot_cpu_physical_apicid = 0;
+ // boot_cpu_logical_apicid = 0;
+ x86_cpu_to_apicid[0] = 0;
+#endif
+
+ current_thread_info()->cpu = 0;
+ smp_tune_scheduling();
+ cpus_clear(cpu_sibling_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
+
+ /*
+ * If we couldn't find an SMP configuration at boot time,
+ * get out of here now!
+ */
+ if (!smp_found_config /* && !acpi_lapic) */) {
+ printk(KERN_NOTICE "SMP motherboard not detected.\n");
+ smpboot_clear_io_apic_irqs();
+#if 0
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ if (APIC_init_uniprocessor())
+ printk(KERN_NOTICE "Local APIC not detected."
+ " Using dummy APIC emulation.\n");
+#endif
+ map_cpu_to_logical_apicid();
+ return;
+ }
+
+#if 0
+ /*
+ * Should not be necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ * Makes no sense to do this check in clustered apic mode, so skip it
+ */
+ if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
+ printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
+ physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+ }
+
+ /*
+ * If we couldn't find a local APIC, then get out of here now!
+ */
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+ printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+ boot_cpu_physical_apicid);
+ printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+ smpboot_clear_io_apic_irqs();
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ return;
+ }
+
+ verify_local_APIC();
+#endif
+
+ /*
+ * If SMP should be disabled, then really disable it!
+ */
+ if (!max_cpus) {
+ HYPERVISOR_shared_info->n_vcpu = 1;
+ printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ smpboot_clear_io_apic_irqs();
+#if 0
+ phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
+ return;
+ }
+
+ smp_intr_init();
+
+#if 0
+ connect_bsp_APIC();
+ setup_local_APIC();
+#endif
+ map_cpu_to_logical_apicid();
+#if 0
+
+
+ setup_portio_remap();
+
+ /*
+ * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+ *
+ * In clustered apic mode, phys_cpu_present_map is a constructed thus:
+ * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
+ * clustered apic ID.
+ */
+ Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+#endif
+ Dprintk("CPU present map: %lx\n",
+ (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
+
+ kicked = 1;
+ for (cpu = 1; kicked < NR_CPUS &&
+ cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
+ if (max_cpus <= cpucount+1)
+ continue;
+
+ if (do_boot_cpu(cpu))
+ printk("CPU #%d not responding - cannot use it.\n",
+ cpu);
+ else
+ ++kicked;
+ }
+
+#if 0
+ /*
+ * Cleanup possible dangling ends...
+ */
+ smpboot_restore_warm_reset_vector();
+#endif
+
+ /*
+ * Allow the user to impress friends.
+ */
+ Dprintk("Before bogomips.\n");
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ if (cpu_isset(cpu, cpu_callout_map))
+ bogosum += cpu_data[cpu].loops_per_jiffy;
+ printk(KERN_INFO
+ "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ cpucount+1,
+ bogosum/(500000/HZ),
+ (bogosum/(5000/HZ))%100);
+
+ Dprintk("Before bogocount - setting activated=1.\n");
+
+ if (smp_b_stepping)
+ printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+
+ /*
+ * Don't taint if we are running SMP kernel on a single non-MP
+ * approved Athlon
+ */
+ if (tainted & TAINT_UNSAFE_SMP) {
+ if (cpucount)
+ printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
+ else
+ tainted &= ~TAINT_UNSAFE_SMP;
+ }
+
+ Dprintk("Boot done.\n");
+
+ /*
+ * construct cpu_sibling_map[], so that we can tell sibling CPUs
+ * efficiently.
+ */
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpus_clear(cpu_sibling_map[cpu]);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ int siblings = 0;
+ int i;
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+
+ if (smp_num_siblings > 1) {
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_isset(i, cpu_callout_map))
+ continue;
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ siblings++;
+ cpu_set(i, cpu_sibling_map[cpu]);
+ }
+ }
+ } else {
+ siblings++;
+ cpu_set(cpu, cpu_sibling_map[cpu]);
+ }
+
+ if (siblings != smp_num_siblings)
+ printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
+ }
+
+#if 0
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ check_nmi_watchdog();
+
+ smpboot_setup_io_apic();
+
+ setup_boot_APIC_clock();
+
+ /*
+ * Synchronize the TSC with the AP
+ */
+ if (cpu_has_tsc && cpucount && cpu_khz)
+ synchronize_tsc_bp();
+#endif
+}
+
+/* These are wrappers to interface to the new boot process. Someone
+ who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_boot_cpus(max_cpus);
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+ cpu_set(smp_processor_id(), cpu_online_map);
+ cpu_set(smp_processor_id(), cpu_callout_map);
+}
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+ /* This only works at boot for x86. See "rewrite" above. */
+ if (cpu_isset(cpu, smp_commenced_mask)) {
+ local_irq_enable();
+ return -ENOSYS;
+ }
+
+ /* In case one didn't come up */
+ if (!cpu_isset(cpu, cpu_callin_map)) {
+ local_irq_enable();
+ return -EIO;
+ }
+
+ local_irq_enable();
+ /* Unleash the CPU! */
+ cpu_set(cpu, smp_commenced_mask);
+ while (!cpu_isset(cpu, cpu_online_map))
+ mb();
+ return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+#if 1
+#else
+#ifdef CONFIG_X86_IO_APIC
+ setup_ioapic_dest();
+#endif
+ zap_low_mappings();
+ /*
+ * Disable executability of the SMP trampoline:
+ */
+ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
+}
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+
+static struct irqaction reschedule_irq = {
+ smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
+ NULL, NULL
+};
+
+extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
+
+static struct irqaction invalidate_irq = {
+ smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
+ NULL, NULL
+};
+
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static struct irqaction call_function_irq = {
+ smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
+ "call_function", NULL, NULL
+};
+
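+/*
+ * Bind this CPU's reschedule, TLB-invalidate and call-function IPIs
+ * (delivered as Xen event channels) to Linux interrupt handlers.
+ */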
+void __init smp_intr_init(void)
+{
+
+ (void)setup_irq(
+ bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
+ &reschedule_irq);
+ (void)setup_irq(
+ bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
+ &invalidate_irq);
+ (void)setup_irq(
+ bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
+ &call_function_irq);
+}
--- /dev/null
+/* Copyright (C) 2004, Christian Limpach */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/threads.h>
+
+unsigned int __initdata maxcpus = NR_CPUS;
+
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+ printk("setup_profiling_timer\n");
+
+ return 0;
+}
--- /dev/null
+
+obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o
+
--- /dev/null
+/******************************************************************************
+ * blktap.c
+ *
+ * XenLinux virtual block-device tap.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ * Based on the original split block driver:
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ *
+ * Note that unlike the split block driver code, this driver has been developed
+ * strictly for Linux 2.6
+ */
+
+#include "blktap.h"
+
+int __init xlblktap_init(void)
+{
+ ctrl_msg_t cmsg;
+ blkif_fe_driver_status_t fe_st;
+ blkif_be_driver_status_t be_st;
+
+ printk(KERN_INFO "Initialising Xen block tap device\n");
+
+ DPRINTK(" tap - Backend connection init:\n");
+
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
+ cmsg.length = sizeof(blkif_fe_driver_status_t);
+ fe_st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ DPRINTK(" tap - Frontend connection init:\n");
+
+ active_reqs_init();
+ blkif_interface_init();
+ blkdev_schedule_init();
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS;
+ cmsg.length = sizeof(blkif_be_driver_status_t);
+ be_st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &be_st, sizeof(be_st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ DPRINTK(" tap - Userland channel init:\n");
+
+ blktap_init();
+
+ DPRINTK("Blkif tap device initialized.\n");
+
+ return 0;
+}
+
+#if 0 /* tap doesn't handle suspend/resume */
+void blkdev_suspend(void)
+{
+}
+
+void blkdev_resume(void)
+{
+ ctrl_msg_t cmsg;
+ blkif_fe_driver_status_t st;
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_BLKIF_FE;
+ cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
+ cmsg.length = sizeof(blkif_fe_driver_status_t);
+ st.status = BLKIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+#endif
+
+__initcall(xlblktap_init);
--- /dev/null
+/*
+ * blktap.h
+ *
+ * Interfaces for the Xen block tap driver.
+ *
+ * (c) 2004, Andrew Warfield, University of Cambridge
+ *
+ */
+
+#ifndef __BLKTAP_H__
+#define __BLKTAP_H__
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <asm-xen/ctrl_if.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/io/blkif.h>
+#include <asm-xen/xen-public/io/ring.h>
+
+/* Used to signal to the backend that this is a tap domain. */
+#define BLKTAP_COOKIE 0xbeadfeed
+
+/* -------[ debug / pretty printing ]--------------------------------- */
+
+#if 0
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
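+/* When enabled below, a failed ASSERT() prints its location and then
+ * dereferences NULL to halt at the failing site. */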
+#if 1
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
+
+
+/* -------[ state descriptors ]--------------------------------------- */
+
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
+
+/* -------[ connection tracking ]------------------------------------- */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#endif
+
+extern spinlock_t blkif_io_lock;
+
+typedef struct blkif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
+ /* Comms information. */
+ blkif_back_ring_t blk_ring;
+
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
+ struct blkif_st *hash_next;
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
+ struct work_struct work;
+} blkif_t;
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+void blkif_disconnect_complete(blkif_t *blkif);
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ blkif_disconnect_complete(_b); \
+ } while (0)
+
+
+/* -------[ active request tracking ]--------------------------------- */
+
+typedef struct {
+ blkif_t *blkif;
+ unsigned long id;
+ int nr_pages;
+ unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int next_free;
+} active_req_t;
+
+typedef unsigned int ACTIVE_RING_IDX;
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx);
+
+extern inline unsigned int ID_TO_IDX(unsigned long id)
+{
+ return ( id & 0x0000ffff );
+}
+
+extern inline domid_t ID_TO_DOM(unsigned long id)
+{
+ return (id >> 16);
+}
+
+void active_reqs_init(void);
+
+/* -------[ interposition -> character device interface ]------------- */
+
+/* /dev/xen/blktap resides at device number major=10, minor=202 (BLKTAP_MINOR) */
+#define BLKTAP_MINOR 202
+
+/* size of the extra VMA area to map in attached pages. */
+#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
+
+/* blktap IOCTLs: */
+#define BLKTAP_IOCTL_KICK_FE 1
+#define BLKTAP_IOCTL_KICK_BE 2
+#define BLKTAP_IOCTL_SETMODE 3
+#define BLKTAP_IOCTL_PRINT_IDXS 100
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
+#define BLKTAP_MODE_COPY_FE 0x00000004
+#define BLKTAP_MODE_COPY_BE 0x00000008
+#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
+#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
+
+#define BLKTAP_MODE_INTERPOSE \
+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+ (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+ (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+ return (
+ ( arg == BLKTAP_MODE_PASSTHROUGH ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
+ ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
+ ( arg == BLKTAP_MODE_INTERPOSE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
+ ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+ );
+}
+
+
+
+/* -------[ Mappings to User VMA ]------------------------------------ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+extern struct vm_area_struct *blktap_vma;
+
+/* The following are from blkback.c and should probably be put in a
+ * header and included from there.
+ * The mmap area described here is where attached data pages will be mapped.
+ */
+
+extern unsigned long mmap_vstart;
+#define MMAP_PAGES_PER_REQUEST \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
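+/* Each pending request owns MMAP_PAGES_PER_REQUEST consecutive pages;
+ * MMAP_VADDR() yields the address of segment _seg within request _req. */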
+
+/* Immediately before the mmap area, RING_PAGES pages are reserved for the
+ * shared memory rings (control, frontend and backend).
+ */
+
+#define RING_PAGES 3 /* Ctrl, Front, and Back */
+extern unsigned long rings_vstart;
+
+
+/* -------[ Here be globals ]----------------------------------------- */
+extern unsigned long blktap_mode;
+
+/* Connection to a single backend domain. */
+extern blkif_front_ring_t blktap_be_ring;
+extern unsigned int blktap_be_evtchn;
+extern unsigned int blktap_be_state;
+
+/* User ring status. */
+extern unsigned long blktap_ring_ok;
+
+/* -------[ ...and function prototypes. ]----------------------------- */
+
+/* init function for character device interface. */
+int blktap_init(void);
+
+/* init function for the blkif cache. */
+void __init blkif_interface_init(void);
+void __init blkdev_schedule_init(void);
+void blkif_deschedule(blkif_t *blkif);
+
+/* interfaces to the char driver, passing messages to and from apps. */
+void blktap_kick_user(void);
+
+/* user ring access functions: */
+int blktap_write_fe_ring(blkif_request_t *req);
+int blktap_write_be_ring(blkif_response_t *rsp);
+int blktap_write_ctrl_ring(ctrl_msg_t *msg);
+
+/* fe/be ring access functions: */
+int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp);
+int write_req_to_be_ring(blkif_request_t *req);
+
+/* event notification functions */
+void kick_fe_domain(blkif_t *blkif);
+void kick_be_domain(void);
+
+/* Interrupt handlers. */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+ struct pt_regs *ptregs);
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Control message receiver. */
+extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+
+/* debug */
+void print_vm_ring_idxs(void);
+
+#define __BLKINT_H__
+#endif
--- /dev/null
+/******************************************************************************
+ * blktap_controlmsg.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interfaces to the frontend and backend drivers.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include "blktap.h"
+
+static char *blkif_state_name[] = {
+ [BLKIF_STATE_CLOSED] = "closed",
+ [BLKIF_STATE_DISCONNECTED] = "disconnected",
+ [BLKIF_STATE_CONNECTED] = "connected",
+};
+
+static char * blkif_status_name[] = {
+ [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
+ [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+ [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
+ [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
+};
+
+static unsigned blktap_be_irq;
+unsigned int blktap_be_state = BLKIF_STATE_CLOSED;
+unsigned int blktap_be_evtchn;
+
+/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
+
+#define BLKIF_HASHSZ 1024
+#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
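+/* Interfaces are hashed on (domid XOR handle); BLKIF_HASHSZ must remain a
+ * power of two for the mask above to work. */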
+
+static kmem_cache_t *blkif_cachep;
+static blkif_t *blkif_hash[BLKIF_HASHSZ];
+
+blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
+{
+ blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif != NULL) &&
+ ((blkif->domid != domid) || (blkif->handle != handle)) )
+ blkif = blkif->hash_next;
+ return blkif;
+}
+
+static void __blkif_disconnect_complete(void *arg)
+{
+ blkif_t *blkif = (blkif_t *)arg;
+ ctrl_msg_t cmsg;
+ blkif_be_disconnect_t disc;
+
+ /*
+ * These can't be done in blkif_disconnect() because at that point there
+ * may be outstanding requests at the disc whose asynchronous responses
+ * must still be notified to the remote driver.
+ */
+ unbind_evtchn_from_irq(blkif->evtchn);
+ vfree(blkif->blk_ring.sring);
+
+ /* Construct the deferred response message. */
+ cmsg.type = CMSG_BLKIF_BE;
+ cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT;
+ cmsg.id = blkif->disconnect_rspid;
+ cmsg.length = sizeof(blkif_be_disconnect_t);
+ disc.domid = blkif->domid;
+ disc.blkif_handle = blkif->handle;
+ disc.status = BLKIF_BE_STATUS_OKAY;
+ memcpy(cmsg.msg, &disc, sizeof(disc));
+
+ /*
+ * Make sure message is constructed /before/ status change, because
+ * after the status change the 'blkif' structure could be deallocated at
+ * any time. Also make sure we send the response /after/ status change,
+ * as otherwise a subsequent CONNECT request could spuriously fail if
+ * another CPU doesn't see the status change yet.
+ */
+ mb();
+ if ( blkif->status != DISCONNECTING )
+ BUG();
+ blkif->status = DISCONNECTED;
+ mb();
+
+ /* Send the successful response. */
+ ctrl_if_send_response(&cmsg);
+}
+
+void blkif_disconnect_complete(blkif_t *blkif)
+{
+ INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
+ schedule_work(&blkif->work);
+}
+
+void blkif_ptfe_create(blkif_be_create_t *create)
+{
+ blkif_t *blkif, **pblkif;
+ domid_t domid = create->domid;
+ unsigned int handle = create->blkif_handle;
+
+
+ /* May want to store info on the connecting domain here. */
+
+ DPRINTK("PT got BE_CREATE\n");
+
+ if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
+ {
+ DPRINTK("Could not create blkif: out of memory\n");
+ create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ /* blkif struct init code from blkback.c */
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->handle = handle;
+ blkif->status = DISCONNECTED;
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 0);
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( *pblkif != NULL )
+ {
+ if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
+ {
+ DPRINTK("Could not create blkif: already exists\n");
+ create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
+ kmem_cache_free(blkif_cachep, blkif);
+ return;
+ }
+ pblkif = &(*pblkif)->hash_next;
+ }
+
+ blkif->hash_next = *pblkif;
+ *pblkif = blkif;
+
+ create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+
+void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
+{
+ /* Clear anything that we initialized above. */
+
+ domid_t domid = destroy->domid;
+ unsigned int handle = destroy->blkif_handle;
+ blkif_t **pblkif, *blkif;
+
+ DPRINTK("PT got BE_DESTROY\n");
+
+ pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
+ while ( (blkif = *pblkif) != NULL )
+ {
+ if ( (blkif->domid == domid) && (blkif->handle == handle) )
+ {
+ if ( blkif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
+ pblkif = &blkif->hash_next;
+ }
+
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ *pblkif = blkif->hash_next;
+ kmem_cache_free(blkif_cachep, blkif);
+ destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+void blkif_ptfe_connect(blkif_be_connect_t *connect)
+{
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->blkif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
+ struct vm_struct *vma;
+ pgprot_t prot;
+ int error;
+ blkif_t *blkif;
+ blkif_sring_t *sring;
+
+ DPRINTK("PT got BE_CONNECT\n");
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n",
+ connect->domid, connect->blkif_handle);
+ connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ {
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+ error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+ shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ if ( error != 0 )
+ {
+ WPRINTK("BE_CONNECT: error! (%d)\n", error);
+ if ( error == -ENOMEM )
+ connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+ else if ( error == -EFAULT ) {
+ connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+ WPRINTK("BE_CONNECT: MAPPING error!\n");
+ }
+ else
+ connect->status = BLKIF_BE_STATUS_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ if ( blkif->status != DISCONNECTED )
+ {
+ connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+ vfree(vma->addr);
+ return;
+ }
+
+ sring = (blkif_sring_t *)vma->addr;
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blkif->blk_ring, sring);
+
+ blkif->evtchn = evtchn;
+ blkif->irq = bind_evtchn_to_irq(evtchn);
+ blkif->shmem_frame = shmem_frame;
+ blkif->status = CONNECTED;
+ blkif_get(blkif);
+
+ request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
+
+ connect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+ domid_t domid = disconnect->domid;
+ unsigned int handle = disconnect->blkif_handle;
+ blkif_t *blkif;
+
+ DPRINTK("PT got BE_DISCONNECT\n");
+
+ blkif = blkif_find_by_handle(domid, handle);
+ if ( unlikely(blkif == NULL) )
+ {
+ DPRINTK("blkif_disconnect attempted for non-existent blkif"
+ " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle);
+ disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return 1; /* Caller will send response error message. */
+ }
+
+ if ( blkif->status == CONNECTED )
+ {
+ blkif->status = DISCONNECTING;
+ blkif->disconnect_rspid = rsp_id;
+ wmb(); /* Let other CPUs see the status change. */
+ free_irq(blkif->irq, blkif);
+ blkif_deschedule(blkif);
+ blkif_put(blkif);
+ return 0; /* Caller should not send response message. */
+ }
+
+ disconnect->status = BLKIF_BE_STATUS_OKAY;
+ return 1;
+}
+
+/*-----[ Control Messages to/from Backend VM ]----------------------------*/
+
+/* Tell the controller to bring up the interface. */
+static void blkif_ptbe_send_interface_connect(void)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_BLKIF_FE,
+ .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
+ .length = sizeof(blkif_fe_interface_connect_t),
+ };
+ blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+ msg->handle = 0;
+ msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+static void blkif_ptbe_close(void)
+{
+}
+
+/* Move from CLOSED to DISCONNECTED state. */
+static void blkif_ptbe_disconnect(void)
+{
+ blkif_sring_t *sring;
+
+ sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blktap_be_ring, sring);
+ blktap_be_state = BLKIF_STATE_DISCONNECTED;
+ DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
+ blkif_ptbe_send_interface_connect();
+}
+
+static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
+{
+ int err = 0;
+
+ blktap_be_evtchn = status->evtchn;
+ blktap_be_irq = bind_evtchn_to_irq(blktap_be_evtchn);
+
+ err = request_irq(blktap_be_irq, blkif_ptbe_int,
+ SA_SAMPLE_RANDOM, "blkif", NULL);
+ if ( err ) {
+ WPRINTK("blkfront request_irq failed (%d)\n", err);
+ return;
+ } else {
+ /* transition to connected in case we need to do
+ a partition probe on a whole disk */
+ blktap_be_state = BLKIF_STATE_CONNECTED;
+ }
+}
+
+static void unexpected(blkif_fe_interface_status_t *status)
+{
+ WPRINTK(" TAP: Unexpected blkif status %s in state %s\n",
+ blkif_status_name[status->status],
+ blkif_state_name[blktap_be_state]);
+}
+
+static void blkif_ptbe_status(
+ blkif_fe_interface_status_t *status)
+{
+ if ( status->handle != 0 )
+ {
+ DPRINTK("Status change on unsupported blkif %d\n",
+ status->handle);
+ return;
+ }
+
+ DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
+
+ switch ( status->status )
+ {
+ case BLKIF_INTERFACE_STATUS_CLOSED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_ptbe_close();
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ blkif_ptbe_disconnect();
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
+ printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
+ unexpected(status);
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CONNECTED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ blkif_ptbe_disconnect();
+ blkif_ptbe_connect(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ blkif_ptbe_connect(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_ptbe_connect(status);
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CHANGED:
+ switch ( blktap_be_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ case BLKIF_STATE_DISCONNECTED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ /* vbd_update(); */
+ /* tap doesn't really get state changes... */
+ unexpected(status);
+ break;
+ }
+ break;
+
+ default:
+ DPRINTK("Status change to unknown value %d\n", status->status);
+ break;
+ }
+}
+
+/*-----[ All control messages enter here: ]-------------------------------*/
+
+void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->type )
+ {
+ case CMSG_BLKIF_FE:
+
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_FE_INTERFACE_STATUS:
+ if ( msg->length != sizeof(blkif_fe_interface_status_t) )
+ goto parse_error;
+ blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
+ break;
+
+ default:
+ goto parse_error;
+ }
+ break;
+
+ case CMSG_BLKIF_BE:
+
+ /* send a copy of the message to user if wanted */
+
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+ blktap_write_ctrl_ring(msg);
+ }
+
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_BE_CREATE:
+ if ( msg->length != sizeof(blkif_be_create_t) )
+ goto parse_error;
+ blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DESTROY:
+ if ( msg->length != sizeof(blkif_be_destroy_t) )
+ goto parse_error;
+ blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_CONNECT:
+ if ( msg->length != sizeof(blkif_be_connect_t) )
+ goto parse_error;
+ blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_BLKIF_BE_DISCONNECT:
+ if ( msg->length != sizeof(blkif_be_disconnect_t) )
+ goto parse_error;
+ if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0],
+ msg->id) )
+ return;
+ break;
+
+ /* We just ignore anything to do with vbds for now. */
+
+ case CMSG_BLKIF_BE_VBD_CREATE:
+ DPRINTK("PT got VBD_CREATE\n");
+ ((blkif_be_vbd_create_t *)&msg->msg[0])->status
+ = BLKIF_BE_STATUS_OKAY;
+ break;
+ case CMSG_BLKIF_BE_VBD_DESTROY:
+ DPRINTK("PT got VBD_DESTROY\n");
+ ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
+ = BLKIF_BE_STATUS_OKAY;
+ break;
+ case CMSG_BLKIF_BE_VBD_GROW:
+ DPRINTK("PT got VBD_GROW\n");
+ ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
+ = BLKIF_BE_STATUS_OKAY;
+ break;
+ case CMSG_BLKIF_BE_VBD_SHRINK:
+ DPRINTK("PT got VBD_SHRINK\n");
+ ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
+ = BLKIF_BE_STATUS_OKAY;
+ break;
+ default:
+ goto parse_error;
+ }
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+/*-----[ Initialization ]-------------------------------------------------*/
+
+void __init blkif_interface_init(void)
+{
+ blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+ 0, 0, NULL, NULL);
+ memset(blkif_hash, 0, sizeof(blkif_hash));
+
+ blktap_be_ring.sring = NULL;
+}
--- /dev/null
+/******************************************************************************
+ * blktap_datapath.c
+ *
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ * -- see full header in blktap.c
+ */
+
+#include "blktap.h"
+#include <asm-xen/evtchn.h>
+
+/*-----[ The data paths ]-------------------------------------------------*/
+
+/* Connection to a single backend domain. */
+blkif_front_ring_t blktap_be_ring;
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* this must be the same as MAX_PENDING_REQS in blkback.c */
+#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
+
+active_req_t active_reqs[MAX_ACTIVE_REQS];
+ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
+ACTIVE_RING_IDX active_prod, active_cons;
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+#define ACTIVE_IDX(_ar) (_ar - active_reqs)
+#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
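+/* active_req_ring holds the indices of free active_reqs slots; the
+ * producer/consumer difference gives the number of requests in flight. */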
+
+inline active_req_t *get_active_req(void)
+{
+ ACTIVE_RING_IDX idx;
+ active_req_t *ar;
+ unsigned long flags;
+
+ ASSERT(active_cons != active_prod);
+
+ spin_lock_irqsave(&active_req_lock, flags);
+ idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
+ ar = &active_reqs[idx];
+ spin_unlock_irqrestore(&active_req_lock, flags);
+
+ return ar;
+}
+
+inline void free_active_req(active_req_t *ar)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&active_req_lock, flags);
+ active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
+ spin_unlock_irqrestore(&active_req_lock, flags);
+}
+
+active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
+{
+ return &active_reqs[idx];
+}
+
+void active_reqs_init(void)
+{
+ ACTIVE_RING_IDX i;
+
+ active_cons = 0;
+ active_prod = MAX_ACTIVE_REQS;
+ memset(active_reqs, 0, sizeof(active_reqs));
+ for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
+ active_req_ring[i] = i;
+}
+
+/* Requests passing through the tap to the backend hijack the id field
+ * in the request message. In it we put the AR index _AND_ the fe domid.
+ * the domid is used by the backend to map the pages properly.
+ */
+
+static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
+{
+ return ( (fe_dom << 16) | idx );
+}
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
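+/* Copy a backend response into the originating frontend's shared ring,
+ * restoring the frontend's original id and freeing the active-request slot. */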
+inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
+{
+ blkif_response_t *resp_d;
+ active_req_t *ar;
+
+ ar = &active_reqs[ID_TO_IDX(rsp->id)];
+ rsp->id = ar->id;
+
+ resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
+ blkif->blk_ring.rsp_prod_pvt);
+ memcpy(resp_d, rsp, sizeof(blkif_response_t));
+ wmb();
+ blkif->blk_ring.rsp_prod_pvt++;
+
+ blkif_put(ar->blkif);
+ free_active_req(ar);
+
+ return 0;
+}
+
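+/* Copy a request onto the shared ring to the real backend domain. */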
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+ blkif_request_t *req_d;
+
+ if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
+ WPRINTK("Tap trying to access an unconnected backend!\n");
+ return 0;
+ }
+
+ req_d = RING_GET_REQUEST(&blktap_be_ring,
+ blktap_be_ring.req_prod_pvt);
+ memcpy(req_d, req, sizeof(blkif_request_t));
+ wmb();
+ blktap_be_ring.req_prod_pvt++;
+
+ return 0;
+}
+
+void kick_fe_domain(blkif_t *blkif)
+{
+ RING_PUSH_RESPONSES(&blkif->blk_ring);
+ notify_via_evtchn(blkif->evtchn);
+ DPRINTK("notified FE(dom %u)\n", blkif->domid);
+
+}
+
+void kick_be_domain(void)
+{
+ if ( blktap_be_state != BLKIF_STATE_CONNECTED )
+ return;
+
+ wmb(); /* Ensure that the frontend can see the requests. */
+ RING_PUSH_REQUESTS(&blktap_be_ring);
+ notify_via_evtchn(blktap_be_evtchn);
+ DPRINTK("notified BE\n");
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*-----[ Scheduler list maintenance - from blkback ]--- */
+
+static struct list_head blkio_schedule_list;
+static spinlock_t blkio_schedule_list_lock;
+
+static int __on_blkdev_list(blkif_t *blkif)
+{
+ return blkif->blkdev_list.next != NULL;
+}
+
+static void remove_from_blkdev_list(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( !__on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( __on_blkdev_list(blkif) )
+ {
+ list_del(&blkif->blkdev_list);
+ blkif->blkdev_list.next = NULL;
+ blkif_put(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+static void add_to_blkdev_list_tail(blkif_t *blkif)
+{
+ unsigned long flags;
+ if ( __on_blkdev_list(blkif) ) return;
+ spin_lock_irqsave(&blkio_schedule_list_lock, flags);
+ if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
+ {
+ list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
+ blkif_get(blkif);
+ }
+ spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
+}
+
+
+/*-----[ Scheduler functions - from blkback ]--- */
+
+static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do);
+
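+/* Scheduler kernel thread: sleep until there is work, then service each
+ * blkif on the list, re-queueing any interface with requests left over. */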
+static int blkio_schedule(void *arg)
+{
+ DECLARE_WAITQUEUE(wq, current);
+
+ blkif_t *blkif;
+ struct list_head *ent;
+
+ daemonize("xentapd");
+
+ for ( ; ; )
+ {
+ /* Wait for work to do. */
+ add_wait_queue(&blkio_schedule_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
+ list_empty(&blkio_schedule_list) )
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&blkio_schedule_wait, &wq);
+
+ /* Queue up a batch of requests. */
+ while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
+ !list_empty(&blkio_schedule_list) )
+ {
+ ent = blkio_schedule_list.next;
+ blkif = list_entry(ent, blkif_t, blkdev_list);
+ blkif_get(blkif);
+ remove_from_blkdev_list(blkif);
+ if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
+ add_to_blkdev_list_tail(blkif);
+ blkif_put(blkif);
+ }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+ /* Push the batch through to disc. */
+ run_task_queue(&tq_disk);
+#endif
+ }
+}
+
+static void maybe_trigger_blkio_schedule(void)
+{
+ /*
+ * Needed so that two processes, who together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
+ if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/
+ !list_empty(&blkio_schedule_list) )
+ wake_up(&blkio_schedule_wait);
+}
+
+void blkif_deschedule(blkif_t *blkif)
+{
+ remove_from_blkdev_list(blkif);
+}
+
+void __init blkdev_schedule_init(void)
+{
+ spin_lock_init(&blkio_schedule_list_lock);
+ INIT_LIST_HEAD(&blkio_schedule_list);
+
+ if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+ BUG();
+}
+
+/*-----[ Interrupt entry from a frontend ]------ */
+
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ blkif_t *blkif = dev_id;
+
+ add_to_blkdev_list_tail(blkif);
+ maybe_trigger_blkio_schedule();
+ return IRQ_HANDLED;
+}
+
+/*-----[ Other Frontend Ring functions ]-------- */
+
+static int do_block_io_op(blkif_t *blkif, int max_to_do)
+{
+ /* we have pending messages from the real frontend. */
+
+ blkif_request_t *req_s;
+ RING_IDX i, rp;
+ unsigned long flags;
+ active_req_t *ar;
+ int more_to_do = 0;
+ int notify_be = 0, notify_user = 0;
+
+ DPRINTK("PT got FE interrupt.\n");
+
+ if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1;
+
+ /* lock both rings */
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ rp = blkif->blk_ring.sring->req_prod;
+ rmb();
+
+ for ( i = blkif->blk_ring.req_cons;
+ (i != rp) &&
+ !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
+ i++ )
+ {
+
+ if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
+ {
+ more_to_do = 1;
+ break;
+ }
+
+ req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
+ /* This is a new request:
+ * Assign an active request record, and remap the id.
+ */
+ ar = get_active_req();
+ ar->id = req_s->id;
+ ar->nr_pages = req_s->nr_segments;
+ blkif_get(blkif);
+ ar->blkif = blkif;
+ req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
+ /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
+
+ /* FE -> BE interposition point is here. */
+
+ /* ------------------------------------------------------------- */
+ /* BLKIF_OP_PROBE_HACK: */
+ /* Signal to the backend that we are a tap domain. */
+
+ if (req_s->operation == BLKIF_OP_PROBE) {
+ DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
+ req_s->frame_and_sects[1] = BLKTAP_COOKIE;
+ }
+
+ /* ------------------------------------------------------------- */
+
+ /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+ /* Copy the request message to UFERing */
+ /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+ /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+ DPRINTK("req->UFERing\n");
+ blktap_write_fe_ring(req_s);
+ notify_user = 1;
+ }
+
+ /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+
+ /* (BE included to prevent noise from the FE when it's off) */
+ /* copy the request message to the BERing */
+
+ DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
+ (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
+ (unsigned)blktap_be_ring.req_prod_pvt &
+ (RING_SIZE(&blktap_be_ring)-1));
+
+ write_req_to_be_ring(req_s);
+ notify_be = 1;
+ }
+ }
+
+ blkif->blk_ring.req_cons = i;
+
+ /* unlock rings */
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ if (notify_user)
+ blktap_kick_user();
+ if (notify_be)
+ kick_be_domain();
+
+ return more_to_do;
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+ struct pt_regs *ptregs)
+{
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ RING_IDX rp, i;
+ unsigned long flags;
+
+ DPRINTK("PT got BE interrupt.\n");
+
+ /* lock both rings */
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ rp = blktap_be_ring.sring->rsp_prod;
+ rmb();
+
+ for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
+ {
+ resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
+
+ /* BE -> FE interposition point is here. */
+
+ blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
+
+ /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+ /* Copy the response message to UBERing */
+ /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+ /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+ DPRINTK("rsp->UBERing\n");
+ blktap_write_be_ring(resp_s);
+ blktap_kick_user();
+
+ }
+
+ /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+
+ /* (fe included to prevent random interference from the BE) */
+ /* Copy the response message to FERing */
+
+ DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
+ (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
+ (unsigned)blkif->blk_ring.rsp_prod_pvt &
+ (RING_SIZE(&blkif->blk_ring)-1));
+
+ write_resp_to_fe_ring(blkif, resp_s);
+ kick_fe_domain(blkif);
+
+ }
+ }
+
+ blktap_be_ring.rsp_cons = i;
+
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+/* Debug : print the current ring indices. */
+
+void print_vm_ring_idxs(void)
+{
+ int i;
+ blkif_t *blkif;
+
+ WPRINTK("FE Rings: \n---------\n");
+ for ( i = 0; i < 50; i++) {
+ blkif = blkif_find_by_handle((domid_t)i, 0);
+ if (blkif != NULL) {
+ if (blkif->blk_ring.sring != NULL) {
+ WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n", i,
+ blkif->blk_ring.req_cons,
+ blkif->blk_ring.rsp_prod_pvt,
+ blkif->blk_ring.sring->req_prod,
+ blkif->blk_ring.sring->rsp_prod);
+ } else {
+ WPRINTK("%2d: [no device channel yet]\n", i);
+ }
+ }
+ }
+ if (blktap_be_ring.sring != NULL) {
+ WPRINTK("BE Ring: \n--------\n");
+ WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_be_ring.rsp_cons,
+ blktap_be_ring.req_prod_pvt,
+ blktap_be_ring.sring->req_prod,
+ blktap_be_ring.sring->rsp_prod);
+ }
+}
--- /dev/null
+/******************************************************************************
+ * blktap_userdev.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interface between the driver and a character device.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
+
+#include "blktap.h"
+
+
+unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
+
+/* Only one process may open /dev/xen/blktap at any time. */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* make this ring->state */
+
+/* for poll: */
+static wait_queue_head_t blktap_wait;
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma;
+unsigned long mmap_vstart;
+unsigned long rings_vstart;
+
+/* Rings up to user space. */
+static blkif_front_ring_t blktap_ufe_ring;
+static blkif_back_ring_t blktap_ube_ring;
+static ctrl_front_ring_t blktap_uctrl_ring;
+
+/* local prototypes */
+static int blktap_read_fe_ring(void);
+static int blktap_read_be_ring(void);
+
+/* -------[ blktap vm ops ]------------------------------------------- */
+
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ /*
+ * if the page has not been mapped in by the driver then generate
+ * a SIGBUS to the domain.
+ */
+
+ force_sig(SIGBUS, current);
+
+ return 0;
+}
+
+struct vm_operations_struct blktap_vm_ops = {
+ nopage: blktap_nopage,
+};
+
+/* -------[ blktap file ops ]----------------------------------------- */
+
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+ blkif_sring_t *sring;
+ ctrl_sring_t *csring;
+
+ if ( test_and_set_bit(0, &blktap_dev_inuse) )
+ return -EBUSY;
+
+ printk(KERN_ALERT "blktap open.\n");
+
+ /* Allocate the ctrl ring. */
+ csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (csring == NULL)
+ goto fail_nomem;
+
+ SetPageReserved(virt_to_page(csring));
+
+ SHARED_RING_INIT(csring);
+ FRONT_RING_INIT(&blktap_uctrl_ring, csring);
+
+
+ /* Allocate the fe ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
+ goto fail_free_ctrl;
+
+ SetPageReserved(virt_to_page(sring));
+
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blktap_ufe_ring, sring);
+
+ /* Allocate the be ring. */
+ sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+ if (sring == NULL)
+ goto fail_free_fe;
+
+ SetPageReserved(virt_to_page(sring));
+
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blktap_ube_ring, sring);
+
+ DPRINTK(KERN_ALERT "blktap open.\n");
+
+ return 0;
+
+ fail_free_ctrl:
+ free_page( (unsigned long) blktap_uctrl_ring.sring);
+
+ fail_free_fe:
+ free_page( (unsigned long) blktap_ufe_ring.sring);
+
+ fail_nomem:
+ return -ENOMEM;
+}
+
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+ blktap_dev_inuse = 0;
+ blktap_ring_ok = 0;
+
+ printk(KERN_ALERT "blktap closed.\n");
+
+ /* Free the ring page. */
+ ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
+ free_page((unsigned long) blktap_uctrl_ring.sring);
+
+ ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
+ free_page((unsigned long) blktap_ufe_ring.sring);
+
+ ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
+ free_page((unsigned long) blktap_ube_ring.sring);
+
+ return 0;
+}
+
+/* Note on mmap:
+ * remap_pfn_range sets VM_IO on vma->vm_flags. In trying to make libaio
+ * work to do direct page access from userspace, this ended up being a
+ * problem. The bigger issue seems to be that there is no way to map
+ * a foreign page into user space and have the virtual address of that
+ * page map sanely down to an mfn.
+ * Removing the VM_IO flag results in a loop in get_user_pages, as
+ * pfn_valid() always fails on a foreign page.
+ */
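+/* The mapped region holds the three shared rings (ctrl, be, fe) in its first
+ * RING_PAGES pages, followed by MMAP_PAGES pages for request data. */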
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ int size;
+
+ printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+ vma->vm_start, vma->vm_end);
+
+ vma->vm_ops = &blktap_vm_ops;
+
+ size = vma->vm_end - vma->vm_start;
+ if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+ printk(KERN_INFO
+ "blktap: you _must_ map exactly %d pages!\n",
+ MMAP_PAGES + RING_PAGES);
+ return -EAGAIN;
+ }
+
+ size >>= PAGE_SHIFT;
+ printk(KERN_INFO "blktap: %d ring pages + %d data pages.\n",
+ RING_PAGES, size - RING_PAGES);
+
+ rings_vstart = vma->vm_start;
+ mmap_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+
+ /* Map the ring pages to the start of the region and reserve it. */
+
+ /* not sure if I really need to do this... */
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start,
+ __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("ctrl_ring: remap_pfn_range failure!\n");
+ }
+
+
+ DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE,
+ __pa(blktap_ube_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("be_ring: remap_pfn_range failure!\n");
+ }
+
+ DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
+ if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ),
+ __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ WPRINTK("fe_ring: remap_pfn_range failure!\n");
+ }
+
+ blktap_vma = vma;
+ blktap_ring_ok = 1;
+
+ return 0;
+}
+
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ switch(cmd) {
+ case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+ return blktap_read_fe_ring();
+
+ case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
+ return blktap_read_be_ring();
+
+ case BLKTAP_IOCTL_SETMODE:
+ if (BLKTAP_MODE_VALID(arg)) {
+ blktap_mode = arg;
+ /* XXX: may need to flush rings here. */
+ printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+ return 0;
+ }
+ break;
+ case BLKTAP_IOCTL_PRINT_IDXS:
+ {
+ print_vm_ring_idxs();
+ WPRINTK("User Rings: \n-----------\n");
+ WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ufe_ring.rsp_cons,
+ blktap_ufe_ring.req_prod_pvt,
+ blktap_ufe_ring.sring->req_prod,
+ blktap_ufe_ring.sring->rsp_prod);
+ WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d "
+ "| req_prod: %2d, rsp_prod: %2d\n",
+ blktap_ube_ring.req_cons,
+ blktap_ube_ring.rsp_prod_pvt,
+ blktap_ube_ring.sring->req_prod,
+ blktap_ube_ring.sring->rsp_prod);
+
+ }
+ }
+ return -ENOIOCTLCMD;
+}
+
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+ poll_wait(file, &blktap_wait, wait);
+
+ if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
+ RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ||
+ RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
+
+ RING_PUSH_REQUESTS(&blktap_uctrl_ring);
+ RING_PUSH_REQUESTS(&blktap_ufe_ring);
+ RING_PUSH_RESPONSES(&blktap_ube_ring);
+ return POLLIN | POLLRDNORM;
+ }
+
+ return 0;
+}
+
+void blktap_kick_user(void)
+{
+ /* blktap_ring->req_prod = blktap_req_prod; */
+ wake_up_interruptible(&blktap_wait);
+}
+
+static struct file_operations blktap_fops = {
+ owner: THIS_MODULE,
+ poll: blktap_poll,
+ ioctl: blktap_ioctl,
+ open: blktap_open,
+ release: blktap_release,
+ mmap: blktap_mmap,
+};
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+ blkif_request_t *target;
+ int error, i;
+
+ /*
+ * This is called to pass a request from the real frontend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: ufe_ring not ready for a request!\n");
+ return 0;
+ }
+
+ if ( RING_FULL(&blktap_ufe_ring) ) {
+ DPRINTK("blktap: fe_ring is full, can't add.\n");
+ return 0;
+ }
+
+ target = RING_GET_REQUEST(&blktap_ufe_ring,
+ blktap_ufe_ring.req_prod_pvt);
+ memcpy(target, req, sizeof(*req));
+
+ /* Attempt to map the foreign pages directly in to the application */
+ for (i=0; i<target->nr_segments; i++) {
+
+ error = direct_remap_area_pages(blktap_vma->vm_mm,
+ MMAP_VADDR(ID_TO_IDX(req->id), i),
+ target->frame_and_sects[i] & PAGE_MASK,
+ PAGE_SIZE,
+ blktap_vma->vm_page_prot,
+ ID_TO_DOM(req->id));
+ if ( error != 0 ) {
+ printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+ /* the request is now dropped on the floor. */
+ return 0;
+ }
+ }
+
+ blktap_ufe_ring.req_prod_pvt++;
+
+ return 0;
+}
+
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+ blkif_response_t *target;
+
+ /*
+ * This is called to pass a response from the real backend domain's
+ * blkif ring to the character device.
+ */
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: be_ring not ready for a request!\n");
+ return 0;
+ }
+
+ /* No test for fullness in the response direction. */
+
+ target = RING_GET_RESPONSE(&blktap_ube_ring,
+ blktap_ube_ring.rsp_prod_pvt);
+ memcpy(target, rsp, sizeof(*rsp));
+
+ /* no mapping -- pages were mapped in blktap_write_fe_ring() */
+
+ blktap_ube_ring.rsp_prod_pvt++;
+
+ return 0;
+}
+
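+/* Unmap a request's foreign pages from the user-space region and flush the
+ * TLB with a single batched multicall. */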
+static void blktap_fast_flush_area(int idx, int nr_pages)
+{
+ multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+ int i;
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ mcl[i].op = __HYPERVISOR_update_va_mapping;
+ mcl[i].args[0] = MMAP_VADDR(idx, i);
+ mcl[i].args[1] = 0;
+ mcl[i].args[2] = 0;
+ }
+
+ mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+ if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+ BUG();
+}
+
+static int blktap_read_fe_ring(void)
+{
+ /* This is called to read responses from the UFE ring. */
+
+ RING_IDX i, rp;
+ blkif_response_t *resp_s;
+ blkif_t *blkif;
+ active_req_t *ar;
+
+ DPRINTK("blktap_read_fe_ring()\n");
+
+ /* if we are forwarding from UFERing to FERing */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
+
+ /* for each outstanding message on the UFEring */
+ rp = blktap_ufe_ring.sring->rsp_prod;
+ rmb();
+
+ for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
+ {
+ resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
+
+ DPRINTK("resp->fe_ring\n");
+ ar = lookup_active_req(ID_TO_IDX(resp_s->id));
+ blkif = ar->blkif;
+ blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
+ write_resp_to_fe_ring(blkif, resp_s);
+ kick_fe_domain(blkif);
+ }
+
+ blktap_ufe_ring.rsp_cons = i;
+ }
+ return 0;
+}
+
+static int blktap_read_be_ring(void)
+{
+ /* This is called to read requests from the UBE ring. */
+
+ RING_IDX i, rp;
+ blkif_request_t *req_s;
+
+ DPRINTK("blktap_read_be_ring()\n");
+
+ /* if we are forwarding from UBERing to BERing */
+ if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
+
+ /* for each outstanding message on the UBEring */
+ rp = blktap_ube_ring.sring->req_prod;
+ rmb();
+ for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
+ {
+ req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
+
+ DPRINTK("req->be_ring\n");
+ write_req_to_be_ring(req_s);
+ kick_be_domain();
+ }
+
+ blktap_ube_ring.req_cons = i;
+ }
+
+ return 0;
+}
+
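+/* Forward a control-plane message to the user-space process via the shared
+ * control ring. */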
+int blktap_write_ctrl_ring(ctrl_msg_t *msg)
+{
+ ctrl_msg_t *target;
+
+ if ( ! blktap_ring_ok ) {
+ DPRINTK("blktap: be_ring not ready for a request!\n");
+ return 0;
+ }
+
+ /* No test for fullness in the request direction. */
+
+ target = RING_GET_REQUEST(&blktap_uctrl_ring,
+ blktap_uctrl_ring.req_prod_pvt);
+ memcpy(target, msg, sizeof(*msg));
+
+ blktap_uctrl_ring.req_prod_pvt++;
+
+ /* currently treat the ring as unidirectional. */
+ blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
+
+ return 0;
+
+}
+
+/* -------[ blktap module setup ]------------------------------------- */
+
+static struct miscdevice blktap_miscdev = {
+ .minor = BLKTAP_MINOR,
+ .name = "blktap",
+ .fops = &blktap_fops,
+ .devfs_name = "misc/blktap",
+};
+
+int blktap_init(void)
+{
+ int err;
+
+ err = misc_register(&blktap_miscdev);
+ if ( err != 0 )
+ {
+ printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+ return err;
+ }
+
+ init_waitqueue_head(&blktap_wait);
+
+
+ return 0;
+}
{
unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
pgd_t *pgd = pgd_offset_k(m2pv);
- pmd_t *pmd = pmd_offset(pgd, m2pv);
- unsigned long m2p_start_mfn = pmd_val_ma(*pmd) >> PAGE_SHIFT;
+ pud_t *pud = pud_offset(pgd, m2pv);
+ pmd_t *pmd = pmd_offset(pud, m2pv);
+ unsigned long m2p_start_mfn = pfn_to_mfn(pmd_val(*pmd) >> PAGE_SHIFT);
ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0;
}
break;
--- /dev/null
+
+#ifndef __USBIF__BACKEND__COMMON_H__
+#define __USBIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/ctrl_if.h>
+#include <asm-xen/hypervisor.h>
+
+#include <asm-xen/xen-public/io/usbif.h>
+
+#if 0
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+typedef struct usbif_priv_st usbif_priv_t;
+
+struct usbif_priv_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
+ /* Comms Information */
+ usbif_back_ring_t usb_ring;
+ /* Private fields. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
+ usbif_priv_t *hash_next;
+ struct list_head usbif_list;
+ spinlock_t usb_ring_lock;
+ atomic_t refcnt;
+
+ struct work_struct work;
+};
+
+void usbif_create(usbif_be_create_t *create);
+void usbif_destroy(usbif_be_destroy_t *destroy);
+void usbif_connect(usbif_be_connect_t *connect);
+int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id);
+void usbif_disconnect_complete(usbif_priv_t *up);
+
+void usbif_release_port(usbif_be_release_port_t *msg);
+int usbif_claim_port(usbif_be_claim_port_t *msg);
+void usbif_release_ports(usbif_priv_t *up);
+
+usbif_priv_t *usbif_find(domid_t domid);
+#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define usbif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ usbif_disconnect_complete(_b); \
+ } while (0)
+
+
+void usbif_interface_init(void);
+void usbif_ctrlif_init(void);
+
+void usbif_deschedule(usbif_priv_t *up);
+void remove_from_usbif_list(usbif_priv_t *up);
+
+irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+#endif /* __USBIF__BACKEND__COMMON_H__ */
--- /dev/null
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/control.c
+ *
+ * Routines for interfacing with the control plane.
+ *
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ DPRINTK("Received usbif backend message, subtype=%d\n", msg->subtype);
+
+ switch ( msg->subtype )
+ {
+ case CMSG_USBIF_BE_CREATE:
+ if ( msg->length != sizeof(usbif_be_create_t) )
+ goto parse_error;
+ usbif_create((usbif_be_create_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_DESTROY:
+ if ( msg->length != sizeof(usbif_be_destroy_t) )
+ goto parse_error;
+ usbif_destroy((usbif_be_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_CONNECT:
+ if ( msg->length != sizeof(usbif_be_connect_t) )
+ goto parse_error;
+ usbif_connect((usbif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_DISCONNECT:
+ if ( msg->length != sizeof(usbif_be_disconnect_t) )
+ goto parse_error;
+ if ( !usbif_disconnect((usbif_be_disconnect_t *)&msg->msg[0],msg->id) )
+ return; /* Sending the response is deferred until later. */
+ break;
+ case CMSG_USBIF_BE_CLAIM_PORT:
+ if ( msg->length != sizeof(usbif_be_claim_port_t) )
+ goto parse_error;
+ usbif_claim_port((usbif_be_claim_port_t *)&msg->msg[0]);
+ break;
+ case CMSG_USBIF_BE_RELEASE_PORT:
+ if ( msg->length != sizeof(usbif_be_release_port_t) )
+ goto parse_error;
+ usbif_release_port((usbif_be_release_port_t *)&msg->msg[0]);
+ break;
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+void usbif_ctrlif_init(void)
+{
+ ctrl_msg_t cmsg;
+ usbif_be_driver_status_changed_t st;
+
+ (void)ctrl_if_register_receiver(CMSG_USBIF_BE, usbif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_USBIF_BE;
+ cmsg.subtype = CMSG_USBIF_BE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(usbif_be_driver_status_changed_t);
+ st.status = USBIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
--- /dev/null
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/interface.c
+ *
+ * USB device interface management.
+ *
+ * by Mark Williamson, Copyright (c) 2004
+ *
+ * Based on arch/xen/drivers/blkif/backend/interface.c
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
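+/* usbif interfaces are kept in a hash table keyed on the frontend domain id. */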
+#define USBIF_HASHSZ 1024
+#define USBIF_HASH(_d) (((int)(_d))&(USBIF_HASHSZ-1))
+
+static kmem_cache_t *usbif_priv_cachep;
+static usbif_priv_t *usbif_priv_hash[USBIF_HASHSZ];
+
+usbif_priv_t *usbif_find(domid_t domid)
+{
+ usbif_priv_t *up = usbif_priv_hash[USBIF_HASH(domid)];
+ while ( (up != NULL ) && ( up->domid != domid ) )
+ up = up->hash_next;
+ return up;
+}
+
+static void __usbif_disconnect_complete(void *arg)
+{
+ usbif_priv_t *usbif = (usbif_priv_t *)arg;
+ ctrl_msg_t cmsg;
+ usbif_be_disconnect_t disc;
+
+ /*
+ * These can't be done in usbif_disconnect() because at that point there
+ * may be outstanding requests at the device whose asynchronous responses
+ * must still be notified to the remote driver.
+ */
+ unbind_evtchn_from_irq(usbif->evtchn);
+ vfree(usbif->usb_ring.sring);
+
+ /* Construct the deferred response message. */
+ cmsg.type = CMSG_USBIF_BE;
+ cmsg.subtype = CMSG_USBIF_BE_DISCONNECT;
+ cmsg.id = usbif->disconnect_rspid;
+ cmsg.length = sizeof(usbif_be_disconnect_t);
+ disc.domid = usbif->domid;
+ disc.status = USBIF_BE_STATUS_OKAY;
+ memcpy(cmsg.msg, &disc, sizeof(disc));
+
+ /*
+ * Make sure message is constructed /before/ status change, because
+ * after the status change the 'usbif' structure could be deallocated at
+ * any time. Also make sure we send the response /after/ status change,
+ * as otherwise a subsequent CONNECT request could spuriously fail if
+ * another CPU doesn't see the status change yet.
+ */
+ mb();
+ if ( usbif->status != DISCONNECTING )
+ BUG();
+ usbif->status = DISCONNECTED;
+ mb();
+
+ /* Send the successful response. */
+ ctrl_if_send_response(&cmsg);
+}
+
+void usbif_disconnect_complete(usbif_priv_t *up)
+{
+ INIT_WORK(&up->work, __usbif_disconnect_complete, (void *)up);
+ schedule_work(&up->work);
+}
+
+void usbif_create(usbif_be_create_t *create)
+{
+ domid_t domid = create->domid;
+ usbif_priv_t **pup, *up;
+
+ if ( (up = kmem_cache_alloc(usbif_priv_cachep, GFP_KERNEL)) == NULL )
+ {
+ DPRINTK("Could not create usbif: out of memory\n");
+ create->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ memset(up, 0, sizeof(*up));
+ up->domid = domid;
+ up->status = DISCONNECTED;
+ spin_lock_init(&up->usb_ring_lock);
+ atomic_set(&up->refcnt, 0);
+
+ pup = &usbif_priv_hash[USBIF_HASH(domid)];
+ while ( *pup != NULL )
+ {
+ if ( (*pup)->domid == domid )
+ {
+ create->status = USBIF_BE_STATUS_INTERFACE_EXISTS;
+ kmem_cache_free(usbif_priv_cachep, up);
+ return;
+ }
+ pup = &(*pup)->hash_next;
+ }
+
+ up->hash_next = *pup;
+ *pup = up;
+
+ create->status = USBIF_BE_STATUS_OKAY;
+}
+
+void usbif_destroy(usbif_be_destroy_t *destroy)
+{
+ domid_t domid = destroy->domid;
+ usbif_priv_t **pup, *up;
+
+ pup = &usbif_priv_hash[USBIF_HASH(domid)];
+ while ( (up = *pup) != NULL )
+ {
+ if ( up->domid == domid )
+ {
+ if ( up->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
+ pup = &up->hash_next;
+ }
+
+ destroy->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ *pup = up->hash_next;
+ usbif_release_ports(up);
+ kmem_cache_free(usbif_priv_cachep, up);
+ destroy->status = USBIF_BE_STATUS_OKAY;
+}
+
+void usbif_connect(usbif_be_connect_t *connect)
+{
+ domid_t domid = connect->domid;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long shmem_frame = connect->shmem_frame;
+ struct vm_struct *vma;
+ pgprot_t prot;
+ int error;
+ usbif_priv_t *up;
+ usbif_sring_t *sring;
+
+ up = usbif_find(domid);
+ if ( unlikely(up == NULL) )
+ {
+ DPRINTK("usbif_connect attempted for non-existent usbif (%u)\n",
+ connect->domid);
+ connect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ {
+ connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+ error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
+ shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ if ( error != 0 )
+ {
+ if ( error == -ENOMEM )
+ connect->status = USBIF_BE_STATUS_OUT_OF_MEMORY;
+ else if ( error == -EFAULT )
+ connect->status = USBIF_BE_STATUS_MAPPING_ERROR;
+ else
+ connect->status = USBIF_BE_STATUS_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ if ( up->status != DISCONNECTED )
+ {
+ connect->status = USBIF_BE_STATUS_INTERFACE_CONNECTED;
+ vfree(vma->addr);
+ return;
+ }
+
+ sring = (usbif_sring_t *)vma->addr;
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&up->usb_ring, sring);
+
+ up->evtchn = evtchn;
+ up->irq = bind_evtchn_to_irq(evtchn);
+ up->shmem_frame = shmem_frame;
+ up->status = CONNECTED;
+ usbif_get(up);
+
+ request_irq(up->irq, usbif_be_int, 0, "usbif-backend", up);
+
+ connect->status = USBIF_BE_STATUS_OKAY;
+}
+
+/* Remove URBs for this interface before destroying it. */
+void usbif_deschedule(usbif_priv_t *up)
+{
+ remove_from_usbif_list(up);
+}
+
+int usbif_disconnect(usbif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+ domid_t domid = disconnect->domid;
+ usbif_priv_t *up;
+
+ up = usbif_find(domid);
+ if ( unlikely(up == NULL) )
+ {
+ DPRINTK("usbif_disconnect attempted for non-existent usbif"
+ " (%u)\n", disconnect->domid);
+ disconnect->status = USBIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return 1; /* Caller will send response error message. */
+ }
+
+ if ( up->status == CONNECTED )
+ {
+ up->status = DISCONNECTING;
+ up->disconnect_rspid = rsp_id;
+ wmb(); /* Let other CPUs see the status change. */
+ free_irq(up->irq, up);
+ usbif_deschedule(up);
+ usbif_put(up);
+ return 0; /* Caller should not send response message. */
+ }
+
+ disconnect->status = USBIF_BE_STATUS_OKAY;
+ return 1;
+}
+
+void __init usbif_interface_init(void)
+{
+ usbif_priv_cachep = kmem_cache_create("usbif_priv_cache",
+ sizeof(usbif_priv_t),
+ 0, 0, NULL, NULL);
+ memset(usbif_priv_hash, 0, sizeof(usbif_priv_hash));
+}
--- /dev/null
+/******************************************************************************
+ * arch/xen/drivers/usbif/backend/main.c
+ *
+ * Backend for the Xen virtual USB driver - provides an abstraction of a
+ * USB host controller to the corresponding frontend driver.
+ *
+ * by Mark Williamson
+ * Copyright (c) 2004 Intel Research Cambridge
+ * Copyright (c) 2004, 2005 Mark Williamson
+ *
+ * Based on arch/xen/drivers/blkif/backend/main.c
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ */
+
+#include "common.h"
+
+
+#include <linux/list.h>
+#include <linux/usb.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/tqueue.h>
+
+/*
+ * This is rather arbitrary.
+ */
+#define MAX_PENDING_REQS 4
+#define BATCH_PER_DOMAIN 1
+
+static unsigned long mmap_vstart;
+
+/* Needs to be sufficiently large that we can map the (large) buffers
+ * the USB mass storage driver wants. */
+#define MMAP_PAGES_PER_REQUEST \
+ (128)
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+
+static spinlock_t owned_ports_lock;
+LIST_HEAD(owned_ports);
+
+/* A list of these structures is used to track ownership of physical USB
+ * ports. */
+typedef struct
+{
+ usbif_priv_t *usbif_priv;
+ char path[16];
+ int guest_port;
+ int enabled;
+ struct list_head list;
+ unsigned long guest_address; /* The USB device address that has been
+ * assigned by the guest. */
+ int dev_present; /* Is there a device present? */
+ struct usb_device * dev;
+ unsigned long ifaces; /* What interfaces are present on this device? */
+} owned_port_t;
+
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. When the request completes, the specified
+ * domain has a response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+ usbif_priv_t *usbif_priv;
+ unsigned long id;
+ int nr_pages;
+ unsigned short operation;
+ int status;
+} pending_req_t;
+
+/*
+ * We can't allocate pending_req's in order, since they may complete out of
+ * order. We therefore maintain an allocation ring. This ring also indicates
+ * when enough work has been passed down -- at that point the allocation ring
+ * will be empty.
+ */
+static pending_req_t pending_reqs[MAX_PENDING_REQS];
+static unsigned char pending_ring[MAX_PENDING_REQS];
+static spinlock_t pend_prod_lock;
+
+/* NB. We use a different index type to differentiate from shared usb rings. */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+static int do_usb_io_op(usbif_priv_t *usbif, int max_to_do);
+static void make_response(usbif_priv_t *usbif, unsigned long id,
+ unsigned short op, int st, int inband,
+ unsigned long actual_length);
+static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long port);
+static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req);
+static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid);
+static owned_port_t *usbif_find_port(char *);
+
+/******************************************************************
+ * PRIVATE DEBUG FUNCTIONS
+ */
+
+#undef DEBUG
+#ifdef DEBUG
+
+static void dump_port(owned_port_t *p)
+{
+ printk(KERN_DEBUG "owned_port_t @ %p\n"
+ " usbif_priv @ %p\n"
+ " path: %s\n"
+ " guest_port: %d\n"
+ " guest_address: %ld\n"
+ " dev_present: %d\n"
+ " dev @ %p\n"
+ " ifaces: 0x%lx\n",
+ p, p->usbif_priv, p->path, p->guest_port, p->guest_address,
+ p->dev_present, p->dev, p->ifaces);
+}
+
+
+static void dump_request(usbif_request_t *req)
+{
+ printk(KERN_DEBUG "id = 0x%lx\n"
+ "devnum %d\n"
+ "endpoint 0x%x\n"
+ "direction %d\n"
+ "speed %d\n"
+ "pipe_type 0x%x\n"
+ "transfer_buffer 0x%lx\n"
+ "length 0x%lx\n"
+ "transfer_flags 0x%lx\n"
+ "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n"
+ "iso_schedule = 0x%lx\n"
+ "num_iso %ld\n",
+ req->id, req->devnum, req->endpoint, req->direction, req->speed,
+ req->pipe_type, req->transfer_buffer, req->length,
+ req->transfer_flags, req->setup[0], req->setup[1], req->setup[2],
+ req->setup[3], req->setup[4], req->setup[5], req->setup[6],
+ req->setup[7], req->iso_schedule, req->num_iso);
+}
+
+static void dump_urb(struct urb *urb)
+{
+ printk(KERN_DEBUG "dumping urb @ %p\n", urb);
+
+#define DUMP_URB_FIELD(name, format) \
+ printk(KERN_DEBUG " " # name " " format "\n", urb-> name)
+
+ DUMP_URB_FIELD(pipe, "0x%x");
+ DUMP_URB_FIELD(status, "%d");
+ DUMP_URB_FIELD(transfer_flags, "0x%x");
+ DUMP_URB_FIELD(transfer_buffer, "%p");
+ DUMP_URB_FIELD(transfer_buffer_length, "%d");
+ DUMP_URB_FIELD(actual_length, "%d");
+}
+
+static void dump_response(usbif_response_t *resp)
+{
+ printk(KERN_DEBUG "usbback: Sending response:\n"
+ " id = 0x%x\n"
+ " op = %d\n"
+ " status = %d\n"
+ " data = %d\n"
+ " length = %d\n",
+ resp->id, resp->op, resp->status, resp->data, resp->length);
+}
+
+#else /* DEBUG */
+
+#define dump_port(blah) ((void)0)
+#define dump_request(blah) ((void)0)
+#define dump_urb(blah) ((void)0)
+#define dump_response(blah) ((void)0)
+
+#endif /* DEBUG */
+
+/******************************************************************
+ * MEMORY MANAGEMENT
+ */
+
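+/* Unmap a completed request's foreign pages and flush the TLB in a single
+ * batched multicall. */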
+static void fast_flush_area(int idx, int nr_pages)
+{
+ multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+ int i;
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ mcl[i].op = __HYPERVISOR_update_va_mapping;
+ mcl[i].args[0] = MMAP_VADDR(idx, i);
+ mcl[i].args[1] = 0;
+ mcl[i].args[2] = 0;
+ }
+
+ mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
+ if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
+ BUG();
+}
+
+
+/******************************************************************
+ * USB INTERFACE SCHEDULER LIST MAINTENANCE
+ */
+
+static struct list_head usbio_schedule_list;
+static spinlock_t usbio_schedule_list_lock;
+
+static int __on_usbif_list(usbif_priv_t *up)
+{
+ return up->usbif_list.next != NULL;
+}
+
+void remove_from_usbif_list(usbif_priv_t *up)
+{
+ unsigned long flags;
+ if ( !__on_usbif_list(up) ) return;
+ spin_lock_irqsave(&usbio_schedule_list_lock, flags);
+ if ( __on_usbif_list(up) )
+ {
+ list_del(&up->usbif_list);
+ up->usbif_list.next = NULL;
+ usbif_put(up);
+ }
+ spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
+}
+
+static void add_to_usbif_list_tail(usbif_priv_t *up)
+{
+ unsigned long flags;
+ if ( __on_usbif_list(up) ) return;
+ spin_lock_irqsave(&usbio_schedule_list_lock, flags);
+ if ( !__on_usbif_list(up) && (up->status == CONNECTED) )
+ {
+ list_add_tail(&up->usbif_list, &usbio_schedule_list);
+ usbif_get(up);
+ }
+ spin_unlock_irqrestore(&usbio_schedule_list_lock, flags);
+}
+
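+/* Return a pending_req slot to the free ring so it can be reused. */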
+void free_pending(int pending_idx)
+{
+ unsigned long flags;
+
+ /* Free the pending request. */
+ spin_lock_irqsave(&pend_prod_lock, flags);
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ spin_unlock_irqrestore(&pend_prod_lock, flags);
+}
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as urb->complete()
+ */
+
+static void maybe_trigger_usbio_schedule(void);
+
+static void __end_usb_io_op(struct urb *purb)
+{
+ pending_req_t *pending_req;
+ int pending_idx;
+
+ pending_req = purb->context;
+
+ pending_idx = pending_req - pending_reqs;
+
+ ASSERT(purb->actual_length <= purb->transfer_buffer_length);
+ ASSERT(purb->actual_length <= pending_req->nr_pages * PAGE_SIZE);
+
+ /* An error fails the entire request. */
+ if ( purb->status )
+ {
+ printk(KERN_WARNING "URB @ %p failed. Status %d\n", purb, purb->status);
+ }
+
+ if ( usb_pipetype(purb->pipe) == 0 )
+ {
+ int i;
+ usbif_iso_t *sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, pending_req->nr_pages - 1);
+
+ /* If we're dealing with an iso pipe, we need to copy back the schedule. */
+ for ( i = 0; i < purb->number_of_packets; i++ )
+ {
+ sched[i].length = purb->iso_frame_desc[i].actual_length;
+ ASSERT(sched[i].buffer_offset ==
+ purb->iso_frame_desc[i].offset);
+ sched[i].status = purb->iso_frame_desc[i].status;
+ }
+ }
+
+ fast_flush_area(pending_req - pending_reqs, pending_req->nr_pages);
+
+ kfree(purb->setup_packet);
+
+ make_response(pending_req->usbif_priv, pending_req->id,
+ pending_req->operation, pending_req->status, 0, purb->actual_length);
+ usbif_put(pending_req->usbif_priv);
+
+ usb_free_urb(purb);
+
+ free_pending(pending_idx);
+
+ rmb();
+
+ /* Check for anything still waiting in the rings, having freed a request... */
+ maybe_trigger_usbio_schedule();
+}
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static DECLARE_WAIT_QUEUE_HEAD(usbio_schedule_wait);
+
+static int usbio_schedule(void *arg)
+{
+ DECLARE_WAITQUEUE(wq, current);
+
+ usbif_priv_t *up;
+ struct list_head *ent;
+
+ daemonize();
+
+ for ( ; ; )
+ {
+ /* Wait for work to do. */
+ add_wait_queue(&usbio_schedule_wait, &wq);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if ( (NR_PENDING_REQS == MAX_PENDING_REQS) ||
+ list_empty(&usbio_schedule_list) )
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&usbio_schedule_wait, &wq);
+
+ /* Queue up a batch of requests. */
+ while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+ !list_empty(&usbio_schedule_list) )
+ {
+ ent = usbio_schedule_list.next;
+ up = list_entry(ent, usbif_priv_t, usbif_list);
+ usbif_get(up);
+ remove_from_usbif_list(up);
+ if ( do_usb_io_op(up, BATCH_PER_DOMAIN) )
+ add_to_usbif_list_tail(up);
+ usbif_put(up);
+ }
+ }
+}
+
+static void maybe_trigger_usbio_schedule(void)
+{
+ /*
+ * Needed so that two processes, who together make the following predicate
+ * true, don't both read stale values and evaluate the predicate
+ * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
+ */
+ smp_mb();
+
+ if ( !list_empty(&usbio_schedule_list) )
+ wake_up(&usbio_schedule_wait);
+}
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+irqreturn_t usbif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ usbif_priv_t *up = dev_id;
+
+ smp_mb();
+
+ add_to_usbif_list_tail(up);
+
+ /* Will in fact /always/ trigger an io schedule in this case. */
+ maybe_trigger_usbio_schedule();
+
+ return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the usb-device layer proper.
+ */
+
+static int do_usb_io_op(usbif_priv_t *up, int max_to_do)
+{
+ usbif_back_ring_t *usb_ring = &up->usb_ring;
+ usbif_request_t *req;
+ RING_IDX i, rp;
+ int more_to_do = 0;
+
+ rp = usb_ring->sring->req_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ /* Take items off the comms ring, taking care not to overflow. */
+ for ( i = usb_ring->req_cons;
+ (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
+ i++ )
+ {
+ if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
+ {
+ more_to_do = 1;
+ break;
+ }
+
+ req = RING_GET_REQUEST(usb_ring, i);
+
+ switch ( req->operation )
+ {
+ case USBIF_OP_PROBE:
+ dispatch_usb_probe(up, req->id, req->port);
+ break;
+
+ case USBIF_OP_IO:
+ /* Assemble an appropriate URB. */
+ dispatch_usb_io(up, req);
+ break;
+
+ case USBIF_OP_RESET:
+ dispatch_usb_reset(up, req->port);
+ break;
+
+ default:
+ DPRINTK("error: unknown USB io operation [%d]\n",
+ req->operation);
+ make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+ break;
+ }
+ }
+
+ usb_ring->req_cons = i;
+
+ return more_to_do;
+}
+
+static owned_port_t *find_guest_port(usbif_priv_t *up, int port)
+{
+ unsigned long flags;
+ struct list_head *l;
+
+ spin_lock_irqsave(&owned_ports_lock, flags);
+ list_for_each(l, &owned_ports)
+ {
+ owned_port_t *p = list_entry(l, owned_port_t, list);
+ if(p->usbif_priv == up && p->guest_port == port)
+ {
+ spin_unlock_irqrestore(&owned_ports_lock, flags);
+ return p;
+ }
+ }
+ spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+ return NULL;
+}
+
+static void dispatch_usb_reset(usbif_priv_t *up, unsigned long portid)
+{
+ owned_port_t *port = find_guest_port(up, portid);
+ int ret = 0;
+
+
+ /* Allowing the guest to actually reset the device causes more problems
+ * than it's worth. We just fake it out in software but we will do a real
+ * reset when the interface is destroyed. */
+
+ if ( port == NULL )
+ {
+ /* Reset request for a port we do not own; reject it. */
+ make_response(up, 0, USBIF_OP_RESET, -EINVAL, 0, 0);
+ return;
+ }
+
+ dump_port(port);
+
+ port->guest_address = 0;
+ /* If there's an attached device then the port is now enabled. */
+ if ( port->dev_present )
+ port->enabled = 1;
+ else
+ port->enabled = 0;
+
+ make_response(up, 0, USBIF_OP_RESET, ret, 0, 0);
+}
+
+static void dispatch_usb_probe(usbif_priv_t *up, unsigned long id, unsigned long portid)
+{
+ owned_port_t *port = find_guest_port(up, portid);
+ int ret;
+
+ if ( port != NULL )
+ ret = port->dev_present;
+ else
+ {
+ ret = -EINVAL;
+ printk(KERN_INFO "dispatch_usb_probe(): invalid port probe request "
+ "(port %ld)\n", portid);
+ }
+
+ /* Probe result is sent back in-band. Probes don't have an associated id
+ * right now... */
+ make_response(up, id, USBIF_OP_PROBE, ret, portid, 0);
+}
+
+/**
+ * check_iso_schedule - safety check the isochronous schedule for an URB
+ * @purb : the URB in question
+ */
+static int check_iso_schedule(struct urb *purb)
+{
+ int i;
+ unsigned long total_length = 0;
+
+ for ( i = 0; i < purb->number_of_packets; i++ )
+ {
+ struct usb_iso_packet_descriptor *desc = &purb->iso_frame_desc[i];
+
+ if ( desc->offset >= purb->transfer_buffer_length
+ || ( desc->offset + desc->length) > purb->transfer_buffer_length )
+ return -EINVAL;
+
+ total_length += desc->length;
+
+ if ( total_length > purb->transfer_buffer_length )
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req);
+
+static void dispatch_usb_io(usbif_priv_t *up, usbif_request_t *req)
+{
+ unsigned long buffer_mach;
+ int i = 0, offset = 0,
+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+ pending_req_t *pending_req;
+ unsigned long remap_prot;
+ multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
+ struct urb *purb = NULL;
+ owned_port_t *port;
+ unsigned char *setup;
+
+ dump_request(req);
+
+ if ( NR_PENDING_REQS == MAX_PENDING_REQS )
+ {
+ printk(KERN_WARNING "usbback: Max requests already queued. "
+ "Giving up!\n");
+
+ return;
+ }
+
+ port = find_port_for_request(up, req);
+
+ if ( port == NULL )
+ {
+ printk(KERN_WARNING "No such device! (%d)\n", req->devnum);
+ dump_request(req);
+
+ make_response(up, req->id, req->operation, -ENODEV, 0, 0);
+ return;
+ }
+ else if ( !port->dev_present )
+ {
+ /* In normal operation, we'll only get here if a device is unplugged
+ * and the frontend hasn't noticed yet. */
+ make_response(up, req->id, req->operation, -ENODEV, 0, 0);
+ return;
+ }
+
+
+ setup = kmalloc(8, GFP_KERNEL);
+
+ if ( setup == NULL )
+ goto no_mem;
+
+ /* Copy request out for safety. */
+ memcpy(setup, req->setup, 8);
+
+ if( setup[0] == 0x0 && setup[1] == 0x5)
+ {
+ /* To virtualise the USB address space, we need to intercept
+ * set_address messages and emulate. From the USB specification:
+ * bmRequestType = 0x0;
+ * bRequest = SET_ADDRESS (i.e. 0x5)
+ * wValue = device address
+ * wIndex = 0
+ * wLength = 0
+ * data = None
+ */
+ /* wValue (the new device address) is little-endian in the setup packet. */
+ port->guest_address = le16_to_cpu(*(u16 *)(setup + 2));
+ /* Make a successful response. That was easy! */
+
+ make_response(up, req->id, req->operation, 0, 0, 0);
+
+ kfree(setup);
+ return;
+ }
+ else if ( setup[0] == 0x0 && setup[1] == 0x9 )
+ {
+ /* The host kernel needs to know what device configuration is in use
+ * because various error checks get confused otherwise. We just do
+ * configuration settings here, under controlled conditions.
+ */
+
+ /* Ignore configuration setting and hope that the host kernel
+ did it right. */
+ /* usb_set_configuration(port->dev, setup[2]); */
+
+ make_response(up, req->id, req->operation, 0, 0, 0);
+
+ kfree(setup);
+ return;
+ }
+ else if ( setup[0] == 0x1 && setup[1] == 0xB )
+ {
+ /* The host kernel needs to know what device interface is in use
+ * because various error checks get confused otherwise. We just do
+ * configuration settings here, under controlled conditions.
+ */
+ usb_set_interface(port->dev, (setup[4] | setup[5] << 8),
+ (setup[2] | setup[3] << 8) );
+
+ make_response(up, req->id, req->operation, 0, 0, 0);
+
+ kfree(setup);
+ return;
+ }
+
+ if ( ( req->transfer_buffer - (req->transfer_buffer & PAGE_MASK)
+ + req->length )
+ > MMAP_PAGES_PER_REQUEST * PAGE_SIZE )
+ {
+ printk(KERN_WARNING "usbback: request of %lu bytes too large\n",
+ req->length);
+ make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+ kfree(setup);
+ return;
+ }
+
+ buffer_mach = req->transfer_buffer;
+
+ if( buffer_mach == 0 )
+ goto no_remap;
+
+ ASSERT((req->length >> PAGE_SHIFT) <= MMAP_PAGES_PER_REQUEST);
+ ASSERT(buffer_mach);
+
+ /* Always map writeable for now. */
+ remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
+
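+ /* Map the guest's transfer buffer into our MMAP region one page at a
+ * time, recording each frame as foreign in the phys-to-machine table. */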
+ for ( i = 0, offset = 0; offset < req->length;
+ i++, offset += PAGE_SIZE )
+ {
+ mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
+ mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
+ mcl[i].args[1] = ((buffer_mach & PAGE_MASK) + offset) | remap_prot;
+ mcl[i].args[2] = 0;
+ mcl[i].args[3] = up->domid;
+
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+ FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
+
+ ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
+ == buffer_mach + (i << PAGE_SHIFT));
+ }
+
+ if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
+ {
+ /* Map in ISO schedule, if necessary. */
+ mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
+ mcl[i].args[0] = MMAP_VADDR(pending_idx, i);
+ mcl[i].args[1] = (req->iso_schedule & PAGE_MASK) | remap_prot;
+ mcl[i].args[2] = 0;
+ mcl[i].args[3] = up->domid;
+
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+ FOREIGN_FRAME(req->iso_schedule >> PAGE_SHIFT);
+
+ i++;
+ }
+
+ if ( unlikely(HYPERVISOR_multicall(mcl, i) != 0) )
+ BUG();
+
+ {
+ int j;
+ for ( j = 0; j < i; j++ )
+ {
+ if ( unlikely(mcl[j].args[5] != 0) )
+ {
+ printk(KERN_WARNING
+ "invalid buffer %d -- could not remap it\n", j);
+ fast_flush_area(pending_idx, i);
+ goto bad_descriptor;
+ }
+ }
+ }
+
+ no_remap:
+
+ ASSERT(i <= MMAP_PAGES_PER_REQUEST);
+ ASSERT(i * PAGE_SIZE >= req->length);
+
+ /* We have to do this because some things might complete out of order. */
+ pending_req = &pending_reqs[pending_idx];
+ pending_req->usbif_priv= up;
+ pending_req->id = req->id;
+ pending_req->operation = req->operation;
+ pending_req->nr_pages = i;
+
+ pending_cons++;
+
+ usbif_get(up);
+
+ /* Fill out an actual request for the USB layer. */
+ purb = usb_alloc_urb(req->num_iso);
+
+ if ( purb == NULL )
+ {
+ usbif_put(up);
+ free_pending(pending_idx);
+ goto no_mem;
+ }
+
+ purb->dev = port->dev;
+ purb->context = pending_req;
+ purb->transfer_buffer =
+ (void *)(MMAP_VADDR(pending_idx, 0) + (buffer_mach & ~PAGE_MASK));
+ if(buffer_mach == 0)
+ purb->transfer_buffer = NULL;
+ purb->complete = __end_usb_io_op;
+ purb->transfer_buffer_length = req->length;
+ purb->transfer_flags = req->transfer_flags;
+
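+ /* Rebuild the USB pipe word from the fields the frontend passed down. */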
+ purb->pipe = 0;
+ purb->pipe |= req->direction << 7;
+ purb->pipe |= port->dev->devnum << 8;
+ purb->pipe |= req->speed << 26;
+ purb->pipe |= req->pipe_type << 30;
+ purb->pipe |= req->endpoint << 15;
+
+ purb->number_of_packets = req->num_iso;
+
+ if ( purb->number_of_packets * sizeof(usbif_iso_t) > PAGE_SIZE )
+ goto urb_error;
+
+ /* Make sure there's always some kind of timeout. */
+ purb->timeout = ( req->timeout > 0 ) ? (req->timeout * HZ) / 1000
+ : 1000;
+
+ purb->setup_packet = setup;
+
+ if ( req->pipe_type == 0 ) /* ISO */
+ {
+ int j;
+ usbif_iso_t *iso_sched = (usbif_iso_t *)MMAP_VADDR(pending_idx, i - 1);
+
+ /* If we're dealing with an iso pipe, we need to copy in a schedule. */
+ for ( j = 0; j < purb->number_of_packets; j++ )
+ {
+ purb->iso_frame_desc[j].length = iso_sched[j].length;
+ purb->iso_frame_desc[j].offset = iso_sched[j].buffer_offset;
+ iso_sched[j].status = 0;
+ }
+ }
+
+ if ( check_iso_schedule(purb) != 0 )
+ goto urb_error;
+
+ if ( usb_submit_urb(purb) != 0 )
+ goto urb_error;
+
+ return;
+
+ urb_error:
+ dump_urb(purb);
+ usbif_put(up);
+ free_pending(pending_idx);
+
+ bad_descriptor:
+ kfree ( setup );
+ if ( purb != NULL )
+ usb_free_urb(purb);
+ make_response(up, req->id, req->operation, -EINVAL, 0, 0);
+ return;
+
+ no_mem:
+ if ( setup != NULL )
+ kfree(setup);
+ make_response(up, req->id, req->operation, -ENOMEM, 0, 0);
+ return;
+}
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+static void make_response(usbif_priv_t *up, unsigned long id,
+ unsigned short op, int st, int inband,
+ unsigned long length)
+{
+ usbif_response_t *resp;
+ unsigned long flags;
+ usbif_back_ring_t *usb_ring = &up->usb_ring;
+
+ /* Place on the response ring for the relevant domain. */
+ spin_lock_irqsave(&up->usb_ring_lock, flags);
+ resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
+ resp->id = id;
+ resp->operation = op;
+ resp->status = st;
+ resp->data = inband;
+ resp->length = length;
+ wmb(); /* Ensure other side can see the response fields. */
+
+ dump_response(resp);
+
+ usb_ring->rsp_prod_pvt++;
+ RING_PUSH_RESPONSES(usb_ring);
+ spin_unlock_irqrestore(&up->usb_ring_lock, flags);
+
+ /* Kick the relevant domain. */
+ notify_via_evtchn(up->evtchn);
+}
+
+/**
+ * usbif_claim_port - claim devices on a port on behalf of guest
+ *
+ * Once completed, this will ensure that any device attached to that
+ * port is claimed by this driver for use by the guest.
+ */
+int usbif_claim_port(usbif_be_claim_port_t *msg)
+{
+ owned_port_t *o_p;
+
+ /* Sanity... */
+ if ( usbif_find_port(msg->path) != NULL )
+ {
+ printk(KERN_WARNING "usbback: Attempted to claim USB port "
+ "we already own!\n");
+ return -EINVAL;
+ }
+
+ /* No need for a slab cache - this should be infrequent. */
+ o_p = kmalloc(sizeof(owned_port_t), GFP_KERNEL);
+
+ if ( o_p == NULL )
+ return -ENOMEM;
+
+ o_p->enabled = 0;
+ o_p->usbif_priv = usbif_find(msg->domid);
+ o_p->guest_port = msg->usbif_port;
+ o_p->dev_present = 0;
+ o_p->guest_address = 0; /* Default address. */
+
+ strcpy(o_p->path, msg->path);
+
+ spin_lock_irq(&owned_ports_lock);
+
+ list_add(&o_p->list, &owned_ports);
+
+ spin_unlock_irq(&owned_ports_lock);
+
+ printk(KERN_INFO "usbback: Claimed USB port (%s) for %d.%d\n", o_p->path,
+ msg->domid, msg->usbif_port);
+
+ /* Force a reprobe for unclaimed devices. */
+ usb_scan_devices();
+
+ return 0;
+}
+
+owned_port_t *find_port_for_request(usbif_priv_t *up, usbif_request_t *req)
+{
+ unsigned long flags;
+ struct list_head *port;
+
+ /* I'm assuming this is not called from IRQ context - correct? I think
+ * it's probably only called in response to control messages or plug events
+ * in the USB hub kernel thread, so should be OK. */
+ spin_lock_irqsave(&owned_ports_lock, flags);
+ list_for_each(port, &owned_ports)
+ {
+ owned_port_t *p = list_entry(port, owned_port_t, list);
+ if(p->usbif_priv == up && p->guest_address == req->devnum && p->enabled )
+ {
+ dump_port(p);
+
+ spin_unlock_irqrestore(&owned_ports_lock, flags);
+ return p;
+ }
+ }
+ spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+ return NULL;
+}
+
+owned_port_t *__usbif_find_port(char *path)
+{
+ struct list_head *port;
+
+ list_for_each(port, &owned_ports)
+ {
+ owned_port_t *p = list_entry(port, owned_port_t, list);
+ if(!strcmp(path, p->path))
+ {
+ return p;
+ }
+ }
+
+ return NULL;
+}
+
+owned_port_t *usbif_find_port(char *path)
+{
+ owned_port_t *ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&owned_ports_lock, flags);
+ ret = __usbif_find_port(path);
+ spin_unlock_irqrestore(&owned_ports_lock, flags);
+
+ return ret;
+}
+
+
+static void *probe(struct usb_device *dev, unsigned iface,
+ const struct usb_device_id *id)
+{
+ owned_port_t *p;
+
+ /* We don't care what the device is - if we own the port, we want it. We
+ * don't deal with device-specifics in this driver, so we don't care what
+ * the device actually is ;-) */
+ if ( ( p = usbif_find_port(dev->devpath) ) != NULL )
+ {
+ printk(KERN_INFO "usbback: claimed device attached to owned port\n");
+
+ p->dev_present = 1;
+ p->dev = dev;
+ set_bit(iface, &p->ifaces);
+
+ return p->usbif_priv;
+ }
+ else
+ printk(KERN_INFO "usbback: hotplug for non-owned port (%s), ignoring\n",
+ dev->devpath);
+
+
+ return NULL;
+}
+
+static void disconnect(struct usb_device *dev, void *usbif)
+{
+ /* Note the device is removed so we can tell the guest when it probes. */
+ owned_port_t *port = usbif_find_port(dev->devpath);
+ port->dev_present = 0;
+ port->dev = NULL;
+ port->ifaces = 0;
+}
+
+
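+/* Generic USB driver registration: probe()/disconnect() above decide, per
+ * owned port, whether the backend actually claims a newly attached device. */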
+struct usb_driver driver =
+{
+ .owner = THIS_MODULE,
+ .name = "Xen USB Backend",
+ .probe = probe,
+ .disconnect = disconnect,
+ .id_table = NULL,
+};
+
+/* __usbif_release_port - internal mechanics for releasing a port */
+void __usbif_release_port(owned_port_t *p)
+{
+ int i;
+
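+ /* Release each interface we claimed on this device, clearing the
+ * corresponding bit in the interface bitmap as we go. */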
+ for ( i = 0; p->ifaces != 0; i++)
+ if ( p->ifaces & 1 << i )
+ {
+ usb_driver_release_interface(&driver, usb_ifnum_to_if(p->dev, i));
+ clear_bit(i, &p->ifaces);
+ }
+ list_del(&p->list);
+
+ /* Reset the real device. We don't simulate disconnect / probe for other
+ * drivers in this kernel because we assume the device is completely under
+ * the control of ourselves (i.e. the guest!). This should ensure that the
+ * device is in a sane state for the next customer ;-) */
+
+ /* MAW NB: we're not resetting the real device here. This looks perfectly
+ * valid to me but it causes memory corruption. We seem to get away with not
+ * resetting for now, although it'd be nice to have this tracked down. */
+/* if ( p->dev != NULL) */
+/* usb_reset_device(p->dev); */
+
+ kfree(p);
+}
+
+
+/**
+ * usbif_release_port - stop claiming devices on a port on behalf of guest
+ */
+void usbif_release_port(usbif_be_release_port_t *msg)
+{
+ owned_port_t *p;
+
+ spin_lock_irq(&owned_ports_lock);
+ p = __usbif_find_port(msg->path);
+ __usbif_release_port(p);
+ spin_unlock_irq(&owned_ports_lock);
+}
+
+void usbif_release_ports(usbif_priv_t *up)
+{
+ struct list_head *port, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&owned_ports_lock, flags);
+ list_for_each_safe(port, tmp, &owned_ports)
+ {
+ owned_port_t *p = list_entry(port, owned_port_t, list);
+ if ( p->usbif_priv == up )
+ __usbif_release_port(p);
+ }
+ spin_unlock_irqrestore(&owned_ports_lock, flags);
+}
+
+static int __init usbif_init(void)
+{
+ int i;
+
+ if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
+ !(xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+ return 0;
+
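+ /* Reserve an empty low-memory region; guest pages for pending requests
+ * (e.g. iso schedules) are mapped into this window. */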
+ if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
+ BUG();
+
+ pending_cons = 0;
+ pending_prod = MAX_PENDING_REQS;
+ memset(pending_reqs, 0, sizeof(pending_reqs));
+ for ( i = 0; i < MAX_PENDING_REQS; i++ )
+ pending_ring[i] = i;
+
+ spin_lock_init(&pend_prod_lock);
+
+ spin_lock_init(&owned_ports_lock);
+ INIT_LIST_HEAD(&owned_ports);
+
+ spin_lock_init(&usbio_schedule_list_lock);
+ INIT_LIST_HEAD(&usbio_schedule_list);
+
+ if ( kernel_thread(usbio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
+ BUG();
+
+ usbif_interface_init();
+
+ usbif_ctrlif_init();
+
+ usb_register(&driver);
+
+ printk(KERN_INFO "Xen USB Backend Initialised\n");
+
+ return 0;
+}
+
+__initcall(usbif_init);
--- /dev/null
+/*
+ * Xen Virtual USB Frontend Driver
+ *
+ * This file contains the first version of the Xen virtual USB hub
+ * that I've managed not to delete by mistake (3rd time lucky!).
+ *
+ * Based on Linux's uhci.c, original copyright notices are displayed
+ * below. Portions also (c) 2004 Intel Research Cambridge
+ * and (c) 2004, 2005 Mark Williamson
+ *
+ * Contact <mark.williamson@cl.cam.ac.uk> or
+ * <xen-devel@lists.sourceforge.net> regarding this code.
+ *
+ * Still to be (maybe) implemented:
+ * - migration / backend restart support?
+ * - support for building / using as a module
+ */
+
+/*
+ * Universal Host Controller Interface driver for USB.
+ *
+ * Maintainer: Johannes Erdfelt <johannes@erdfelt.com>
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ * (C) Copyright 1999-2002 Johannes Erdfelt, johannes@erdfelt.com
+ * (C) Copyright 1999 Randy Dunlap
+ * (C) Copyright 1999 Georg Acher, acher@in.tum.de
+ * (C) Copyright 1999 Deti Fliegl, deti@fliegl.de
+ * (C) Copyright 1999 Thomas Sailer, sailer@ife.ee.ethz.ch
+ * (C) Copyright 1999 Roman Weissgaerber, weissg@vienna.at
+ * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
+ * support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
+ * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
+ *
+ * Intel documents this fairly well, and as far as I know there
+ * are no royalties or anything like that, but even so there are
+ * people who decided that they want to do the same thing in a
+ * completely different way.
+ *
+ * WARNING! The USB documentation is downright evil. Most of it
+ * is just crap, written by a committee. You're better off ignoring
+ * most of it, the important stuff is:
+ * - the low-level protocol (fairly simple but lots of small details)
+ * - working around the horridness of the rest
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#ifdef CONFIG_USB_DEBUG
+#define DEBUG
+#else
+#undef DEBUG
+#endif
+#include <linux/usb.h>
+
+#include <asm/irq.h>
+#include <asm/system.h>
+
+#include "xhci.h"
+
+#include "../../../../../drivers/usb/hcd.h"
+
+#include <asm-xen/xen-public/io/usbif.h>
+#include <asm/ctrl_if.h>
+#include <asm/xen-public/io/domain_controller.h>
+
+/*
+ * Version Information
+ */
+#define DRIVER_VERSION "v1.0"
+#define DRIVER_AUTHOR "Linus 'Frodo Rabbit' Torvalds, Johannes Erdfelt, " \
+ "Randy Dunlap, Georg Acher, Deti Fliegl, " \
+ "Thomas Sailer, Roman Weissgaerber, Mark Williamson"
+#define DRIVER_DESC "Xen Virtual USB Host Controller Interface"
+
+/*
+ * debug = 0, no debugging messages
+ * debug = 1, dump failed URBs except for stalls
+ * debug = 2, dump all failed URBs (including stalls)
+ */
+#ifdef DEBUG
+static int debug = 1;
+#else
+static int debug = 0;
+#endif
+MODULE_PARM(debug, "i");
+MODULE_PARM_DESC(debug, "Debug level");
+static char *errbuf;
+#define ERRBUF_LEN (PAGE_SIZE * 8)
+
+static int rh_submit_urb(struct urb *urb);
+static int rh_unlink_urb(struct urb *urb);
+static int xhci_unlink_urb(struct urb *urb);
+static void xhci_call_completion(struct urb *urb);
+static void xhci_drain_ring(void);
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb);
+static void xhci_finish_completion(void);
+
+#define MAX_URB_LOOP 2048 /* Maximum number of linked URB's */
+
+static kmem_cache_t *xhci_up_cachep; /* urb_priv cache */
+static struct xhci *xhci; /* XHCI structure for the interface */
+
+/******************************************************************************
+ * DEBUGGING
+ */
+
+#ifdef DEBUG
+
+static void dump_urb(struct urb *urb)
+{
+ printk(KERN_DEBUG "dumping urb @ %p\n"
+ " hcpriv = %p\n"
+ " next = %p\n"
+ " dev = %p\n"
+ " pipe = 0x%lx\n"
+ " status = %d\n"
+ " transfer_flags = 0x%lx\n"
+ " transfer_buffer = %p\n"
+ " transfer_buffer_length = %d\n"
+ " actual_length = %d\n"
+ " bandwidth = %d\n"
+ " setup_packet = %p\n",
+ urb, urb->hcpriv, urb->next, urb->dev, urb->pipe, urb->status,
+ urb->transfer_flags, urb->transfer_buffer,
+ urb->transfer_buffer_length, urb->actual_length, urb->bandwidth,
+ urb->setup_packet);
+ if ( urb->setup_packet != NULL )
+ printk(KERN_DEBUG
+ "setup = { 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x }\n",
+ urb->setup_packet[0], urb->setup_packet[1],
+ urb->setup_packet[2], urb->setup_packet[3],
+ urb->setup_packet[4], urb->setup_packet[5],
+ urb->setup_packet[6], urb->setup_packet[7]);
+ printk(KERN_DEBUG "complete = %p\n"
+ "interval = %d\n", urb->complete, urb->interval);
+
+}
+
+static void xhci_show_resp(usbif_response_t *r)
+{
+ printk(KERN_DEBUG "dumping response @ %p\n"
+ " id=0x%lx\n"
+ " op=0x%x\n"
+ " data=0x%x\n"
+ " status=0x%x\n"
+ " length=0x%lx\n",
+ r->id, r->operation, r->data, r->status, r->length);
+}
+
+#define DPRINTK(...) printk(KERN_DEBUG __VA_ARGS__)
+
+#else /* DEBUG */
+
+#define dump_urb(blah) ((void)0)
+#define xhci_show_resp(blah) ((void)0)
+#define DPRINTK(blah,...) ((void)0)
+
+#endif /* DEBUG */
+
+/******************************************************************************
+ * RING REQUEST HANDLING
+ */
+
+/**
+ * xhci_construct_isoc - add isochronous information to a request
+ */
+static int xhci_construct_isoc(usbif_request_t *req, struct urb *urb)
+{
+ usbif_iso_t *schedule;
+ int i;
+ struct urb_priv *urb_priv = urb->hcpriv;
+
+ req->num_iso = urb->number_of_packets;
+ schedule = (usbif_iso_t *)__get_free_page(GFP_KERNEL);
+
+ if ( schedule == NULL )
+ return -ENOMEM;
+
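+ /* Copy the per-packet offsets and lengths into a page that is handed to
+ * the backend (by machine address) as the iso schedule. */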
+ for ( i = 0; i < req->num_iso; i++ )
+ {
+ schedule[i].buffer_offset = urb->iso_frame_desc[i].offset;
+ schedule[i].length = urb->iso_frame_desc[i].length;
+ }
+
+ urb_priv->schedule = schedule;
+ req->iso_schedule = virt_to_machine(schedule);
+
+ return 0;
+}
+
+/**
+ * xhci_queue_req - construct and queue request for an URB
+ */
+static int xhci_queue_req(struct urb *urb)
+{
+ usbif_request_t *req;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+#if DEBUG
+ printk(KERN_DEBUG
+ "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
+ usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
+ usbif->resp_prod, xhci->usb_resp_cons);
+#endif
+
+
+ if ( RING_FULL(usb_ring) )
+ {
+ printk(KERN_WARNING
+ "xhci_queue_req(): USB ring full, not queuing request\n");
+ return -ENOBUFS;
+ }
+
+ /* Stick something in the shared communications ring. */
+ req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+ req->operation = USBIF_OP_IO;
+ req->port = 0; /* We don't care what the port is. */
+ req->id = (unsigned long) urb->hcpriv;
+ req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
+ req->devnum = usb_pipedevice(urb->pipe);
+ req->direction = usb_pipein(urb->pipe);
+ req->speed = usb_pipeslow(urb->pipe);
+ req->pipe_type = usb_pipetype(urb->pipe);
+ req->length = urb->transfer_buffer_length;
+ req->transfer_flags = urb->transfer_flags;
+ req->endpoint = usb_pipeendpoint(urb->pipe);
+ req->timeout = urb->timeout * (1000 / HZ);
+
+ if ( usb_pipetype(urb->pipe) == 0 ) /* ISO */
+ {
+ int ret = xhci_construct_isoc(req, urb);
+ if ( ret != 0 )
+ return ret;
+ }
+
+ if(urb->setup_packet != NULL)
+ memcpy(req->setup, urb->setup_packet, 8);
+ else
+ memset(req->setup, 0, 8);
+
+ usb_ring->req_prod_pvt++;
+ RING_PUSH_REQUESTS(usb_ring);
+
+ notify_via_evtchn(xhci->evtchn);
+
+ DPRINTK("Queued request for an URB.\n");
+ dump_urb(urb);
+
+ return -EINPROGRESS;
+}
+
+/**
+ * xhci_queue_probe - queue a probe request for a particular port
+ */
+static inline usbif_request_t *xhci_queue_probe(usbif_vdev_t port)
+{
+ usbif_request_t *req;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+#if DEBUG
+ printk(KERN_DEBUG
+ "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
+ "resp_cons = %d\n", usbif->req_prod,
+ virt_to_machine(&usbif->req_prod),
+ usbif->resp_prod, xhci->usb_resp_cons);
+#endif
+
+ if ( RING_FULL(usb_ring) )
+ {
+ printk(KERN_WARNING
+ "xhci_queue_probe(): ring full, not queuing request\n");
+ return NULL;
+ }
+
+ /* Stick something in the shared communications ring. */
+ req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+ memset(req, 0, sizeof(*req));
+
+ req->operation = USBIF_OP_PROBE;
+ req->port = port;
+
+ usb_ring->req_prod_pvt++;
+ RING_PUSH_REQUESTS(usb_ring);
+
+ notify_via_evtchn(xhci->evtchn);
+
+ return req;
+}
+
+/**
+ * xhci_port_reset - queue a reset request for a particular port
+ */
+static int xhci_port_reset(usbif_vdev_t port)
+{
+ usbif_request_t *req;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+
+ /* We only reset one port at a time, so we only need one variable per
+ * hub. */
+ xhci->awaiting_reset = 1;
+
+ /* Stick something in the shared communications ring. */
+ req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
+
+ memset(req, 0, sizeof(*req));
+
+ req->operation = USBIF_OP_RESET;
+ req->port = port;
+
+ usb_ring->req_prod_pvt++;
+ RING_PUSH_REQUESTS(usb_ring);
+
+ notify_via_evtchn(xhci->evtchn);
+
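+ /* Spin until the backend acknowledges the reset: draining the ring lets
+ * receive_usb_reset() update awaiting_reset. */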
+ while ( xhci->awaiting_reset > 0 )
+ {
+ mdelay(1);
+ xhci_drain_ring();
+ }
+
+ xhci->rh.ports[port].pe = 1;
+ xhci->rh.ports[port].pe_chg = 1;
+
+ return xhci->awaiting_reset;
+}
+
+
+/******************************************************************************
+ * RING RESPONSE HANDLING
+ */
+
+static void receive_usb_reset(usbif_response_t *resp)
+{
+ xhci->awaiting_reset = resp->status;
+ rmb();
+
+}
+
+static void receive_usb_probe(usbif_response_t *resp)
+{
+ spin_lock(&xhci->rh.port_state_lock);
+
+ if ( resp->status > 0 )
+ {
+ if ( resp->status == 1 )
+ {
+ /* If there's a device there and there wasn't one before, there must
+ * have been a connection status change. */
+ if( xhci->rh.ports[resp->data].cs == 0 )
+ {
+ xhci->rh.ports[resp->data].cs = 1;
+ xhci->rh.ports[resp->data].ccs = 1;
+ xhci->rh.ports[resp->data].cs_chg = 1;
+ }
+ }
+ else
+ printk(KERN_WARNING "receive_usb_probe(): unexpected status %d "
+ "for port %d\n", resp->status, resp->data);
+ }
+ else if ( resp->status < 0)
+ printk(KERN_WARNING "receive_usb_probe(): got error status %d\n",
+ resp->status);
+
+ spin_unlock(&xhci->rh.port_state_lock);
+}
+
+static void receive_usb_io(usbif_response_t *resp)
+{
+ struct urb_priv *urbp = (struct urb_priv *)resp->id;
+ struct urb *urb = urbp->urb;
+
+ urb->actual_length = resp->length;
+ urbp->in_progress = 0;
+
+ if( usb_pipetype(urb->pipe) == 0 ) /* ISO */
+ {
+ int i;
+
+ /* Copy ISO schedule results back in. */
+ for ( i = 0; i < urb->number_of_packets; i++ )
+ {
+ urb->iso_frame_desc[i].status
+ = urbp->schedule[i].status;
+ urb->iso_frame_desc[i].actual_length
+ = urbp->schedule[i].length;
+ }
+ free_page((unsigned long)urbp->schedule);
+ }
+
+ /* Only set status if it's not been changed since submission. It might
+ * have been changed if the URB has been unlinked asynchronously, for
+ * instance. */
+ if ( urb->status == -EINPROGRESS )
+ urbp->status = urb->status = resp->status;
+}
+
+/**
+ * xhci_drain_ring - drain responses from the ring, calling handlers
+ *
+ * This may be called from interrupt context when an event is received from the
+ * backend domain, or sometimes in process context whilst waiting for a port
+ * reset or URB completion.
+ */
+static void xhci_drain_ring(void)
+{
+ struct list_head *tmp, *head;
+ usbif_front_ring_t *usb_ring = &xhci->usb_ring;
+ usbif_response_t *resp;
+ RING_IDX i, rp;
+
+ /* Walk the ring here to get responses, updating URBs to show what
+ * completed. */
+
+ rp = usb_ring->sring->rsp_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ /* Take items off the comms ring, taking care not to overflow. */
+ for ( i = usb_ring->rsp_cons; i != rp; i++ )
+ {
+ resp = RING_GET_RESPONSE(usb_ring, i);
+
+ /* May need to deal with batching and with putting a ceiling on the
+ * number dispatched, for performance and anti-DoS reasons. */
+
+ xhci_show_resp(resp);
+
+ switch ( resp->operation )
+ {
+ case USBIF_OP_PROBE:
+ receive_usb_probe(resp);
+ break;
+
+ case USBIF_OP_IO:
+ receive_usb_io(resp);
+ break;
+
+ case USBIF_OP_RESET:
+ receive_usb_reset(resp);
+ break;
+
+ default:
+ printk(KERN_WARNING
+ "error: unknown USB io operation response [%d]\n",
+ resp->operation);
+ break;
+ }
+ }
+
+ usb_ring->rsp_cons = i;
+
+ /* Walk the list of pending URB's to see which ones completed and do
+ * callbacks, etc. */
+ spin_lock(&xhci->urb_list_lock);
+ head = &xhci->urb_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *urb = list_entry(tmp, struct urb, urb_list);
+
+ tmp = tmp->next;
+
+ /* Checks the status and does all of the magic necessary */
+ xhci_transfer_result(xhci, urb);
+ }
+ spin_unlock(&xhci->urb_list_lock);
+
+ xhci_finish_completion();
+}
+
+
+static void xhci_interrupt(int irq, void *__xhci, struct pt_regs *regs)
+{
+ xhci_drain_ring();
+}
+
+/******************************************************************************
+ * HOST CONTROLLER FUNCTIONALITY
+ */
+
+/**
+ * no-op implementation of private device alloc / free routines
+ */
+static int xhci_do_nothing_dev(struct usb_device *dev)
+{
+ return 0;
+}
+
+static inline void xhci_add_complete(struct urb *urb)
+{
+ struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xhci->complete_list_lock, flags);
+ list_add_tail(&urbp->complete_list, &xhci->complete_list);
+ spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+}
+
+/* When this returns, the owner of the URB may free its
+ * storage.
+ *
+ * We spin and wait for the URB to complete before returning.
+ *
+ * Call with urb->lock acquired.
+ */
+static void xhci_delete_urb(struct urb *urb)
+{
+ struct urb_priv *urbp;
+
+ urbp = urb->hcpriv;
+
+ /* If there's no urb_priv structure for this URB then it can't have
+ * been submitted at all. */
+ if ( urbp == NULL )
+ return;
+
+ /* For now we just spin until the URB completes. It shouldn't take too
+ * long and we don't expect to have to do this very often. */
+ while ( urb->status == -EINPROGRESS )
+ {
+ xhci_drain_ring();
+ mdelay(1);
+ }
+
+ /* Now we know that further transfers to the buffer won't
+ * occur, so we can safely return. */
+}
+
+static struct urb_priv *xhci_alloc_urb_priv(struct urb *urb)
+{
+ struct urb_priv *urbp;
+
+ urbp = kmem_cache_alloc(xhci_up_cachep, SLAB_ATOMIC);
+ if (!urbp) {
+ err("xhci_alloc_urb_priv: couldn't allocate memory for urb_priv\n");
+ return NULL;
+ }
+
+ memset((void *)urbp, 0, sizeof(*urbp));
+
+ urbp->inserttime = jiffies;
+ urbp->urb = urb;
+ urbp->dev = urb->dev;
+
+ INIT_LIST_HEAD(&urbp->complete_list);
+
+ urb->hcpriv = urbp;
+
+ return urbp;
+}
+
+/*
+ * MUST be called with urb->lock acquired
+ */
+/* When is this called? Do we need to stop the transfer (as we
+ * currently do)? */
+static void xhci_destroy_urb_priv(struct urb *urb)
+{
+ struct urb_priv *urbp;
+
+ urbp = (struct urb_priv *)urb->hcpriv;
+ if (!urbp)
+ return;
+
+ if (!list_empty(&urb->urb_list))
+ warn("xhci_destroy_urb_priv: urb %p still on xhci->urb_list", urb);
+
+ if (!list_empty(&urbp->complete_list))
+ warn("xhci_destroy_urb_priv: urb %p still on xhci->complete_list", urb);
+
+ kmem_cache_free(xhci_up_cachep, urb->hcpriv);
+
+ urb->hcpriv = NULL;
+}
+
+/**
+ * Try to find URBs in progress on the same pipe to the same device.
+ *
+ * MUST be called with xhci->urb_list_lock acquired
+ */
+static struct urb *xhci_find_urb_ep(struct xhci *xhci, struct urb *urb)
+{
+ struct list_head *tmp, *head;
+
+ /* We don't match Isoc transfers since they are special */
+ if (usb_pipeisoc(urb->pipe))
+ return NULL;
+
+ head = &xhci->urb_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *u = list_entry(tmp, struct urb, urb_list);
+
+ tmp = tmp->next;
+
+ if (u->dev == urb->dev && u->pipe == urb->pipe &&
+ u->status == -EINPROGRESS)
+ return u;
+ }
+
+ return NULL;
+}
+
+static int xhci_submit_urb(struct urb *urb)
+{
+ int ret = -EINVAL;
+ unsigned long flags;
+ struct urb *eurb;
+ int bustime;
+
+ DPRINTK("URB submitted to XHCI driver.\n");
+ dump_urb(urb);
+
+ if (!urb)
+ return -EINVAL;
+
+ if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv) {
+ warn("xhci_submit_urb: urb %p belongs to disconnected device or bus?", urb);
+ return -ENODEV;
+ }
+
+ if ( urb->dev->devpath == NULL )
+ BUG();
+
+ usb_inc_dev_use(urb->dev);
+
+ spin_lock_irqsave(&xhci->urb_list_lock, flags);
+ spin_lock(&urb->lock);
+
+ if (urb->status == -EINPROGRESS || urb->status == -ECONNRESET ||
+ urb->status == -ECONNABORTED) {
+ dbg("xhci_submit_urb: urb not available to submit (status = %d)", urb->status);
+ /* Since we can have problems on the out path */
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+ usb_dec_dev_use(urb->dev);
+
+ return ret;
+ }
+
+ INIT_LIST_HEAD(&urb->urb_list);
+ if (!xhci_alloc_urb_priv(urb)) {
+ ret = -ENOMEM;
+
+ goto out;
+ }
+
+ ( (struct urb_priv *)urb->hcpriv )->in_progress = 1;
+
+ eurb = xhci_find_urb_ep(xhci, urb);
+ if (eurb && !(urb->transfer_flags & USB_QUEUE_BULK)) {
+ ret = -ENXIO;
+
+ goto out;
+ }
+
+ /* Short circuit the virtual root hub */
+ if (urb->dev == xhci->rh.dev) {
+ ret = rh_submit_urb(urb);
+
+ goto out;
+ }
+
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_CONTROL:
+ case PIPE_BULK:
+ ret = xhci_queue_req(urb);
+ break;
+
+ case PIPE_INTERRUPT:
+ if (urb->bandwidth == 0) { /* not yet checked/allocated */
+ bustime = usb_check_bandwidth(urb->dev, urb);
+ if (bustime < 0)
+ ret = bustime;
+ else {
+ ret = xhci_queue_req(urb);
+ if (ret == -EINPROGRESS)
+ usb_claim_bandwidth(urb->dev, urb,
+ bustime, 0);
+ }
+ } else /* bandwidth is already set */
+ ret = xhci_queue_req(urb);
+ break;
+
+ case PIPE_ISOCHRONOUS:
+ if (urb->bandwidth == 0) { /* not yet checked/allocated */
+ if (urb->number_of_packets <= 0) {
+ ret = -EINVAL;
+ break;
+ }
+ bustime = usb_check_bandwidth(urb->dev, urb);
+ if (bustime < 0) {
+ ret = bustime;
+ break;
+ }
+
+ ret = xhci_queue_req(urb);
+ if (ret == -EINPROGRESS)
+ usb_claim_bandwidth(urb->dev, urb, bustime, 1);
+ } else /* bandwidth is already set */
+ ret = xhci_queue_req(urb);
+ break;
+ }
+out:
+ urb->status = ret;
+
+ if (ret == -EINPROGRESS) {
+ /* We use _tail to make find_urb_ep more efficient */
+ list_add_tail(&urb->urb_list, &xhci->urb_list);
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ return 0;
+ }
+
+ xhci_delete_urb(urb);
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ /* Only call completion if it was successful */
+ if (!ret)
+ xhci_call_completion(urb);
+
+ return ret;
+}
+
+/*
+ * Return the result of a transfer
+ *
+ * MUST be called with urb_list_lock acquired
+ */
+static void xhci_transfer_result(struct xhci *xhci, struct urb *urb)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct urb_priv *urbp;
+
+ /* The root hub is special */
+ if (urb->dev == xhci->rh.dev)
+ return;
+
+ spin_lock_irqsave(&urb->lock, flags);
+
+ urbp = (struct urb_priv *)urb->hcpriv;
+
+ if ( ( (struct urb_priv *)urb->hcpriv )->in_progress )
+ ret = -EINPROGRESS;
+
+ if (urb->actual_length < urb->transfer_buffer_length) {
+ if (urb->transfer_flags & USB_DISABLE_SPD) {
+ ret = -EREMOTEIO;
+ }
+ }
+
+ if (urb->status == -EPIPE)
+ {
+ ret = urb->status;
+ /* endpoint has stalled - mark it halted */
+ usb_endpoint_halt(urb->dev, usb_pipeendpoint(urb->pipe),
+ usb_pipeout(urb->pipe));
+ }
+
+ if ((debug == 1 && ret != 0 && ret != -EPIPE) ||
+ (ret != 0 && debug > 1)) {
+ /* Some debugging code */
+ dbg("xhci_result_interrupt/bulk() failed with status %x",
+ status);
+ }
+
+ if (ret == -EINPROGRESS)
+ goto out;
+
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_CONTROL:
+ case PIPE_BULK:
+ case PIPE_ISOCHRONOUS:
+ /* Release bandwidth for Interrupt or Isoc. transfers */
+ /* Spinlock needed ? */
+ if (urb->bandwidth)
+ usb_release_bandwidth(urb->dev, urb, 1);
+ xhci_delete_urb(urb);
+ break;
+ case PIPE_INTERRUPT:
+ /* Interrupts are an exception */
+ if (urb->interval)
+ goto out_complete;
+
+ /* Release bandwidth for Interrupt or Isoc. transfers */
+ /* Spinlock needed ? */
+ if (urb->bandwidth)
+ usb_release_bandwidth(urb->dev, urb, 0);
+ xhci_delete_urb(urb);
+ break;
+ default:
+ info("xhci_transfer_result: unknown pipe type %d for urb %p\n",
+ usb_pipetype(urb->pipe), urb);
+ }
+
+ /* Remove it from xhci->urb_list */
+ list_del_init(&urb->urb_list);
+
+out_complete:
+ xhci_add_complete(urb);
+
+out:
+ spin_unlock_irqrestore(&urb->lock, flags);
+}
+
+static int xhci_unlink_urb(struct urb *urb)
+{
+ unsigned long flags;
+ struct urb_priv *urbp = urb->hcpriv;
+
+ if (!urb)
+ return -EINVAL;
+
+ if (!urb->dev || !urb->dev->bus || !urb->dev->bus->hcpriv)
+ return -ENODEV;
+
+ spin_lock_irqsave(&xhci->urb_list_lock, flags);
+ spin_lock(&urb->lock);
+
+ /* Release bandwidth for Interrupt or Isoc. transfers */
+ /* Spinlock needed ? */
+ if (urb->bandwidth) {
+ switch (usb_pipetype(urb->pipe)) {
+ case PIPE_INTERRUPT:
+ usb_release_bandwidth(urb->dev, urb, 0);
+ break;
+ case PIPE_ISOCHRONOUS:
+ usb_release_bandwidth(urb->dev, urb, 1);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (urb->status != -EINPROGRESS) {
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+ return 0;
+ }
+
+ list_del_init(&urb->urb_list);
+
+ /* Short circuit the virtual root hub */
+ if (urb->dev == xhci->rh.dev) {
+ rh_unlink_urb(urb);
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ xhci_call_completion(urb);
+ } else {
+ if (urb->transfer_flags & USB_ASYNC_UNLINK) {
+ /* We don't currently attempt to cancel URBs
+ * that have been queued in the ring. We handle async
+ * unlinked URBs when they complete. */
+ urbp->status = urb->status = -ECONNABORTED;
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+ } else {
+ urb->status = -ENOENT;
+
+ spin_unlock(&urb->lock);
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ if (in_interrupt()) { /* wait at least 1 frame */
+ static int errorcount = 10;
+
+ if (errorcount--)
+ dbg("xhci_unlink_urb called from interrupt for urb %p", urb);
+ udelay(1000);
+ } else
+ schedule_timeout(1+1*HZ/1000);
+
+ xhci_delete_urb(urb);
+
+ xhci_call_completion(urb);
+ }
+ }
+
+ return 0;
+}
+
+static void xhci_call_completion(struct urb *urb)
+{
+ struct urb_priv *urbp;
+ struct usb_device *dev = urb->dev;
+ int is_ring = 0, killed, resubmit_interrupt, status;
+ struct urb *nurb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&urb->lock, flags);
+
+ urbp = (struct urb_priv *)urb->hcpriv;
+ if (!urbp || !urb->dev) {
+ spin_unlock_irqrestore(&urb->lock, flags);
+ return;
+ }
+
+ killed = (urb->status == -ENOENT || urb->status == -ECONNABORTED ||
+ urb->status == -ECONNRESET);
+ resubmit_interrupt = (usb_pipetype(urb->pipe) == PIPE_INTERRUPT &&
+ urb->interval);
+
+ nurb = urb->next;
+ if (nurb && !killed) {
+ int count = 0;
+
+ while (nurb && nurb != urb && count < MAX_URB_LOOP) {
+ if (nurb->status == -ENOENT ||
+ nurb->status == -ECONNABORTED ||
+ nurb->status == -ECONNRESET) {
+ killed = 1;
+ break;
+ }
+
+ nurb = nurb->next;
+ count++;
+ }
+
+ if (count == MAX_URB_LOOP)
+ err("xhci_call_completion: too many linked URB's, loop? (first loop)");
+
+ /* Check to see if chain is a ring */
+ is_ring = (nurb == urb);
+ }
+
+ status = urbp->status;
+ if (!resubmit_interrupt || killed)
+ /* We don't need urb_priv anymore */
+ xhci_destroy_urb_priv(urb);
+
+ if (!killed)
+ urb->status = status;
+
+ spin_unlock_irqrestore(&urb->lock, flags);
+
+ if (urb->complete)
+ urb->complete(urb);
+
+ if (resubmit_interrupt)
+ /* Recheck the status. The completion handler may have */
+ /* unlinked the resubmitting interrupt URB */
+ killed = (urb->status == -ENOENT ||
+ urb->status == -ECONNABORTED ||
+ urb->status == -ECONNRESET);
+
+ if (resubmit_interrupt && !killed) {
+ if ( urb->dev != xhci->rh.dev )
+ xhci_queue_req(urb); /* XXX What if this fails? */
+ /* Don't need to resubmit URBs for the virtual root dev. */
+ } else {
+ if (is_ring && !killed) {
+ urb->dev = dev;
+ xhci_submit_urb(urb);
+ } else {
+ /* We decrement the usage count after we're done */
+ /* with everything */
+ usb_dec_dev_use(dev);
+ }
+ }
+}
+
+static void xhci_finish_completion(void)
+{
+ struct list_head *tmp, *head;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xhci->complete_list_lock, flags);
+ head = &xhci->complete_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb_priv *urbp = list_entry(tmp, struct urb_priv,
+ complete_list);
+ struct urb *urb = urbp->urb;
+
+ list_del_init(&urbp->complete_list);
+ spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+
+ xhci_call_completion(urb);
+
+ spin_lock_irqsave(&xhci->complete_list_lock, flags);
+ head = &xhci->complete_list;
+ tmp = head->next;
+ }
+ spin_unlock_irqrestore(&xhci->complete_list_lock, flags);
+}
+
+static struct usb_operations xhci_device_operations = {
+ .allocate = xhci_do_nothing_dev,
+ .deallocate = xhci_do_nothing_dev,
+ /* It doesn't look like any drivers actually care what the frame number
+ * is at the moment! If necessary, we could approximate the current
+ * frame number by passing it from the backend in response messages. */
+ .get_frame_number = NULL,
+ .submit_urb = xhci_submit_urb,
+ .unlink_urb = xhci_unlink_urb
+};
+
+/******************************************************************************
+ * VIRTUAL ROOT HUB EMULATION
+ */
+
+static __u8 root_hub_dev_des[] =
+{
+ 0x12, /* __u8 bLength; */
+ 0x01, /* __u8 bDescriptorType; Device */
+ 0x00, /* __u16 bcdUSB; v1.0 */
+ 0x01,
+ 0x09, /* __u8 bDeviceClass; HUB_CLASSCODE */
+ 0x00, /* __u8 bDeviceSubClass; */
+ 0x00, /* __u8 bDeviceProtocol; */
+ 0x08, /* __u8 bMaxPacketSize0; 8 Bytes */
+ 0x00, /* __u16 idVendor; */
+ 0x00,
+ 0x00, /* __u16 idProduct; */
+ 0x00,
+ 0x00, /* __u16 bcdDevice; */
+ 0x00,
+ 0x00, /* __u8 iManufacturer; */
+ 0x02, /* __u8 iProduct; */
+ 0x01, /* __u8 iSerialNumber; */
+ 0x01 /* __u8 bNumConfigurations; */
+};
+
+
+/* Configuration descriptor */
+static __u8 root_hub_config_des[] =
+{
+ 0x09, /* __u8 bLength; */
+ 0x02, /* __u8 bDescriptorType; Configuration */
+ 0x19, /* __u16 wTotalLength; */
+ 0x00,
+ 0x01, /* __u8 bNumInterfaces; */
+ 0x01, /* __u8 bConfigurationValue; */
+ 0x00, /* __u8 iConfiguration; */
+ 0x40, /* __u8 bmAttributes;
+ Bit 7: Bus-powered, 6: Self-powered,
+ Bit 5 Remote-wakeup, 4..0: resvd */
+ 0x00, /* __u8 MaxPower; */
+
+ /* interface */
+ 0x09, /* __u8 if_bLength; */
+ 0x04, /* __u8 if_bDescriptorType; Interface */
+ 0x00, /* __u8 if_bInterfaceNumber; */
+ 0x00, /* __u8 if_bAlternateSetting; */
+ 0x01, /* __u8 if_bNumEndpoints; */
+ 0x09, /* __u8 if_bInterfaceClass; HUB_CLASSCODE */
+ 0x00, /* __u8 if_bInterfaceSubClass; */
+ 0x00, /* __u8 if_bInterfaceProtocol; */
+ 0x00, /* __u8 if_iInterface; */
+
+ /* endpoint */
+ 0x07, /* __u8 ep_bLength; */
+ 0x05, /* __u8 ep_bDescriptorType; Endpoint */
+ 0x81, /* __u8 ep_bEndpointAddress; IN Endpoint 1 */
+ 0x03, /* __u8 ep_bmAttributes; Interrupt */
+ 0x08, /* __u16 ep_wMaxPacketSize; 8 Bytes */
+ 0x00,
+ 0xff /* __u8 ep_bInterval; 255 ms */
+};
+
+static __u8 root_hub_hub_des[] =
+{
+ 0x09, /* __u8 bLength; */
+ 0x29, /* __u8 bDescriptorType; Hub-descriptor */
+ 0x02, /* __u8 bNbrPorts; */
+ 0x00, /* __u16 wHubCharacteristics; */
+ 0x00,
+ 0x01, /* __u8 bPwrOn2pwrGood; 2ms */
+ 0x00, /* __u8 bHubContrCurrent; 0 mA */
+ 0x00, /* __u8 DeviceRemovable; *** 7 Ports max *** */
+ 0xff /* __u8 PortPwrCtrlMask; *** 7 ports max *** */
+};
+
+/* prepare Interrupt pipe transaction data; HUB INTERRUPT ENDPOINT */
+static int rh_send_irq(struct urb *urb)
+{
+ struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
+ xhci_port_t *ports = xhci->rh.ports;
+ unsigned long flags;
+ int i, len = 1;
+ __u16 data = 0;
+
+ spin_lock_irqsave(&urb->lock, flags);
+ for (i = 0; i < xhci->rh.numports; i++) {
+ /* Set a bit if anything at all has changed on the port, as per
+ * USB spec 11.12 */
+ data |= (ports[i].cs_chg || ports[i].pe_chg )
+ ? (1 << (i + 1))
+ : 0;
+
+ len = (i + 1) / 8 + 1;
+ }
+
+ *(__u16 *) urb->transfer_buffer = cpu_to_le16(data);
+ urb->actual_length = len;
+ urbp->status = 0;
+
+ spin_unlock_irqrestore(&urb->lock, flags);
+
+ if ((data > 0) && (xhci->rh.send != 0)) {
+ dbg("root-hub INT complete: data: %x", data);
+ xhci_call_completion(urb);
+ }
+
+ return 0;
+}
+
+/* Virtual Root Hub INTs are polled by this timer every "interval" ms */
+static int rh_init_int_timer(struct urb *urb);
+
+static void rh_int_timer_do(unsigned long ptr)
+{
+ struct urb *urb = (struct urb *)ptr;
+ struct list_head list, *tmp, *head;
+ unsigned long flags;
+ int i;
+
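+ /* Queue a probe for every root-hub port; the backend's replies are
+ * handled in receive_usb_probe() and update the port state. */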
+ for ( i = 0; i < xhci->rh.numports; i++)
+ xhci_queue_probe(i);
+
+ if (xhci->rh.send)
+ rh_send_irq(urb);
+
+ INIT_LIST_HEAD(&list);
+
+ spin_lock_irqsave(&xhci->urb_list_lock, flags);
+ head = &xhci->urb_list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *u = list_entry(tmp, struct urb, urb_list);
+ struct urb_priv *up = (struct urb_priv *)u->hcpriv;
+
+ tmp = tmp->next;
+
+ spin_lock(&u->lock);
+
+ /* Check if the URB timed out */
+ if (u->timeout && time_after_eq(jiffies,
+ up->inserttime + u->timeout)) {
+ list_del(&u->urb_list);
+ list_add_tail(&u->urb_list, &list);
+ }
+
+ spin_unlock(&u->lock);
+ }
+ spin_unlock_irqrestore(&xhci->urb_list_lock, flags);
+
+ head = &list;
+ tmp = head->next;
+ while (tmp != head) {
+ struct urb *u = list_entry(tmp, struct urb, urb_list);
+
+ tmp = tmp->next;
+
+ u->transfer_flags |= USB_ASYNC_UNLINK | USB_TIMEOUT_KILLED;
+ xhci_unlink_urb(u);
+ }
+
+ rh_init_int_timer(urb);
+}
+
+/* Root Hub INTs are polled by this timer */
+static int rh_init_int_timer(struct urb *urb)
+{
+ xhci->rh.interval = urb->interval;
+ init_timer(&xhci->rh.rh_int_timer);
+ xhci->rh.rh_int_timer.function = rh_int_timer_do;
+ xhci->rh.rh_int_timer.data = (unsigned long)urb;
+ xhci->rh.rh_int_timer.expires = jiffies
+ + (HZ * (urb->interval < 30 ? 30 : urb->interval)) / 1000;
+ add_timer(&xhci->rh.rh_int_timer);
+
+ return 0;
+}
+
+#define OK(x) len = (x); break
+
+/* Root Hub Control Pipe */
+static int rh_submit_urb(struct urb *urb)
+{
+ unsigned int pipe = urb->pipe;
+ struct usb_ctrlrequest *cmd =
+ (struct usb_ctrlrequest *)urb->setup_packet;
+ void *data = urb->transfer_buffer;
+ int leni = urb->transfer_buffer_length;
+ int len = 0;
+ xhci_port_t *status;
+ int stat = 0;
+ int i;
+ int retstatus;
+ unsigned long flags;
+
+ __u16 cstatus;
+ __u16 bmRType_bReq;
+ __u16 wValue;
+ __u16 wIndex;
+ __u16 wLength;
+
+ if (usb_pipetype(pipe) == PIPE_INTERRUPT) {
+ xhci->rh.urb = urb;
+ xhci->rh.send = 1;
+ xhci->rh.interval = urb->interval;
+ rh_init_int_timer(urb);
+
+ return -EINPROGRESS;
+ }
+
+ bmRType_bReq = cmd->bRequestType | cmd->bRequest << 8;
+ wValue = le16_to_cpu(cmd->wValue);
+ wIndex = le16_to_cpu(cmd->wIndex);
+ wLength = le16_to_cpu(cmd->wLength);
+
+ for (i = 0; i < 8; i++)
+ xhci->rh.c_p_r[i] = 0;
+
+ status = &xhci->rh.ports[wIndex - 1];
+
+ spin_lock_irqsave(&xhci->rh.port_state_lock, flags);
+
+ switch (bmRType_bReq) {
+ /* Request Destination:
+ without flags: Device,
+ RH_INTERFACE: interface,
+ RH_ENDPOINT: endpoint,
+ RH_CLASS means HUB here,
+ RH_OTHER | RH_CLASS almost ever means HUB_PORT here
+ */
+
+ case RH_GET_STATUS:
+ *(__u16 *)data = cpu_to_le16(1);
+ OK(2);
+ case RH_GET_STATUS | RH_INTERFACE:
+ *(__u16 *)data = cpu_to_le16(0);
+ OK(2);
+ case RH_GET_STATUS | RH_ENDPOINT:
+ *(__u16 *)data = cpu_to_le16(0);
+ OK(2);
+ case RH_GET_STATUS | RH_CLASS:
+ *(__u32 *)data = cpu_to_le32(0);
+ OK(4); /* hub power */
+ case RH_GET_STATUS | RH_OTHER | RH_CLASS:
+ cstatus = (status->cs_chg) |
+ (status->pe_chg << 1) |
+ (xhci->rh.c_p_r[wIndex - 1] << 4);
+ retstatus = (status->ccs) |
+ (status->pe << 1) |
+ (status->susp << 2) |
+ (status->pr << 8) |
+ (1 << 8) | /* power on */
+ (status->lsda << 9);
+ *(__u16 *)data = cpu_to_le16(retstatus);
+ *(__u16 *)(data + 2) = cpu_to_le16(cstatus);
+ OK(4);
+ case RH_CLEAR_FEATURE | RH_ENDPOINT:
+ switch (wValue) {
+ case RH_ENDPOINT_STALL:
+ OK(0);
+ }
+ break;
+ case RH_CLEAR_FEATURE | RH_CLASS:
+ switch (wValue) {
+ case RH_C_HUB_OVER_CURRENT:
+ OK(0); /* hub power over current */
+ }
+ break;
+ case RH_CLEAR_FEATURE | RH_OTHER | RH_CLASS:
+ switch (wValue) {
+ case RH_PORT_ENABLE:
+ status->pe = 0;
+ OK(0);
+ case RH_PORT_SUSPEND:
+ status->susp = 0;
+ OK(0);
+ case RH_PORT_POWER:
+ OK(0); /* port power */
+ case RH_C_PORT_CONNECTION:
+ status->cs_chg = 0;
+ OK(0);
+ case RH_C_PORT_ENABLE:
+ status->pe_chg = 0;
+ OK(0);
+ case RH_C_PORT_SUSPEND:
+ /*** WR_RH_PORTSTAT(RH_PS_PSSC); */
+ OK(0);
+ case RH_C_PORT_OVER_CURRENT:
+ OK(0); /* port power over current */
+ case RH_C_PORT_RESET:
+ xhci->rh.c_p_r[wIndex - 1] = 0;
+ OK(0);
+ }
+ break;
+ case RH_SET_FEATURE | RH_OTHER | RH_CLASS:
+ switch (wValue) {
+ case RH_PORT_SUSPEND:
+ status->susp = 1;
+ OK(0);
+ case RH_PORT_RESET:
+ {
+ int ret;
+ xhci->rh.c_p_r[wIndex - 1] = 1;
+ status->pr = 0;
+ status->pe = 1;
+ ret = xhci_port_reset(wIndex - 1);
+ /* XXX MAW: should probably cancel queued transfers during reset... */
+ if ( ret == 0 ) { OK(0); }
+ else { return ret; }
+ }
+ break;
+ case RH_PORT_POWER:
+ OK(0); /* port power ** */
+ case RH_PORT_ENABLE:
+ status->pe = 1;
+ OK(0);
+ }
+ break;
+ case RH_SET_ADDRESS:
+ xhci->rh.devnum = wValue;
+ OK(0);
+ case RH_GET_DESCRIPTOR:
+ switch ((wValue & 0xff00) >> 8) {
+ case 0x01: /* device descriptor */
+ len = min_t(unsigned int, leni,
+ min_t(unsigned int,
+ sizeof(root_hub_dev_des), wLength));
+ memcpy(data, root_hub_dev_des, len);
+ OK(len);
+ case 0x02: /* configuration descriptor */
+ len = min_t(unsigned int, leni,
+ min_t(unsigned int,
+ sizeof(root_hub_config_des), wLength));
+ memcpy (data, root_hub_config_des, len);
+ OK(len);
+ case 0x03: /* string descriptors */
+ len = usb_root_hub_string (wValue & 0xff,
+ 0, "XHCI-alt",
+ data, wLength);
+ if (len > 0) {
+ OK(min_t(int, leni, len));
+ } else
+ stat = -EPIPE;
+ }
+ break;
+ case RH_GET_DESCRIPTOR | RH_CLASS:
+ root_hub_hub_des[2] = xhci->rh.numports;
+ len = min_t(unsigned int, leni,
+ min_t(unsigned int, sizeof(root_hub_hub_des), wLength));
+ memcpy(data, root_hub_hub_des, len);
+ OK(len);
+ case RH_GET_CONFIGURATION:
+ *(__u8 *)data = 0x01;
+ OK(1);
+ case RH_SET_CONFIGURATION:
+ OK(0);
+ case RH_GET_INTERFACE | RH_INTERFACE:
+ *(__u8 *)data = 0x00;
+ OK(1);
+ case RH_SET_INTERFACE | RH_INTERFACE:
+ OK(0);
+ default:
+ stat = -EPIPE;
+ }
+
+ spin_unlock_irqrestore(&xhci->rh.port_state_lock, flags);
+
+ urb->actual_length = len;
+
+ return stat;
+}
+
+/*
+ * MUST be called with urb->lock acquired
+ */
+static int rh_unlink_urb(struct urb *urb)
+{
+ if (xhci->rh.urb == urb) {
+ urb->status = -ENOENT;
+ xhci->rh.send = 0;
+ xhci->rh.urb = NULL;
+ del_timer(&xhci->rh.rh_int_timer);
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * CONTROL PLANE FUNCTIONALITY
+ */
+
+/**
+ * alloc_xhci - initialise a new virtual root hub for a new USB device channel
+ */
+static int alloc_xhci(void)
+{
+ int retval;
+ struct usb_bus *bus;
+
+ retval = -EBUSY;
+
+ xhci = kmalloc(sizeof(*xhci), GFP_KERNEL);
+ if (!xhci) {
+ err("couldn't allocate xhci structure");
+ retval = -ENOMEM;
+ goto err_alloc_xhci;
+ }
+
+ xhci->state = USBIF_STATE_CLOSED;
+
+ spin_lock_init(&xhci->urb_list_lock);
+ INIT_LIST_HEAD(&xhci->urb_list);
+
+ spin_lock_init(&xhci->complete_list_lock);
+ INIT_LIST_HEAD(&xhci->complete_list);
+
+ spin_lock_init(&xhci->frame_list_lock);
+
+ bus = usb_alloc_bus(&xhci_device_operations);
+
+ if (!bus) {
+ err("unable to allocate bus");
+ goto err_alloc_bus;
+ }
+
+ xhci->bus = bus;
+ bus->bus_name = "XHCI";
+ bus->hcpriv = xhci;
+
+ usb_register_bus(xhci->bus);
+
+ /* Initialize the root hub */
+
+ xhci->rh.numports = 0;
+
+ xhci->bus->root_hub = xhci->rh.dev = usb_alloc_dev(NULL, xhci->bus);
+ if (!xhci->rh.dev) {
+ err("unable to allocate root hub");
+ goto err_alloc_root_hub;
+ }
+
+ xhci->state = 0;
+
+ return 0;
+
+/*
+ * error exits:
+ */
+err_alloc_root_hub:
+ usb_deregister_bus(xhci->bus);
+ usb_free_bus(xhci->bus);
+ xhci->bus = NULL;
+
+err_alloc_bus:
+ kfree(xhci);
+
+err_alloc_xhci:
+ return retval;
+}
+
+/**
+ * usbif_status_change - deal with an incoming USB_INTERFACE_STATUS_ message
+ */
+static void usbif_status_change(usbif_fe_interface_status_changed_t *status)
+{
+ ctrl_msg_t cmsg;
+ usbif_fe_interface_connect_t up;
+ long rc;
+ usbif_sring_t *sring;
+
+ switch ( status->status )
+ {
+ case USBIF_INTERFACE_STATUS_DESTROYED:
+ printk(KERN_WARNING "Unexpected usbif-DESTROYED message in state %d\n",
+ xhci->state);
+ break;
+
+ case USBIF_INTERFACE_STATUS_DISCONNECTED:
+ if ( xhci->state != USBIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected usbif-DISCONNECTED message"
+ " in state %d\n", xhci->state);
+ break;
+ /* Not bothering to do recovery here for now. Keep things
+ * simple. */
+ }
+
+ /* Move from CLOSED to DISCONNECTED state. */
+ sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&xhci->usb_ring, sring);
+ xhci->state = USBIF_STATE_DISCONNECTED;
+
+ /* Construct an interface-CONNECT message for the domain controller. */
+ cmsg.type = CMSG_USBIF_FE;
+ cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT;
+ cmsg.length = sizeof(usbif_fe_interface_connect_t);
+ up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
+ memcpy(cmsg.msg, &up, sizeof(up));
+
+ /* Tell the controller to bring up the interface. */
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ break;
+
+ case USBIF_INTERFACE_STATUS_CONNECTED:
+ if ( xhci->state == USBIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected usbif-CONNECTED message"
+ " in state %d\n", xhci->state);
+ break;
+ }
+
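+ /* Record the event channel, bandwidth and port count supplied by the
+ * backend, then bring the virtual root hub online. */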
+ xhci->evtchn = status->evtchn;
+ xhci->irq = bind_evtchn_to_irq(xhci->evtchn);
+ xhci->bandwidth = status->bandwidth;
+ xhci->rh.numports = status->num_ports;
+
+ xhci->rh.ports = kmalloc (sizeof(xhci_port_t) * xhci->rh.numports, GFP_KERNEL);
+ memset(xhci->rh.ports, 0, sizeof(xhci_port_t) * xhci->rh.numports);
+
+ usb_connect(xhci->rh.dev);
+
+ if (usb_new_device(xhci->rh.dev) != 0) {
+ err("unable to start root hub");
+ }
+
+ /* Allocate the appropriate USB bandwidth here... Need to
+ * somehow know what the total available is thought to be so we
+ * can calculate the reservation correctly. */
+ usb_claim_bandwidth(xhci->rh.dev, xhci->rh.urb,
+ 1000 - xhci->bandwidth, 0);
+
+ if ( (rc = request_irq(xhci->irq, xhci_interrupt,
+ SA_SAMPLE_RANDOM, "usbif", xhci)) )
+ printk(KERN_ALERT"usbfront request_irq failed (%ld)\n",rc);
+
+ DPRINTK(KERN_INFO __FILE__
+ ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d IRQ %d\n",
+ xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
+ xhci->evtchn, xhci->irq);
+
+ xhci->state = USBIF_STATE_CONNECTED;
+
+ break;
+
+ default:
+ printk(KERN_WARNING "Status change to unknown value %d\n",
+ status->status);
+ break;
+ }
+}
+
+/**
+ * usbif_ctrlif_rx - demux control messages by subtype
+ */
+static void usbif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
+ {
+ case CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED:
+ if ( msg->length != sizeof(usbif_fe_interface_status_changed_t) )
+ goto parse_error;
+ usbif_status_change((usbif_fe_interface_status_changed_t *)
+ &msg->msg[0]);
+ break;
+
+ /* New interface...? */
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+
+static int __init xhci_hcd_init(void)
+{
+ int retval = -ENOMEM, i;
+ usbif_fe_interface_status_changed_t st;
+ control_msg_t cmsg;
+
+ if ( (xen_start_info.flags & SIF_INITDOMAIN)
+ || (xen_start_info.flags & SIF_USB_BE_DOMAIN) )
+ return 0;
+
+ info(DRIVER_DESC " " DRIVER_VERSION);
+
+ if (debug) {
+ errbuf = kmalloc(ERRBUF_LEN, GFP_KERNEL);
+ if (!errbuf)
+ goto errbuf_failed;
+ }
+
+ xhci_up_cachep = kmem_cache_create("xhci_urb_priv",
+ sizeof(struct urb_priv), 0, 0, NULL, NULL);
+ if (!xhci_up_cachep)
+ goto up_failed;
+
+ /* Let the domain controller know we're here. For now we wait until
+ * connection, as for the block and net drivers. This is only strictly
+ * necessary if we're going to boot off a USB device. */
+ printk(KERN_INFO "Initialising Xen virtual USB hub\n");
+
+ (void)ctrl_if_register_receiver(CMSG_USBIF_FE, usbif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ alloc_xhci();
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_USBIF_FE;
+ cmsg.subtype = CMSG_USBIF_FE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(usbif_fe_driver_status_changed_t);
+ st.status = USBIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ /*
+ * We should read 'nr_interfaces' from the response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ for ( i=0; (xhci->state != USBIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ if (xhci->state != USBIF_STATE_CONNECTED)
+ printk(KERN_WARNING "Timeout connecting USB frontend driver!\n");
+
+ return 0;
+
+up_failed:
+ if (errbuf)
+ kfree(errbuf);
+
+errbuf_failed:
+ return retval;
+}
+
+module_init(xhci_hcd_init);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
--- /dev/null
+/******************************************************************************
+ * xhci.h
+ *
+ * Private definitions for the Xen Virtual USB Controller. Based on
+ * drivers/usb/host/uhci.h from Linux. Copyright for the imported content is
+ * retained by the original authors.
+ *
+ * Modifications are:
+ * Copyright (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2004, 2005 Mark Williamson
+ */
+
+#ifndef __LINUX_XHCI_H
+#define __LINUX_XHCI_H
+
+#include <linux/list.h>
+#include <linux/usb.h>
+#include <asm-xen/xen-public/io/usbif.h>
+#include <linux/spinlock.h>
+
+/* xhci_port_t - current known state of a virtual hub port */
+typedef struct {
+ unsigned int cs :1; /* Connection status. do we really need this /and/ ccs? */
+ unsigned int cs_chg :1; /* Connection status change. */
+ unsigned int pe :1; /* Port enable. */
+ unsigned int pe_chg :1; /* Port enable change. */
+ unsigned int ccs :1; /* Current connect status. */
+ unsigned int susp :1; /* Suspended. */
+ unsigned int lsda :1; /* Low speed device attached. */
+ unsigned int pr :1; /* Port reset. */
+} xhci_port_t;
+
+/* struct virt_root_hub - state related to the virtual root hub */
+struct virt_root_hub {
+ struct usb_device *dev;
+ int devnum; /* Address of Root Hub endpoint */
+ struct urb *urb;
+ void *int_addr;
+ int send;
+ int interval;
+ int numports;
+ int c_p_r[8];
+ struct timer_list rh_int_timer;
+ spinlock_t port_state_lock;
+ xhci_port_t *ports;
+};
+
+/* struct xhci - contains the state associated with a single USB interface */
+struct xhci {
+
+#ifdef CONFIG_PROC_FS
+ /* procfs */
+ int num;
+ struct proc_dir_entry *proc_entry;
+#endif
+
+ int evtchn; /* Interdom channel to backend */
+ int irq; /* Bound to evtchn */
+ enum { USBIF_STATE_CONNECTED = 2,
+ USBIF_STATE_DISCONNECTED = 1,
+ USBIF_STATE_CLOSED = 0
+ } state; /* State of this USB interface */
+ unsigned long bandwidth;
+
+ struct usb_bus *bus;
+
+ /* Main list of URB's currently controlled by this HC */
+ spinlock_t urb_list_lock;
+ struct list_head urb_list; /* P: xhci->urb_list_lock */
+
+ /* List of URB's awaiting completion callback */
+ spinlock_t complete_list_lock;
+ struct list_head complete_list; /* P: xhci->complete_list_lock */
+
+ struct virt_root_hub rh; /* private data of the virtual root hub */
+
+ usbif_front_ring_t usb_ring;
+
+ int awaiting_reset;
+};
+
+/* per-URB private data structure for the host controller */
+struct urb_priv {
+ struct urb *urb;
+ usbif_iso_t *schedule;
+ struct usb_device *dev;
+
+ int in_progress : 1; /* QH was queued (not linked in) */
+ int short_control_packet : 1; /* If we get a short packet during */
+ /* a control transfer, retrigger */
+ /* the status phase */
+
+ int status; /* Final status */
+
+ unsigned long inserttime; /* In jiffies */
+
+ struct list_head complete_list; /* P: xhci->complete_list_lock */
+};
+
+/*
+ * Locking in xhci.c
+ *
+ * spinlocks are used extensively to protect the many lists and data
+ * structures we have. It's not that pretty, but it's necessary. We
+ * need to be done with all of the locks (except complete_list_lock) when
+ * we call urb->complete. I've tried to make it simple enough so I don't
+ * have to spend hours racking my brain trying to figure out if the
+ * locking is safe.
+ *
+ * Here's the safe locking order to prevent deadlocks:
+ *
+ * #1 xhci->urb_list_lock
+ * #2 urb->lock
+ * #3 xhci->urb_remove_list_lock
+ * #4 xhci->complete_list_lock
+ *
+ * If you're going to grab 2 or more locks at once, ALWAYS grab the lock
+ * at the lowest level FIRST and NEVER grab locks at the same level at the
+ * same time.
+ *
+ * So, if you need xhci->urb_list_lock, grab it before you grab urb->lock
+ */
+
+/* -------------------------------------------------------------------------
+ Virtual Root HUB
+ ------------------------------------------------------------------------- */
+/* destination of request */
+#define RH_DEVICE 0x00
+#define RH_INTERFACE 0x01
+#define RH_ENDPOINT 0x02
+#define RH_OTHER 0x03
+
+#define RH_CLASS 0x20
+#define RH_VENDOR 0x40
+
+/* Requests: bRequest << 8 | bmRequestType */
+#define RH_GET_STATUS 0x0080
+#define RH_CLEAR_FEATURE 0x0100
+#define RH_SET_FEATURE 0x0300
+#define RH_SET_ADDRESS 0x0500
+#define RH_GET_DESCRIPTOR 0x0680
+#define RH_SET_DESCRIPTOR 0x0700
+#define RH_GET_CONFIGURATION 0x0880
+#define RH_SET_CONFIGURATION 0x0900
+#define RH_GET_STATE 0x0280
+#define RH_GET_INTERFACE 0x0A80
+#define RH_SET_INTERFACE 0x0B00
+#define RH_SYNC_FRAME 0x0C80
+/* Our Vendor Specific Request */
+#define RH_SET_EP 0x2000
+
+/* Hub port features */
+#define RH_PORT_CONNECTION 0x00
+#define RH_PORT_ENABLE 0x01
+#define RH_PORT_SUSPEND 0x02
+#define RH_PORT_OVER_CURRENT 0x03
+#define RH_PORT_RESET 0x04
+#define RH_PORT_POWER 0x08
+#define RH_PORT_LOW_SPEED 0x09
+#define RH_C_PORT_CONNECTION 0x10
+#define RH_C_PORT_ENABLE 0x11
+#define RH_C_PORT_SUSPEND 0x12
+#define RH_C_PORT_OVER_CURRENT 0x13
+#define RH_C_PORT_RESET 0x14
+
+/* Hub features */
+#define RH_C_HUB_LOCAL_POWER 0x00
+#define RH_C_HUB_OVER_CURRENT 0x01
+#define RH_DEVICE_REMOTE_WAKEUP 0x00
+#define RH_ENDPOINT_STALL 0x01
+
+/* Our Vendor Specific feature */
+#define RH_REMOVE_EP 0x00
+
+#define RH_ACK 0x01
+#define RH_REQ_ERR -1
+#define RH_NACK 0x00
+
+#endif
+
--- /dev/null
+/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
+ * which needs to alter them. */
+
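+/* In this tree most of these hooks are stubbed to a printk (see the #else
+ * branches below); only the warm-reset restore still touches the CMOS. */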
+static inline void smpboot_clear_io_apic_irqs(void)
+{
+#if 1
+ printk("smpboot_clear_io_apic_irqs\n");
+#else
+ io_apic_irqs = 0;
+#endif
+}
+
+static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
+{
+#if 1
+ printk("smpboot_setup_warm_reset_vector\n");
+#else
+ CMOS_WRITE(0xa, 0xf);
+ local_flush_tlb();
+ Dprintk("1.\n");
+ *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ Dprintk("2.\n");
+ *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ Dprintk("3.\n");
+#endif
+}
+
+static inline void smpboot_restore_warm_reset_vector(void)
+{
+ /*
+ * Install writable page 0 entry to set BIOS data area.
+ */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
+ CMOS_WRITE(0, 0xf);
+
+ *((volatile long *) phys_to_virt(0x467)) = 0;
+}
+
+static inline void smpboot_setup_io_apic(void)
+{
+#if 1
+ printk("smpboot_setup_io_apic\n");
+#else
+ /*
+ * Here we can be sure that there is an IO-APIC in the system. Let's
+ * go and set it up:
+ */
+ if (!skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
+#endif
+}
+
+
+#define smp_found_config (HYPERVISOR_shared_info->n_vcpu > 1)
if (ret) ret = machine_to_phys(ret);
return ret;
}
-#define pgd_val_ma(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
static inline pte_t __pte(unsigned long x)
--- /dev/null
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <linux/config.h>
+#include <linux/compiler.h>
+
+asmlinkage int printk(const char * fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+ volatile unsigned int slock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+ unsigned int break_lock;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations. There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0)
+#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "jns 3f\n" \
+ "2:\t" \
+ "rep;nop\n\t" \
+ "cmpb $0,%0\n\t" \
+ "jle 2b\n\t" \
+ "jmp 1b\n" \
+ "3:\n\t"
+
+#define spin_lock_string_flags \
+ "\n1:\t" \
+ "lock ; decb %0\n\t" \
+ "jns 4f\n\t" \
+ "2:\t" \
+ "testl $0x200, %1\n\t" \
+ "jz 3f\n\t" \
+ "#sti\n\t" \
+ "3:\t" \
+ "rep;nop\n\t" \
+ "cmpb $0, %0\n\t" \
+ "jle 3b\n\t" \
+ "#cli\n\t" \
+ "jmp 1b\n" \
+ "4:\n\t"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+ "movb $1,%0" \
+ :"=m" (lock->slock) : : "memory"
+
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(lock->magic != SPINLOCK_MAGIC);
+ BUG_ON(!spin_is_locked(lock));
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#else
+
+#define spin_unlock_string \
+ "xchgb %b0, %1" \
+ :"=q" (oldval), "=m" (lock->slock) \
+ :"0" (oldval) : "memory"
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+ char oldval = 1;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(lock->magic != SPINLOCK_MAGIC);
+ BUG_ON(!spin_is_locked(lock));
+#endif
+ __asm__ __volatile__(
+ spin_unlock_string
+ );
+}
+
+#endif
+
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+ char oldval;
+ __asm__ __volatile__(
+ "xchgb %b0,%1"
+ :"=q" (oldval), "=m" (lock->slock)
+ :"0" (0) : "memory");
+ return oldval > 0;
+}
+
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+ printk("eip: %p\n", __builtin_return_address(0));
+ BUG();
+ }
+#endif
+ __asm__ __volatile__(
+ spin_lock_string
+ :"=m" (lock->slock) : : "memory");
+}
+
+static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
+ printk("eip: %p\n", __builtin_return_address(0));
+ BUG();
+ }
+#endif
+ __asm__ __volatile__(
+ spin_lock_string_flags
+ :"=m" (lock->slock) : "r" (flags) : "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+ volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+ unsigned int break_lock;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC 0xdeaf1eed
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define read_can_lock(x) ((int)(x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores. See
+ * semaphore.h for details. -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void _raw_read_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+ __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void _raw_write_lock(rwlock_t *rw)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK
+ BUG_ON(rw->magic != RWLOCK_MAGIC);
+#endif
+ __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int _raw_read_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ atomic_dec(count);
+ if (atomic_read(count) >= 0)
+ return 1;
+ atomic_inc(count);
+ return 0;
+}
+
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+ return 1;
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */