x86/VPMU: move VPMU files up from hvm/ directory
author Boris Ostrovsky <boris.ostrovsky@oracle.com>
Fri, 19 Jun 2015 18:45:00 +0000 (20:45 +0200)
committer Jan Beulich <jbeulich@suse.com>
Thu, 9 Jul 2015 15:04:26 +0000 (17:04 +0200)
Since the PMU is no longer HVM-specific, we can move the VPMU-related files
up from the arch/x86/hvm/ directory.

Specifically:
    arch/x86/hvm/vpmu.c -> arch/x86/cpu/vpmu.c
    arch/x86/hvm/svm/vpmu.c -> arch/x86/cpu/vpmu_amd.c
    arch/x86/hvm/vmx/vpmu_core2.c -> arch/x86/cpu/vpmu_intel.c
    include/asm-x86/hvm/vpmu.h -> include/asm-x86/vpmu.h
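
As a minimal sketch of what the header rename above means for callers
(illustration only, not part of the patch body; the header's contents are
unchanged by the move), files that used the old HVM-scoped path simply
switch include paths:

    /* Before this change (VPMU header lived under hvm/): */
    #include <asm/hvm/vpmu.h>

    /* After this change: */
    #include <asm/vpmu.h>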

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Acked-by: Kevin Tian <kevin.tian@intel.com>
16 files changed:
xen/arch/x86/cpu/Makefile
xen/arch/x86/cpu/vpmu.c [new file with mode: 0644]
xen/arch/x86/cpu/vpmu_amd.c [new file with mode: 0644]
xen/arch/x86/cpu/vpmu_intel.c [new file with mode: 0644]
xen/arch/x86/hvm/Makefile
xen/arch/x86/hvm/svm/Makefile
xen/arch/x86/hvm/svm/vpmu.c [deleted file]
xen/arch/x86/hvm/vlapic.c
xen/arch/x86/hvm/vmx/Makefile
xen/arch/x86/hvm/vmx/vpmu_core2.c [deleted file]
xen/arch/x86/hvm/vpmu.c [deleted file]
xen/arch/x86/oprofile/op_model_ppro.c
xen/arch/x86/traps.c
xen/include/asm-x86/hvm/vmx/vmcs.h
xen/include/asm-x86/hvm/vpmu.h [deleted file]
xen/include/asm-x86/vpmu.h [new file with mode: 0644]

index d73d93a6b7545534916fec342ba460559cff09a5..74f23aee0fd58a9d562c7daabc1ed9b9d1e766b5 100644 (file)
@@ -7,3 +7,4 @@ obj-y += common.o
 obj-y += intel.o
 obj-y += intel_cacheinfo.o
 obj-y += mwait-idle.o
+obj-y += vpmu.o vpmu_amd.o vpmu_intel.o
diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
new file mode 100644 (file)
index 0000000..9611b9d
--- /dev/null
@@ -0,0 +1,818 @@
+/*
+ * vpmu.c: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/xenoprof.h>
+#include <xen/event.h>
+#include <xen/guest_access.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/nmi.h>
+#include <asm/p2m.h>
+#include <asm/vpmu.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/svm/svm.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/apic.h>
+#include <public/pmu.h>
+#include <xsm/xsm.h>
+
+#include <compat/pmu.h>
+CHECK_pmu_cntr_pair;
+CHECK_pmu_data;
+CHECK_pmu_params;
+
+/*
+ * "vpmu" :     vpmu generally enabled
+ * "vpmu=off" : vpmu generally disabled
+ * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on.
+ */
+static unsigned int __read_mostly opt_vpmu_enabled;
+unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
+unsigned int __read_mostly vpmu_features = 0;
+static void parse_vpmu_param(char *s);
+custom_param("vpmu", parse_vpmu_param);
+
+static DEFINE_SPINLOCK(vpmu_lock);
+static unsigned vpmu_count;
+
+static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
+
+static void __init parse_vpmu_param(char *s)
+{
+    switch ( parse_bool(s) )
+    {
+    case 0:
+        break;
+    default:
+        if ( !strcmp(s, "bts") )
+            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
+        else if ( *s )
+        {
+            printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
+            break;
+        }
+        /* fall through */
+    case 1:
+        /* Default VPMU mode */
+        vpmu_mode = XENPMU_MODE_SELF;
+        opt_vpmu_enabled = 1;
+        break;
+    }
+}
+
+void vpmu_lvtpc_update(uint32_t val)
+{
+    struct vpmu_struct *vpmu;
+    struct vcpu *curr = current;
+
+    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
+        return;
+
+    vpmu = vcpu_vpmu(curr);
+
+    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+
+    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
+    if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
+         !vpmu_is_set(vpmu, VPMU_CACHED) )
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+}
+
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+                uint64_t supported, bool_t is_write)
+{
+    struct vcpu *curr = current;
+    struct vpmu_struct *vpmu;
+    const struct arch_vpmu_ops *ops;
+    int ret = 0;
+
+    if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
+         ((vpmu_mode & XENPMU_MODE_ALL) &&
+          !is_hardware_domain(current->domain)) )
+         goto nop;
+
+    vpmu = vcpu_vpmu(curr);
+    ops = vpmu->arch_vpmu_ops;
+    if ( !ops )
+        goto nop;
+
+    if ( is_write && ops->do_wrmsr )
+        ret = ops->do_wrmsr(msr, *msr_content, supported);
+    else if ( !is_write && ops->do_rdmsr )
+        ret = ops->do_rdmsr(msr, msr_content);
+    else
+        goto nop;
+
+    /*
+     * We may have received a PMU interrupt while handling MSR access
+     * and since do_wr/rdmsr may load VPMU context we should save
+     * (and unload) it again.
+     */
+    if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
+        vpmu_is_set(vpmu, VPMU_CACHED) )
+    {
+        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+        ops->arch_vpmu_save(curr, 0);
+        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+    }
+
+    return ret;
+
+ nop:
+    if ( !is_write )
+        *msr_content = 0;
+
+    return 0;
+}
+
+static inline struct vcpu *choose_hwdom_vcpu(void)
+{
+    unsigned idx;
+
+    if ( hardware_domain->max_vcpus == 0 )
+        return NULL;
+
+    idx = smp_processor_id() % hardware_domain->max_vcpus;
+
+    return hardware_domain->vcpu[idx];
+}
+
+void vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+    struct vcpu *sampled = current, *sampling;
+    struct vpmu_struct *vpmu;
+    struct vlapic *vlapic;
+    u32 vlapic_lvtpc;
+
+    /*
+     * dom0 will handle interrupts for special domains (e.g. the idle domain) or,
+     * in XENPMU_MODE_ALL, for everyone.
+     */
+    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
+         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
+    {
+        sampling = choose_hwdom_vcpu();
+        if ( !sampling )
+            return;
+    }
+    else
+        sampling = sampled;
+
+    vpmu = vcpu_vpmu(sampling);
+    if ( !vpmu->arch_vpmu_ops )
+        return;
+
+    /* PV(H) guest */
+    if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) )
+    {
+        const struct cpu_user_regs *cur_regs;
+        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
+        domid_t domid;
+
+        if ( !vpmu->xenpmu_data )
+            return;
+
+        if ( is_pvh_vcpu(sampling) &&
+             !(vpmu_mode & XENPMU_MODE_ALL) &&
+             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+            return;
+
+        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
+            return;
+
+        /* PV guest will be reading PMU MSRs from xenpmu_data */
+        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
+        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+
+        if ( has_hvm_container_vcpu(sampled) )
+            *flags = 0;
+        else
+            *flags = PMU_SAMPLE_PV;
+
+        if ( sampled == sampling )
+            domid = DOMID_SELF;
+        else
+            domid = sampled->domain->domain_id;
+
+        /* Store appropriate registers in xenpmu_data */
+        /* FIXME: 32-bit PVH should go here as well */
+        if ( is_pv_32bit_vcpu(sampling) )
+        {
+            /*
+             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+             * and therefore we treat it the same way as a non-privileged
+             * PV 32-bit domain.
+             */
+            struct compat_pmu_regs *cmp;
+
+            cur_regs = guest_cpu_user_regs();
+
+            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
+            cmp->ip = cur_regs->rip;
+            cmp->sp = cur_regs->rsp;
+            cmp->flags = cur_regs->eflags;
+            cmp->ss = cur_regs->ss;
+            cmp->cs = cur_regs->cs;
+            if ( (cmp->cs & 3) > 1 )
+                *flags |= PMU_SAMPLE_USER;
+        }
+        else
+        {
+            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
+
+            if ( (vpmu_mode & XENPMU_MODE_SELF) )
+                cur_regs = guest_cpu_user_regs();
+            else if ( !guest_mode(regs) &&
+                      is_hardware_domain(sampling->domain) )
+            {
+                cur_regs = regs;
+                domid = DOMID_XEN;
+            }
+            else
+                cur_regs = guest_cpu_user_regs();
+
+            r->ip = cur_regs->rip;
+            r->sp = cur_regs->rsp;
+            r->flags = cur_regs->eflags;
+
+            if ( !has_hvm_container_vcpu(sampled) )
+            {
+                r->ss = cur_regs->ss;
+                r->cs = cur_regs->cs;
+                if ( !(sampled->arch.flags & TF_kernel_mode) )
+                    *flags |= PMU_SAMPLE_USER;
+            }
+            else
+            {
+                struct segment_register seg;
+
+                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+                r->cs = seg.sel;
+                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+                r->ss = seg.sel;
+                r->cpl = seg.attr.fields.dpl;
+                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
+                    *flags |= PMU_SAMPLE_REAL;
+            }
+        }
+
+        vpmu->xenpmu_data->domain_id = domid;
+        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
+        if ( is_hardware_domain(sampling->domain) )
+            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
+        else
+            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
+
+        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
+        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+        *flags |= PMU_CACHED;
+        vpmu_set(vpmu, VPMU_CACHED);
+
+        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+
+        return;
+    }
+
+    /* HVM guests */
+    vlapic = vcpu_vlapic(sampling);
+
+    /* We don't support (yet) HVM dom0 */
+    ASSERT(sampling == sampled);
+
+    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
+         !is_vlapic_lvtpc_enabled(vlapic) )
+        return;
+
+    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+
+    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+    {
+    case APIC_MODE_FIXED:
+        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+        break;
+    case APIC_MODE_NMI:
+        sampling->nmi_pending = 1;
+        break;
+    }
+}
+
+void vpmu_do_cpuid(unsigned int input,
+                   unsigned int *eax, unsigned int *ebx,
+                   unsigned int *ecx, unsigned int *edx)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid )
+        vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx);
+}
+
+static void vpmu_save_force(void *arg)
+{
+    struct vcpu *v = (struct vcpu *)arg;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        return;
+
+    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+
+    if ( vpmu->arch_vpmu_ops )
+        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
+
+    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
+
+    per_cpu(last_vcpu, smp_processor_id()) = NULL;
+}
+
+void vpmu_save(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    int pcpu = smp_processor_id();
+
+    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
+       return;
+
+    vpmu->last_pcpu = pcpu;
+    per_cpu(last_vcpu, pcpu) = v;
+
+    if ( vpmu->arch_vpmu_ops )
+        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
+            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+
+    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+}
+
+int vpmu_load(struct vcpu *v, bool_t from_guest)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    int pcpu = smp_processor_id();
+    struct vcpu *prev = NULL;
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+        return 0;
+
+    /* First time this VCPU is running here */
+    if ( vpmu->last_pcpu != pcpu )
+    {
+        /*
+         * Get the context from the last pcpu that we ran on. Note that if
+         * another VCPU is running there it must have saved this VCPU's
+         * context before starting to run (see below).
+         * There should be no race since the remote pcpu will disable
+         * interrupts before saving the context.
+         */
+        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        {
+            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
+                             vpmu_save_force, (void *)v, 1);
+            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+        }
+    }
+
+    /* Prevent forced context save from remote CPU */
+    local_irq_disable();
+
+    prev = per_cpu(last_vcpu, pcpu);
+
+    if ( prev != v && prev )
+    {
+        vpmu = vcpu_vpmu(prev);
+
+        /* Someone ran here before us */
+        vpmu_save_force(prev);
+        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+
+        vpmu = vcpu_vpmu(v);
+    }
+
+    local_irq_enable();
+
+    /* Only load the PMU context immediately when the PMU is counting. */
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+         (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
+        return 0;
+
+    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
+    {
+        int ret;
+
+        apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+        /* Arch code needs to set VPMU_CONTEXT_LOADED */
+        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
+        if ( ret )
+        {
+            apic_write_around(APIC_LVTPC,
+                              vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+void vpmu_initialise(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    uint8_t vendor = current_cpu_data.x86_vendor;
+    int ret;
+    bool_t is_priv_vpmu = is_hardware_domain(v->domain);
+
+    BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
+    BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
+    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
+    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
+
+    ASSERT(!vpmu->flags && !vpmu->context);
+
+    if ( !is_priv_vpmu )
+    {
+        /*
+         * Count active VPMUs so that we won't try to change vpmu_mode while
+         * they are in use.
+         * vpmu_mode can be safely updated while dom0's VPMUs are active and
+         * so we don't need to include it in the count.
+         */
+        spin_lock(&vpmu_lock);
+        vpmu_count++;
+        spin_unlock(&vpmu_lock);
+    }
+
+    switch ( vendor )
+    {
+    case X86_VENDOR_AMD:
+        ret = svm_vpmu_initialise(v);
+        break;
+
+    case X86_VENDOR_INTEL:
+        ret = vmx_vpmu_initialise(v);
+        break;
+
+    default:
+        if ( vpmu_mode != XENPMU_MODE_OFF )
+        {
+            printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
+                   "Disabling VPMU\n", vendor);
+            opt_vpmu_enabled = 0;
+            vpmu_mode = XENPMU_MODE_OFF;
+        }
+        return; /* Don't bother restoring vpmu_count, VPMU is off forever */
+    }
+
+    if ( ret )
+        printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
+
+    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
+    if ( !is_priv_vpmu &&
+         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
+          (vpmu_mode == XENPMU_MODE_ALL)) )
+    {
+        spin_lock(&vpmu_lock);
+        vpmu_count--;
+        spin_unlock(&vpmu_lock);
+    }
+}
+
+static void vpmu_clear_last(void *arg)
+{
+    if ( this_cpu(last_vcpu) == arg )
+        this_cpu(last_vcpu) = NULL;
+}
+
+void vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+        return;
+
+    /*
+     * Need to clear last_vcpu in case it points to v.
+     * We can check here non-atomically whether it is 'v' since
+     * last_vcpu can never become 'v' again at this point.
+     * We will test it again in vpmu_clear_last() with interrupts
+     * disabled to make sure we don't clear someone else.
+     */
+    if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v )
+        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
+                         vpmu_clear_last, v, 1);
+
+    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
+    {
+        /* Unload VPMU first. This will stop counters */
+        on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
+                         vpmu_save_force, v, 1);
+        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
+    }
+
+    spin_lock(&vpmu_lock);
+    if ( !is_hardware_domain(v->domain) )
+        vpmu_count--;
+    spin_unlock(&vpmu_lock);
+}
+
+static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
+{
+    struct vcpu *v;
+    struct vpmu_struct *vpmu;
+    struct page_info *page;
+    uint64_t gfn = params->val;
+
+    if ( (vpmu_mode == XENPMU_MODE_OFF) ||
+         ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
+        return -EINVAL;
+
+    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
+        return -EINVAL;
+
+    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+    if ( !page )
+        return -EINVAL;
+
+    if ( !get_page_type(page, PGT_writable_page) )
+    {
+        put_page(page);
+        return -EINVAL;
+    }
+
+    v = d->vcpu[params->vcpu];
+    vpmu = vcpu_vpmu(v);
+
+    spin_lock(&vpmu->vpmu_lock);
+
+    if ( v->arch.vpmu.xenpmu_data )
+    {
+        spin_unlock(&vpmu->vpmu_lock);
+        put_page_and_type(page);
+        return -EEXIST;
+    }
+
+    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
+    if ( !v->arch.vpmu.xenpmu_data )
+    {
+        spin_unlock(&vpmu->vpmu_lock);
+        put_page_and_type(page);
+        return -ENOMEM;
+    }
+
+    vpmu_initialise(v);
+
+    spin_unlock(&vpmu->vpmu_lock);
+
+    return 0;
+}
+
+static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
+{
+    struct vcpu *v;
+    struct vpmu_struct *vpmu;
+    uint64_t mfn;
+    void *xenpmu_data;
+
+    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
+        return;
+
+    v = d->vcpu[params->vcpu];
+    if ( v != current )
+        vcpu_pause(v);
+
+    vpmu = vcpu_vpmu(v);
+    spin_lock(&vpmu->vpmu_lock);
+
+    vpmu_destroy(v);
+    xenpmu_data = vpmu->xenpmu_data;
+    vpmu->xenpmu_data = NULL;
+
+    spin_unlock(&vpmu->vpmu_lock);
+
+    if ( xenpmu_data )
+    {
+        mfn = domain_page_map_to_mfn(xenpmu_data);
+        ASSERT(mfn_valid(mfn));
+        unmap_domain_page_global(xenpmu_data);
+        put_page_and_type(mfn_to_page(mfn));
+    }
+
+    if ( v != current )
+        vcpu_unpause(v);
+}
+
+/* Dump some vpmu information on the console. Used in keyhandler dump_domains(). */
+void vpmu_dump(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
+        vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
+}
+
+long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
+{
+    int ret;
+    struct vcpu *curr;
+    struct xen_pmu_params pmu_params = {.val = 0};
+    struct xen_pmu_data *xenpmu_data;
+    struct vpmu_struct *vpmu;
+
+    if ( !opt_vpmu_enabled )
+        return -EOPNOTSUPP;
+
+    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
+    if ( ret )
+        return ret;
+
+    /* Check major version when parameters are specified */
+    switch ( op )
+    {
+    case XENPMU_mode_set:
+    case XENPMU_feature_set:
+    case XENPMU_init:
+    case XENPMU_finish:
+        if ( copy_from_guest(&pmu_params, arg, 1) )
+            return -EFAULT;
+
+        if ( pmu_params.version.maj != XENPMU_VER_MAJ )
+            return -EINVAL;
+    }
+
+    switch ( op )
+    {
+    case XENPMU_mode_set:
+    {
+        if ( (pmu_params.val &
+              ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
+             (hweight64(pmu_params.val) > 1) )
+            return -EINVAL;
+
+        /* 32-bit dom0 can only sample itself. */
+        if ( is_pv_32bit_vcpu(current) &&
+             (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
+            return -EINVAL;
+
+        spin_lock(&vpmu_lock);
+
+        /*
+         * We can always safely switch between XENPMU_MODE_SELF and
+         * XENPMU_MODE_HV while other VPMUs are active.
+         */
+        if ( (vpmu_count == 0) ||
+             ((vpmu_mode ^ pmu_params.val) ==
+              (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
+            vpmu_mode = pmu_params.val;
+        else if ( vpmu_mode != pmu_params.val )
+        {
+            printk(XENLOG_WARNING
+                   "VPMU: Cannot change mode while active VPMUs exist\n");
+            ret = -EBUSY;
+        }
+
+        spin_unlock(&vpmu_lock);
+
+        break;
+    }
+
+    case XENPMU_mode_get:
+        memset(&pmu_params, 0, sizeof(pmu_params));
+        pmu_params.val = vpmu_mode;
+
+        pmu_params.version.maj = XENPMU_VER_MAJ;
+        pmu_params.version.min = XENPMU_VER_MIN;
+
+        if ( copy_to_guest(arg, &pmu_params, 1) )
+            ret = -EFAULT;
+
+        break;
+
+    case XENPMU_feature_set:
+        if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
+            return -EINVAL;
+
+        spin_lock(&vpmu_lock);
+
+        if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
+            vpmu_features = pmu_params.val;
+        else
+        {
+            printk(XENLOG_WARNING "VPMU: Cannot change features while"
+                                  " active VPMUs exist\n");
+            ret = -EBUSY;
+        }
+
+        spin_unlock(&vpmu_lock);
+
+        break;
+
+    case XENPMU_feature_get:
+        pmu_params.val = vpmu_features;
+        if ( copy_field_to_guest(arg, &pmu_params, val) )
+            ret = -EFAULT;
+
+        break;
+
+    case XENPMU_init:
+        ret = pvpmu_init(current->domain, &pmu_params);
+        break;
+
+    case XENPMU_finish:
+        pvpmu_finish(current->domain, &pmu_params);
+        break;
+
+    case XENPMU_lvtpc_set:
+        xenpmu_data = current->arch.vpmu.xenpmu_data;
+        if ( xenpmu_data != NULL )
+            vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+        else
+            ret = -EINVAL;
+        break;
+
+    case XENPMU_flush:
+        curr = current;
+        vpmu = vcpu_vpmu(curr);
+        xenpmu_data = curr->arch.vpmu.xenpmu_data;
+        if ( xenpmu_data == NULL )
+            return -EINVAL;
+        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
+        vpmu_reset(vpmu, VPMU_CACHED);
+        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+        if ( vpmu_load(curr, 1) )
+        {
+            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
+            vpmu_set(vpmu, VPMU_CACHED);
+            ret = -EIO;
+        }
+        break;
+
+    default:
+        ret = -EINVAL;
+    }
+
+    return ret;
+}
+
+static int __init vpmu_init(void)
+{
+    int vendor = current_cpu_data.x86_vendor;
+
+    if ( !opt_vpmu_enabled )
+    {
+        printk(XENLOG_INFO "VPMU: disabled\n");
+        return 0;
+    }
+
+    /* NMI watchdog uses LVTPC and HW counter */
+    if ( opt_watchdog && opt_vpmu_enabled )
+    {
+        printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
+        opt_vpmu_enabled = 0;
+        vpmu_mode = XENPMU_MODE_OFF;
+        return 0;
+    }
+
+    switch ( vendor )
+    {
+    case X86_VENDOR_AMD:
+        if ( amd_vpmu_init() )
+           vpmu_mode = XENPMU_MODE_OFF;
+        break;
+    case X86_VENDOR_INTEL:
+        if ( core2_vpmu_init() )
+           vpmu_mode = XENPMU_MODE_OFF;
+        break;
+    default:
+        printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
+               "Turning VPMU off.\n", vendor);
+        vpmu_mode = XENPMU_MODE_OFF;
+        break;
+    }
+
+    if ( vpmu_mode != XENPMU_MODE_OFF )
+        printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
+               __stringify(XENPMU_VER_MIN) "\n");
+    else
+        opt_vpmu_enabled = 0;
+
+    return 0;
+}
+__initcall(vpmu_init);
diff --git a/xen/arch/x86/cpu/vpmu_amd.c b/xen/arch/x86/cpu/vpmu_amd.c
new file mode 100644 (file)
index 0000000..e5195cf
--- /dev/null
@@ -0,0 +1,588 @@
+/*
+ * vpmu.c: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ * Parts of this code are Copyright (c) 2007, Intel Corporation
+ *
+ * Author: Wei Wang <wei.wang2@amd.com>
+ * Tested by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/xenoprof.h>
+#include <xen/hvm/save.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <asm/apic.h>
+#include <asm/vpmu.h>
+#include <asm/hvm/vlapic.h>
+#include <public/pmu.h>
+
+#define MSR_F10H_EVNTSEL_GO_SHIFT   40
+#define MSR_F10H_EVNTSEL_EN_SHIFT   22
+#define MSR_F10H_COUNTER_LENGTH     48
+
+#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
+#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT))
+#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
+#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1))))
+
+static unsigned int __read_mostly num_counters;
+static const u32 __read_mostly *counters;
+static const u32 __read_mostly *ctrls;
+static bool_t __read_mostly k7_counters_mirrored;
+
+/* Total size of PMU registers block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+
+#define F10H_NUM_COUNTERS   4
+#define F15H_NUM_COUNTERS   6
+#define MAX_NUM_COUNTERS    F15H_NUM_COUNTERS
+
+/* PMU Counter MSRs. */
+static const u32 AMD_F10H_COUNTERS[] = {
+    MSR_K7_PERFCTR0,
+    MSR_K7_PERFCTR1,
+    MSR_K7_PERFCTR2,
+    MSR_K7_PERFCTR3
+};
+
+/* PMU Control MSRs. */
+static const u32 AMD_F10H_CTRLS[] = {
+    MSR_K7_EVNTSEL0,
+    MSR_K7_EVNTSEL1,
+    MSR_K7_EVNTSEL2,
+    MSR_K7_EVNTSEL3
+};
+
+static const u32 AMD_F15H_COUNTERS[] = {
+    MSR_AMD_FAM15H_PERFCTR0,
+    MSR_AMD_FAM15H_PERFCTR1,
+    MSR_AMD_FAM15H_PERFCTR2,
+    MSR_AMD_FAM15H_PERFCTR3,
+    MSR_AMD_FAM15H_PERFCTR4,
+    MSR_AMD_FAM15H_PERFCTR5
+};
+
+static const u32 AMD_F15H_CTRLS[] = {
+    MSR_AMD_FAM15H_EVNTSEL0,
+    MSR_AMD_FAM15H_EVNTSEL1,
+    MSR_AMD_FAM15H_EVNTSEL2,
+    MSR_AMD_FAM15H_EVNTSEL3,
+    MSR_AMD_FAM15H_EVNTSEL4,
+    MSR_AMD_FAM15H_EVNTSEL5
+};
+
+/* Bits [63:42], [39:36], 21 and 19 are reserved */
+#define CTRL_RSVD_MASK ((-1ULL & (~((1ULL << 42) - 1))) | \
+                        (0xfULL << 36) | (1ULL << 21) | (1ULL << 19))
+static uint64_t __read_mostly ctrl_rsvd[MAX_NUM_COUNTERS];
+
+/* Use private context as a flag for MSR bitmap */
+#define msr_bitmap_on(vpmu)    do {                                    \
+                                   (vpmu)->priv_context = (void *)-1L; \
+                               } while (0)
+#define msr_bitmap_off(vpmu)   do {                                    \
+                                   (vpmu)->priv_context = NULL;        \
+                               } while (0)
+#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL)
+
+static inline int get_pmu_reg_type(u32 addr, unsigned int *idx)
+{
+    if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) )
+    {
+        *idx = addr - MSR_K7_EVNTSEL0;
+        return MSR_TYPE_CTRL;
+    }
+
+    if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) )
+    {
+        *idx = addr - MSR_K7_PERFCTR0;
+        return MSR_TYPE_COUNTER;
+    }
+
+    if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) &&
+         (addr <= MSR_AMD_FAM15H_PERFCTR5 ) )
+    {
+        *idx = (addr - MSR_AMD_FAM15H_EVNTSEL0) >> 1;
+        if (addr & 1)
+            return MSR_TYPE_COUNTER;
+        else
+            return MSR_TYPE_CTRL;
+    }
+
+    /* unsupported registers */
+    return -1;
+}
+
+static inline u32 get_fam15h_addr(u32 addr)
+{
+    switch ( addr )
+    {
+    case MSR_K7_PERFCTR0:
+        return MSR_AMD_FAM15H_PERFCTR0;
+    case MSR_K7_PERFCTR1:
+        return MSR_AMD_FAM15H_PERFCTR1;
+    case MSR_K7_PERFCTR2:
+        return MSR_AMD_FAM15H_PERFCTR2;
+    case MSR_K7_PERFCTR3:
+        return MSR_AMD_FAM15H_PERFCTR3;
+    case MSR_K7_EVNTSEL0:
+        return MSR_AMD_FAM15H_EVNTSEL0;
+    case MSR_K7_EVNTSEL1:
+        return MSR_AMD_FAM15H_EVNTSEL1;
+    case MSR_K7_EVNTSEL2:
+        return MSR_AMD_FAM15H_EVNTSEL2;
+    case MSR_K7_EVNTSEL3:
+        return MSR_AMD_FAM15H_EVNTSEL3;
+    default:
+        break;
+    }
+
+    return addr;
+}
+
+static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
+{
+    unsigned i;
+    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+    memset(&ctxt->regs[0], 0, regs_sz);
+    for ( i = 0; i < num_counters; i++ )
+        ctrl_regs[i] = ctrl_rsvd[i];
+}
+
+static void amd_vpmu_set_msr_bitmap(struct vcpu *v)
+{
+    unsigned int i;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    for ( i = 0; i < num_counters; i++ )
+    {
+        svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE);
+        svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE);
+    }
+
+    msr_bitmap_on(vpmu);
+}
+
+static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
+{
+    unsigned int i;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    for ( i = 0; i < num_counters; i++ )
+    {
+        svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW);
+        svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW);
+    }
+
+    msr_bitmap_off(vpmu);
+}
+
+static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+    return 1;
+}
+
+static inline void context_load(struct vcpu *v)
+{
+    unsigned int i;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+    uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+    for ( i = 0; i < num_counters; i++ )
+    {
+        wrmsrl(counters[i], counter_regs[i]);
+        wrmsrl(ctrls[i], ctrl_regs[i]);
+    }
+}
+
+static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_amd_ctxt *ctxt;
+    uint64_t *ctrl_regs;
+    unsigned int i;
+
+    vpmu_reset(vpmu, VPMU_FROZEN);
+
+    if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+    {
+        ctxt = vpmu->context;
+        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+        for ( i = 0; i < num_counters; i++ )
+            wrmsrl(ctrls[i], ctrl_regs[i]);
+
+        return 0;
+    }
+
+    if ( from_guest )
+    {
+        bool_t is_running = 0;
+        struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+
+        ASSERT(!is_hvm_vcpu(v));
+
+        ctxt = vpmu->context;
+        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+        memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
+
+        for ( i = 0; i < num_counters; i++ )
+        {
+            if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
+            {
+                /*
+                 * Not necessary to re-init context since we should never load
+                 * it until guest provides valid values. But just to be safe.
+                 */
+                amd_vpmu_init_regs(ctxt);
+                return -EINVAL;
+            }
+
+            if ( is_pmu_enabled(ctrl_regs[i]) )
+                is_running = 1;
+        }
+
+        if ( is_running )
+            vpmu_set(vpmu, VPMU_RUNNING);
+        else
+            vpmu_reset(vpmu, VPMU_RUNNING);
+    }
+
+    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+
+    context_load(v);
+
+    return 0;
+}
+
+static inline void context_save(struct vcpu *v)
+{
+    unsigned int i;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+    uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+
+    /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */
+    for ( i = 0; i < num_counters; i++ )
+        rdmsrl(counters[i], counter_regs[i]);
+}
+
+static int amd_vpmu_save(struct vcpu *v,  bool_t to_guest)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    unsigned int i;
+
+    /* Stop the counters. */
+    for ( i = 0; i < num_counters; i++ )
+        wrmsrl(ctrls[i], 0);
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
+    {
+        vpmu_set(vpmu, VPMU_FROZEN);
+        return 0;
+    }
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        return 0;
+
+    context_save(v);
+
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
+         has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+        amd_vpmu_unset_msr_bitmap(v);
+
+    if ( to_guest )
+    {
+        struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt;
+
+        ASSERT(!is_hvm_vcpu(v));
+        ctxt = vpmu->context;
+        guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+        memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz);
+    }
+
+    return 1;
+}
+
+static void context_update(unsigned int msr, u64 msr_content)
+{
+    unsigned int i;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+    uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+    if ( k7_counters_mirrored &&
+        ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) )
+    {
+        msr = get_fam15h_addr(msr);
+    }
+
+    for ( i = 0; i < num_counters; i++ )
+    {
+        if ( msr == ctrls[i] )
+        {
+            ctrl_regs[i] = msr_content;
+            return;
+        }
+        else if ( msr == counters[i] )
+        {
+            counter_regs[i] = msr_content;
+            return;
+        }
+    }
+}
+
+static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+                             uint64_t supported)
+{
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    unsigned int idx = 0;
+    int type = get_pmu_reg_type(msr, &idx);
+
+    ASSERT(!supported);
+
+    if ( (type == MSR_TYPE_CTRL ) &&
+         ((msr_content & CTRL_RSVD_MASK) != ctrl_rsvd[idx]) )
+        return -EINVAL;
+
+    /* For all counters, enable guest only mode for HVM guest */
+    if ( has_hvm_container_vcpu(v) && (type == MSR_TYPE_CTRL) &&
+         !is_guest_mode(msr_content) )
+    {
+        set_guest_mode(msr_content);
+    }
+
+    /* check if the first counter is enabled */
+    if ( (type == MSR_TYPE_CTRL) &&
+        is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) )
+    {
+        if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
+            return 0;
+        vpmu_set(vpmu, VPMU_RUNNING);
+
+        if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+             amd_vpmu_set_msr_bitmap(v);
+    }
+
+    /* stop saving & restore if guest stops first counter */
+    if ( (type == MSR_TYPE_CTRL) &&
+        (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
+    {
+        vpmu_reset(vpmu, VPMU_RUNNING);
+        if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+             amd_vpmu_unset_msr_bitmap(v);
+        release_pmu_ownship(PMU_OWNER_HVM);
+    }
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
+        || vpmu_is_set(vpmu, VPMU_FROZEN) )
+    {
+        context_load(v);
+        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+        vpmu_reset(vpmu, VPMU_FROZEN);
+    }
+
+    /* Update vpmu context immediately */
+    context_update(msr, msr_content);
+
+    /* Write to hw counters */
+    wrmsrl(msr, msr_content);
+    return 0;
+}
+
+static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
+        || vpmu_is_set(vpmu, VPMU_FROZEN) )
+    {
+        context_load(v);
+        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+        vpmu_reset(vpmu, VPMU_FROZEN);
+    }
+
+    rdmsrl(msr, *msr_content);
+
+    return 0;
+}
+
+static void amd_vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+        amd_vpmu_unset_msr_bitmap(v);
+
+    xfree(vpmu->context);
+
+    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
+        release_pmu_ownship(PMU_OWNER_HVM);
+
+    vpmu_clear(vpmu);
+}
+
+/* VPMU part of the 'q' keyhandler */
+static void amd_vpmu_dump(const struct vcpu *v)
+{
+    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    const struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+    const uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+    const uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+    unsigned int i;
+
+    printk("    VPMU state: 0x%x ", vpmu->flags);
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+    {
+         printk("\n");
+         return;
+    }
+
+    printk("(");
+    if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) )
+        printk("PASSIVE_DOMAIN_ALLOCATED, ");
+    if ( vpmu_is_set(vpmu, VPMU_FROZEN) )
+        printk("FROZEN, ");
+    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
+        printk("SAVE, ");
+    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
+        printk("RUNNING, ");
+    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        printk("LOADED, ");
+    printk("ALLOCATED)\n");
+
+    for ( i = 0; i < num_counters; i++ )
+    {
+        uint64_t ctrl, cntr;
+
+        rdmsrl(ctrls[i], ctrl);
+        rdmsrl(counters[i], cntr);
+        printk("      %#x: %#lx (%#lx in HW)    %#x: %#lx (%#lx in HW)\n",
+               ctrls[i], ctrl_regs[i], ctrl,
+               counters[i], counter_regs[i], cntr);
+    }
+}
+
+struct arch_vpmu_ops amd_vpmu_ops = {
+    .do_wrmsr = amd_vpmu_do_wrmsr,
+    .do_rdmsr = amd_vpmu_do_rdmsr,
+    .do_interrupt = amd_vpmu_do_interrupt,
+    .arch_vpmu_destroy = amd_vpmu_destroy,
+    .arch_vpmu_save = amd_vpmu_save,
+    .arch_vpmu_load = amd_vpmu_load,
+    .arch_vpmu_dump = amd_vpmu_dump
+};
+
+int svm_vpmu_initialise(struct vcpu *v)
+{
+    struct xen_pmu_amd_ctxt *ctxt;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu_mode == XENPMU_MODE_OFF )
+        return 0;
+
+    if ( !counters )
+        return -EINVAL;
+
+    ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz);
+    if ( !ctxt )
+    {
+        printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
+               " PMU feature is unavailable on domain %d vcpu %d.\n",
+               v->vcpu_id, v->domain->domain_id);
+        return -ENOMEM;
+    }
+
+    ctxt->counters = sizeof(*ctxt);
+    ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
+    amd_vpmu_init_regs(ctxt);
+
+    vpmu->context = ctxt;
+    vpmu->priv_context = NULL;
+
+    if ( !is_hvm_vcpu(v) )
+    {
+        /* Copy register offsets to shared area */
+        ASSERT(vpmu->xenpmu_data);
+        memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt,
+               offsetof(struct xen_pmu_amd_ctxt, regs));
+    }
+
+    vpmu->arch_vpmu_ops = &amd_vpmu_ops;
+
+    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
+    return 0;
+}
+
+int __init amd_vpmu_init(void)
+{
+    unsigned int i;
+
+    switch ( current_cpu_data.x86 )
+    {
+    case 0x15:
+        num_counters = F15H_NUM_COUNTERS;
+        counters = AMD_F15H_COUNTERS;
+        ctrls = AMD_F15H_CTRLS;
+        k7_counters_mirrored = 1;
+        break;
+    case 0x10:
+    case 0x12:
+    case 0x14:
+    case 0x16:
+        num_counters = F10H_NUM_COUNTERS;
+        counters = AMD_F10H_COUNTERS;
+        ctrls = AMD_F10H_CTRLS;
+        k7_counters_mirrored = 0;
+        break;
+    default:
+        printk(XENLOG_WARNING "VPMU: Unsupported CPU family %#x\n",
+               current_cpu_data.x86);
+        return -EINVAL;
+    }
+
+    if ( sizeof(struct xen_pmu_data) +
+         2 * sizeof(uint64_t) * num_counters > PAGE_SIZE )
+    {
+        printk(XENLOG_WARNING
+               "VPMU: Register bank does not fit into VPMU shared page\n");
+        counters = ctrls = NULL;
+        num_counters = 0;
+        return -ENOSPC;
+    }
+
+    for ( i = 0; i < num_counters; i++ )
+    {
+        rdmsrl(ctrls[i], ctrl_rsvd[i]);
+        ctrl_rsvd[i] &= CTRL_RSVD_MASK;
+    }
+
+    regs_sz = 2 * sizeof(uint64_t) * num_counters;
+
+    return 0;
+}
+
diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
new file mode 100644 (file)
index 0000000..a7d3206
--- /dev/null
@@ -0,0 +1,1041 @@
+/*
+ * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/xenoprof.h>
+#include <xen/irq.h>
+#include <asm/system.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/apic.h>
+#include <asm/traps.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+#include <asm/vpmu.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <public/pmu.h>
+
+/*
+ * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
+ * instruction.
+ * cpuid 0xa - Architectural Performance Monitoring Leaf
+ * Register eax
+ */
+#define PMU_VERSION_SHIFT        0  /* Version ID */
+#define PMU_VERSION_BITS         8  /* 8 bits 0..7 */
+#define PMU_VERSION_MASK         (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)
+
+#define PMU_GENERAL_NR_SHIFT     8  /* Number of general pmu registers */
+#define PMU_GENERAL_NR_BITS      8  /* 8 bits 8..15 */
+#define PMU_GENERAL_NR_MASK      (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)
+
+#define PMU_GENERAL_WIDTH_SHIFT 16  /* Width of general pmu registers */
+#define PMU_GENERAL_WIDTH_BITS   8  /* 8 bits 16..23 */
+#define PMU_GENERAL_WIDTH_MASK  (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
+/* Register edx */
+#define PMU_FIXED_NR_SHIFT       0  /* Number of fixed pmu registers */
+#define PMU_FIXED_NR_BITS        5  /* 5 bits 0..4 */
+#define PMU_FIXED_NR_MASK        (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT)
+
+#define PMU_FIXED_WIDTH_SHIFT    5  /* Width of fixed pmu registers */
+#define PMU_FIXED_WIDTH_BITS     8  /* 8 bits 5..12 */
+#define PMU_FIXED_WIDTH_MASK     (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT)
+
+/* Alias registers (0x4c1) for full-width writes to PMCs */
+#define MSR_PMC_ALIAS_MASK       (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
+static bool_t __read_mostly full_width_write;
+
+/* Intel-specific VPMU features */
+#define VPMU_CPU_HAS_DS                     0x100 /* Has Debug Store */
+#define VPMU_CPU_HAS_BTS                    0x200 /* Has Branch Trace Store */
+
+/*
+ * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
+ * counters. 4 bits for every counter.
+ */
+#define FIXED_CTR_CTRL_BITS 4
+#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
+
+#define ARCH_CNTR_ENABLED   (1ULL << 22)
+
+/* Number of general-purpose and fixed performance counters */
+static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
+
+/* Masks used for testing whether an MSR is valid */
+#define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21))
+static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
+static uint64_t __read_mostly global_ovf_ctrl_mask;
+
+/* Total size of PMU registers block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+/* Offset into context of the beginning of PMU register block */
+static const unsigned int regs_off =
+        sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
+        sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
+
+/*
+ * QUIRK to work around an issue on various family 6 CPUs.
+ * The issue leads to endless PMC interrupt loops on the processor.
+ * If the interrupt handler is running and a pmc reaches the value 0, this
+ * value remains forever and it immediately triggers a new interrupt after
+ * the handler finishes.
+ * A workaround is to read all flagged counters and, if the value is 0, to
+ * write 1 (or another non-zero value) into them.
+ * No erratum exists and the real cause of this behaviour is unknown.
+ */
+bool_t __read_mostly is_pmc_quirk;
+
+static void check_pmc_quirk(void)
+{
+    if ( current_cpu_data.x86 == 6 )
+        is_pmc_quirk = 1;
+    else
+        is_pmc_quirk = 0;
+}
+
+static void handle_pmc_quirk(u64 msr_content)
+{
+    int i;
+    u64 val;
+
+    if ( !is_pmc_quirk )
+        return;
+
+    val = msr_content;
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+    {
+        if ( val & 0x1 )
+        {
+            u64 cnt;
+            rdmsrl(MSR_P6_PERFCTR(i), cnt);
+            if ( cnt == 0 )
+                wrmsrl(MSR_P6_PERFCTR(i), 1);
+        }
+        val >>= 1;
+    }
+    val = msr_content >> 32;
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+    {
+        if ( val & 0x1 )
+        {
+            u64 cnt;
+            rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
+            if ( cnt == 0 )
+                wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
+        }
+        val >>= 1;
+    }
+}
+
+/*
+ * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15]
+ */
+static int core2_get_arch_pmc_count(void)
+{
+    u32 eax;
+
+    eax = cpuid_eax(0xa);
+    return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
+}
+
+/*
+ * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4]
+ */
+static int core2_get_fixed_pmc_count(void)
+{
+    u32 edx;
+
+    edx = cpuid_edx(0xa);
+    return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
+}
+
+/* edx bits 5-12: Bit width of fixed-function performance counters  */
+static int core2_get_bitwidth_fix_count(void)
+{
+    u32 edx;
+
+    edx = cpuid_edx(0xa);
+    return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
+}
+
+static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
+{
+    u32 msr_index_pmc;
+
+    switch ( msr_index )
+    {
+    case MSR_CORE_PERF_FIXED_CTR_CTRL:
+    case MSR_IA32_DS_AREA:
+    case MSR_IA32_PEBS_ENABLE:
+        *type = MSR_TYPE_CTRL;
+        return 1;
+
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        *type = MSR_TYPE_GLOBAL;
+        return 1;
+
+    default:
+
+        if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
+             (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
+        {
+            *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
+            *type = MSR_TYPE_COUNTER;
+            return 1;
+        }
+
+        if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
+             (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
+        {
+            *index = msr_index - MSR_P6_EVNTSEL(0);
+            *type = MSR_TYPE_ARCH_CTRL;
+            return 1;
+        }
+
+        msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
+        if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
+             (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
+        {
+            *type = MSR_TYPE_ARCH_COUNTER;
+            *index = msr_index_pmc - MSR_IA32_PERFCTR0;
+            return 1;
+        }
+        return 0;
+    }
+}
+
+static inline int msraddr_to_bitpos(int x)
+{
+    ASSERT(x == (x & 0x1fff));
+    return x;
+}
+
+static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap)
+{
+    int i;
+
+    /* Allow Read/Write PMU Counters MSR Directly. */
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+    {
+        clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
+                  msr_bitmap + 0x800/BYTES_PER_LONG);
+    }
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+    {
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i),
+                  msr_bitmap + 0x800/BYTES_PER_LONG);
+
+        if ( full_width_write )
+        {
+            clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
+            clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
+                      msr_bitmap + 0x800/BYTES_PER_LONG);
+        }
+    }
+
+    /* Allow Read PMU Non-global Controls Directly. */
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+         clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
+
+    clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
+    clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
+    clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
+}
+
+static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap)
+{
+    int i;
+
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+    {
+        set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
+        set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
+                msr_bitmap + 0x800/BYTES_PER_LONG);
+    }
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+    {
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap);
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i),
+                msr_bitmap + 0x800/BYTES_PER_LONG);
+
+        if ( full_width_write )
+        {
+            set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
+            set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
+                      msr_bitmap + 0x800/BYTES_PER_LONG);
+        }
+    }
+
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+        set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
+
+    set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
+    set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
+    set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
+}
+
+static inline void __core2_vpmu_save(struct vcpu *v)
+{
+    int i;
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+        rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+        rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
+
+    if ( !has_hvm_container_vcpu(v) )
+        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
+}
+
+static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !has_hvm_container_vcpu(v) )
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
+        return 0;
+
+    __core2_vpmu_save(v);
+
+    /* Unset PMU MSR bitmap to trap lazy load. */
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
+         has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
+        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+
+    if ( to_guest )
+    {
+        ASSERT(!is_hvm_vcpu(v));
+        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
+               vpmu->context + regs_off, regs_sz);
+    }
+
+    return 1;
+}
+
+static inline void __core2_vpmu_load(struct vcpu *v)
+{
+    unsigned int i, pmc_start;
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+        wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
+
+    if ( full_width_write )
+        pmc_start = MSR_IA32_A_PERFCTR0;
+    else
+        pmc_start = MSR_IA32_PERFCTR0;
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+    {
+        wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
+        wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
+    }
+
+    wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
+    wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);
+    wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable);
+
+    if ( !has_hvm_container_vcpu(v) )
+    {
+        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
+        core2_vpmu_cxt->global_ovf_ctrl = 0;
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+    }
+}
+
+static int core2_vpmu_verify(struct vcpu *v)
+{
+    unsigned int i;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+    uint64_t fixed_ctrl;
+    uint64_t *priv_context = vpmu->priv_context;
+    uint64_t enabled_cntrs = 0;
+
+    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
+        return -EINVAL;
+
+    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
+    if ( fixed_ctrl & fixed_ctrl_mask )
+        return -EINVAL;
+
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+    {
+        if ( fixed_counters[i] & fixed_counters_mask )
+            return -EINVAL;
+        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
+            enabled_cntrs |= (1ULL << i);
+    }
+    enabled_cntrs <<= 32;
+
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+    {
+        uint64_t control = xen_pmu_cntr_pair[i].control;
+
+        if ( control & ARCH_CTRL_MASK )
+            return -EINVAL;
+        if ( control & ARCH_CNTR_ENABLED )
+            enabled_cntrs |= (1ULL << i);
+    }
+
+    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
+         !is_canonical_address(core2_vpmu_cxt->ds_area) )
+        return -EINVAL;
+
+    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
+         (core2_vpmu_cxt->ds_area != 0) )
+        vpmu_set(vpmu, VPMU_RUNNING);
+    else
+        vpmu_reset(vpmu, VPMU_RUNNING);
+
+    *priv_context = enabled_cntrs;
+
+    return 0;
+}
+
+static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+        return 0;
+
+    if ( from_guest )
+    {
+        int ret;
+
+        ASSERT(!is_hvm_vcpu(v));
+
+        memcpy(vpmu->context + regs_off,
+               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
+               regs_sz);
+
+        ret = core2_vpmu_verify(v);
+        if ( ret )
+        {
+            /*
+             * Not strictly necessary, since we should never load the
+             * context until the guest has provided valid values; clear it
+             * just to be safe.
+             */
+            memset(vpmu->context + regs_off, 0, regs_sz);
+            return ret;
+        }
+    }
+
+    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+
+    __core2_vpmu_load(v);
+
+    return 0;
+}
+
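+/*
+ * Allocate the per-vcpu register block: the fixed-size header is
+ * followed by fixed_pmc_cnt 64-bit fixed counters and arch_pmc_cnt
+ * counter/control pairs, with fixed_counters/arch_counters holding the
+ * byte offsets of those arrays.  For HVM-container vcpus
+ * MSR_CORE_PERF_GLOBAL_CTRL is also put on the VMCS host-load and guest
+ * MSR lists.
+ */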
+static int core2_vpmu_alloc_resource(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
+    uint64_t *p = NULL;
+
+    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
+        return 0;
+
+    if ( has_hvm_container_vcpu(v) )
+    {
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+            goto out_err;
+
+        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+            goto out_err;
+        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+    }
+
+    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
+                                   sizeof(uint64_t) * fixed_pmc_cnt +
+                                   sizeof(struct xen_pmu_cntr_pair) *
+                                   arch_pmc_cnt);
+    p = xzalloc(uint64_t);
+    if ( !core2_vpmu_cxt || !p )
+        goto out_err;
+
+    core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
+    core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
+                                    sizeof(uint64_t) * fixed_pmc_cnt;
+
+    vpmu->context = core2_vpmu_cxt;
+    vpmu->priv_context = p;
+
+    if ( !is_hvm_vcpu(v) )
+    {
+        /* Copy fixed/arch register offsets to shared area */
+        ASSERT(vpmu->xenpmu_data);
+        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
+    }
+
+    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
+
+    return 1;
+
+out_err:
+    release_pmu_ownship(PMU_OWNER_HVM);
+
+    xfree(core2_vpmu_cxt);
+    xfree(p);
+
+    printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
+           v->vcpu_id, v->domain->domain_id);
+
+    return 0;
+}
+
+static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( !is_core2_vpmu_msr(msr_index, type, index) )
+        return 0;
+
+    if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
+         !core2_vpmu_alloc_resource(current) )
+        return 0;
+
+    /* Do the lazy context load on first MSR access. */
+    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+    {
+        __core2_vpmu_load(current);
+        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+        if ( has_hvm_container_vcpu(current) &&
+             cpu_has_vmx_msr_bitmap )
+            core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
+    }
+    return 1;
+}
+
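+/*
+ * Emulate a write to one of the PMU MSRs.  Values are validated against
+ * the reserved-bit masks, mirrored into the saved context, and written
+ * through to hardware; MSR_CORE_PERF_GLOBAL_CTRL goes via the VMCS guest
+ * MSR area for HVM-container vcpus.  VPMU_RUNNING is recomputed from the
+ * enabled-counter bitmap and the DS area.
+ */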
+static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+                               uint64_t supported)
+{
+    int i, tmp;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
+    uint64_t *enabled_cntrs;
+
+    if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
+    {
+        /* Special handling for BTS */
+        if ( msr == MSR_IA32_DEBUGCTLMSR )
+        {
+            supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
+                         IA32_DEBUGCTLMSR_BTINT;
+
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+                supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
+                             IA32_DEBUGCTLMSR_BTS_OFF_USR;
+            if ( !(msr_content & ~supported) &&
+                 vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+                return 0;
+            if ( (msr_content & supported) &&
+                 !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+                printk(XENLOG_G_WARNING
+                       "%pv: Debug Store unsupported on this CPU\n",
+                       current);
+        }
+        return -EINVAL;
+    }
+
+    ASSERT(!supported);
+
+    if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
+        /* Writing unsupported bits to a fixed counter */
+        return -EINVAL;
+
+    core2_vpmu_cxt = vpmu->context;
+    enabled_cntrs = vpmu->priv_context;
+    switch ( msr )
+    {
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        if ( msr_content & global_ovf_ctrl_mask )
+            return -EINVAL;
+        core2_vpmu_cxt->global_status &= ~msr_content;
+        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
+        return 0;
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+        gdprintk(XENLOG_INFO, "Cannot write read-only MSR "
+                 "MSR_CORE_PERF_GLOBAL_STATUS (0x38E)\n");
+        return -EINVAL;
+    case MSR_IA32_PEBS_ENABLE:
+        if ( msr_content & 1 )
+            gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, "
+                     "which is not supported.\n");
+        core2_vpmu_cxt->pebs_enable = msr_content;
+        return 0;
+    case MSR_IA32_DS_AREA:
+        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
+        {
+            if ( !is_canonical_address(msr_content) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n",
+                         msr_content);
+                return -EINVAL;
+            }
+            core2_vpmu_cxt->ds_area = msr_content;
+            break;
+        }
+        gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
+        return 0;
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+        core2_vpmu_cxt->global_ctrl = msr_content;
+        break;
+    case MSR_CORE_PERF_FIXED_CTR_CTRL:
+        if ( msr_content & fixed_ctrl_mask )
+            return -EINVAL;
+
+        if ( has_hvm_container_vcpu(v) )
+            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+                               &core2_vpmu_cxt->global_ctrl);
+        else
+            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+        *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
+        if ( msr_content != 0 )
+        {
+            u64 val = msr_content;
+            for ( i = 0; i < fixed_pmc_cnt; i++ )
+            {
+                if ( val & 3 )
+                    *enabled_cntrs |= (1ULL << 32) << i;
+                val >>= FIXED_CTR_CTRL_BITS;
+            }
+        }
+
+        core2_vpmu_cxt->fixed_ctrl = msr_content;
+        break;
+    default:
+        tmp = msr - MSR_P6_EVNTSEL(0);
+        if ( tmp >= 0 && tmp < arch_pmc_cnt )
+        {
+            struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+                vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
+            if ( msr_content & ARCH_CTRL_MASK )
+                return -EINVAL;
+
+            if ( has_hvm_container_vcpu(v) )
+                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+                                   &core2_vpmu_cxt->global_ctrl);
+            else
+                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+
+            if ( msr_content & ARCH_CNTR_ENABLED )
+                *enabled_cntrs |= 1ULL << tmp;
+            else
+                *enabled_cntrs &= ~(1ULL << tmp);
+
+            xen_pmu_cntr_pair[tmp].control = msr_content;
+        }
+    }
+
+    if ( type != MSR_TYPE_GLOBAL )
+        wrmsrl(msr, msr_content);
+    else
+    {
+        if ( has_hvm_container_vcpu(v) )
+            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+        else
+            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+    }
+
+    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
+         (core2_vpmu_cxt->ds_area != 0) )
+        vpmu_set(vpmu, VPMU_RUNNING);
+    else
+        vpmu_reset(vpmu, VPMU_RUNNING);
+
+    return 0;
+}
+
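+/*
+ * Emulate a read of a PMU MSR.  Counters are read straight from hardware
+ * (the common check lazily loads the context first), the global status
+ * comes from the saved context, MSR_CORE_PERF_GLOBAL_CTRL is fetched from
+ * the VMCS guest MSR area for HVM-container vcpus, and reads of
+ * MSR_IA32_MISC_ENABLE get BTS_UNAVAIL cleared when BTS is emulated.
+ */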
+static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
+
+    if ( core2_vpmu_msr_common_check(msr, &type, &index) )
+    {
+        core2_vpmu_cxt = vpmu->context;
+        switch ( msr )
+        {
+        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+            *msr_content = 0;
+            break;
+        case MSR_CORE_PERF_GLOBAL_STATUS:
+            *msr_content = core2_vpmu_cxt->global_status;
+            break;
+        case MSR_CORE_PERF_GLOBAL_CTRL:
+            if ( has_hvm_container_vcpu(v) )
+                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+            else
+                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
+            break;
+        default:
+            rdmsrl(msr, *msr_content);
+        }
+    }
+    else if ( msr == MSR_IA32_MISC_ENABLE )
+    {
+        /* Extension for BTS */
+        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+            *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
+    }
+
+    return 0;
+}
+
+static void core2_vpmu_do_cpuid(unsigned int input,
+                                unsigned int *eax, unsigned int *ebx,
+                                unsigned int *ecx, unsigned int *edx)
+{
+    if ( input == 0x1 )
+    {
+        struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
+        {
+            /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */
+            *edx |= cpufeat_mask(X86_FEATURE_DS);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
+                *ecx |= cpufeat_mask(X86_FEATURE_DTES64);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+                *ecx |= cpufeat_mask(X86_FEATURE_DSCPL);
+        }
+    }
+}
+
+/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
+static void core2_vpmu_dump(const struct vcpu *v)
+{
+    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    unsigned int i;
+    const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
+    u64 val;
+    uint64_t *fixed_counters;
+    struct xen_pmu_cntr_pair *cntr_pair;
+
+    if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+         return;
+
+    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
+    {
+        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+            printk("    vPMU loaded\n");
+        else
+            printk("    vPMU allocated\n");
+        return;
+    }
+
+    printk("    vPMU running\n");
+
+    cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+    fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+
+    /* Print the contents of the counter and its configuration msr. */
+    for ( i = 0; i < arch_pmc_cnt; i++ )
+        printk("      general_%d: 0x%016lx ctrl: 0x%016lx\n",
+            i, cntr_pair[i].counter, cntr_pair[i].control);
+
+    /*
+     * Each fixed counter is configured by a 4-bit field in
+     * MSR_CORE_PERF_FIXED_CTR_CTRL.
+     */
+    val = core2_vpmu_cxt->fixed_ctrl;
+    for ( i = 0; i < fixed_pmc_cnt; i++ )
+    {
+        printk("      fixed_%d:   0x%016lx ctrl: %#lx\n",
+               i, fixed_counters[i],
+               val & FIXED_CTR_CTRL_MASK);
+        val >>= FIXED_CTR_CTRL_BITS;
+    }
+}
+
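+/*
+ * PMU interrupt handler.  Overflow bits found in GLOBAL_STATUS are
+ * accumulated into the guest's view and then cleared via GLOBAL_OVF_CTRL
+ * (the constant below covers bits 62/63 plus the fixed and general
+ * counter overflow bits).  With no counter overflow, the interrupt may
+ * still be a BTS/Trace Message one, indicated by IA32_DEBUGCTL.TR.
+ */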
+static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    u64 msr_content;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
+
+    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
+    if ( msr_content )
+    {
+        if ( is_pmc_quirk )
+            handle_pmc_quirk(msr_content);
+        core2_vpmu_cxt->global_status |= msr_content;
+        msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1);
+        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
+    }
+    else
+    {
+        /* No PMC overflow but perhaps a Trace Message interrupt. */
+        __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
+        if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
+            return 0;
+    }
+
+    return 1;
+}
+
+static void core2_vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    xfree(vpmu->context);
+    xfree(vpmu->priv_context);
+    if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
+        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+    release_pmu_ownship(PMU_OWNER_HVM);
+    vpmu_clear(vpmu);
+}
+
+struct arch_vpmu_ops core2_vpmu_ops = {
+    .do_wrmsr = core2_vpmu_do_wrmsr,
+    .do_rdmsr = core2_vpmu_do_rdmsr,
+    .do_interrupt = core2_vpmu_do_interrupt,
+    .do_cpuid = core2_vpmu_do_cpuid,
+    .arch_vpmu_destroy = core2_vpmu_destroy,
+    .arch_vpmu_save = core2_vpmu_save,
+    .arch_vpmu_load = core2_vpmu_load,
+    .arch_vpmu_dump = core2_vpmu_dump
+};
+
+static void core2_no_vpmu_do_cpuid(unsigned int input,
+                                unsigned int *eax, unsigned int *ebx,
+                                unsigned int *ecx, unsigned int *edx)
+{
+    /*
+     * The vpmu is not enabled in this case, so clear the architectural
+     * performance monitoring fields of CPUID leaf 0xa.
+     */
+    if ( input == 0xa )
+    {
+        *eax &= ~PMU_VERSION_MASK;
+        *eax &= ~PMU_GENERAL_NR_MASK;
+        *eax &= ~PMU_GENERAL_WIDTH_MASK;
+
+        *edx &= ~PMU_FIXED_NR_MASK;
+        *edx &= ~PMU_FIXED_WIDTH_MASK;
+    }
+}
+
+/*
+ * If it is a vpmu MSR, report its value as 0.
+ */
+static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+    int type = -1, index = -1;
+    if ( !is_core2_vpmu_msr(msr, &type, &index) )
+        return -EINVAL;
+    *msr_content = 0;
+    return 0;
+}
+
+/*
+ * These functions are used when the vpmu is not enabled.
+ */
+struct arch_vpmu_ops core2_no_vpmu_ops = {
+    .do_rdmsr = core2_no_vpmu_do_rdmsr,
+    .do_cpuid = core2_no_vpmu_do_cpuid,
+};
+
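+/*
+ * Per-vcpu initialisation.  Start with the "no vpmu" ops; when the vpmu
+ * is enabled and counters exist, optionally probe BTS/DS support (CPUID
+ * DS and DTES64 bits, BTS_UNAVAIL in IA32_MISC_ENABLE) if the BTS feature
+ * was requested, then install the real ops.  PV vcpus allocate their
+ * register block here; HVM-container vcpus defer it to the first PMU MSR
+ * access.
+ */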
+int vmx_vpmu_initialise(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    u64 msr_content;
+    static bool_t ds_warned;
+
+    vpmu->arch_vpmu_ops = &core2_no_vpmu_ops;
+    if ( vpmu_mode == XENPMU_MODE_OFF )
+        return 0;
+
+    if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
+        return -EINVAL;
+
+    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
+        goto func_out;
+    /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
+    while ( boot_cpu_has(X86_FEATURE_DS) )
+    {
+        if ( !boot_cpu_has(X86_FEATURE_DTES64) )
+        {
+            if ( !ds_warned )
+                printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
+                       " - Debug Store disabled for guests\n");
+            break;
+        }
+        vpmu_set(vpmu, VPMU_CPU_HAS_DS);
+        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+        if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
+        {
+            /* If BTS_UNAVAIL is set, reset the DS feature. */
+            vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
+            if ( !ds_warned )
+                printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
+                       " - Debug Store disabled for guests\n");
+            break;
+        }
+
+        vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
+        if ( !ds_warned )
+        {
+            if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
+                printk(XENLOG_G_INFO
+                       "vpmu: CPU doesn't support CPL-Qualified BTS\n");
+            printk("******************************************************\n");
+            printk("** WARNING: Emulation of BTS Feature is switched on **\n");
+            printk("** Using this processor feature in a virtualized    **\n");
+            printk("** environment is not 100%% safe.                    **\n");
+            printk("** Setting the DS buffer address with wrong values  **\n");
+            printk("** may lead to hypervisor hangs or crashes.         **\n");
+            printk("** It is NOT recommended for production use!        **\n");
+            printk("******************************************************\n");
+        }
+        break;
+    }
+    ds_warned = 1;
+ func_out:
+
+    /* PV domains can allocate resources immediately */
+    if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
+        return -EIO;
+
+    vpmu->arch_vpmu_ops = &core2_vpmu_ops;
+
+    return 0;
+}
+
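+/*
+ * One-time setup: derive counter counts and widths from CPUID leaf 0xa,
+ * detect full-width counter writes (IA32_PERF_CAPABILITIES bit 13),
+ * precompute the reserved-bit masks used to validate guest writes, and
+ * check that the register bank fits into the shared VPMU page.
+ */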
+int __init core2_vpmu_init(void)
+{
+    u64 caps;
+
+    if ( current_cpu_data.x86 != 6 )
+    {
+        printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
+        return -EINVAL;
+    }
+
+    switch ( current_cpu_data.x86_model )
+    {
+        /* Core2: */
+        case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+        case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+        case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+        case 0x1d: /* six-core 45 nm xeon "Dunnington" */
+
+        case 0x2a: /* SandyBridge */
+        case 0x2d: /* SandyBridge, "Romley-EP" */
+
+        /* Nehalem: */
+        case 0x1a: /* 45 nm nehalem, "Bloomfield" */
+        case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */
+        case 0x2e: /* 45 nm nehalem-ex, "Beckton" */
+
+        /* Westmere: */
+        case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */
+        case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */
+        case 0x2f: /* 32 nm Westmere-EX */
+
+        case 0x3a: /* IvyBridge */
+        case 0x3e: /* IvyBridge EP */
+
+        /* Haswell: */
+        case 0x3c:
+        case 0x3f:
+        case 0x45:
+        case 0x46:
+
+        /* Broadwell */
+        case 0x3d:
+        case 0x4f:
+        case 0x56:
+
+        /* future: */
+        case 0x4e:
+
+        /* next gen Xeon Phi */
+        case 0x57:
+            break;
+
+        default:
+            printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n",
+                   current_cpu_data.x86_model);
+            return -EINVAL;
+    }
+
+    arch_pmc_cnt = core2_get_arch_pmc_count();
+    fixed_pmc_cnt = core2_get_fixed_pmc_count();
+    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
+    full_width_write = (caps >> 13) & 1;
+
+    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
+    fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
+    global_ovf_ctrl_mask = ~(0xC000000000000000 |
+                             (((1ULL << fixed_pmc_cnt) - 1) << 32) |
+                             ((1ULL << arch_pmc_cnt) - 1));
+
+    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
+              sizeof(uint64_t) * fixed_pmc_cnt +
+              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
+
+    check_pmc_quirk();
+
+    if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
+         sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
+    {
+        printk(XENLOG_WARNING
+               "VPMU: Register bank does not fit into VPMU share page\n");
+        arch_pmc_cnt = fixed_pmc_cnt = 0;
+        return -ENOSPC;
+    }
+
+    return 0;
+}
+
index 69af47f712285058bdfde4caabd2b451841aef94..794e79396062493959ff2d9edb0d3e669435eb68 100644 (file)
@@ -23,4 +23,3 @@ obj-y += vlapic.o
 obj-y += vmsi.o
 obj-y += vpic.o
 obj-y += vpt.o
-obj-y += vpmu.o
index a10a55ef9ba840906d6e0129a27de2c37eec3dcb..760d2954da8372c42680e6b012bbeec9cc3d3ce6 100644 (file)
@@ -6,4 +6,3 @@ obj-y += nestedsvm.o
 obj-y += svm.o
 obj-y += svmdebug.o
 obj-y += vmcb.o
-obj-y += vpmu.o
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
deleted file mode 100644 (file)
index 17272cb..0000000
+++ /dev/null
@@ -1,588 +0,0 @@
-/*
- * vpmu.c: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2010, Advanced Micro Devices, Inc.
- * Parts of this code are Copyright (c) 2007, Intel Corporation
- *
- * Author: Wei Wang <wei.wang2@amd.com>
- * Tested by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/xenoprof.h>
-#include <xen/hvm/save.h>
-#include <xen/sched.h>
-#include <xen/irq.h>
-#include <asm/apic.h>
-#include <asm/hvm/vlapic.h>
-#include <asm/hvm/vpmu.h>
-#include <public/pmu.h>
-
-#define MSR_F10H_EVNTSEL_GO_SHIFT   40
-#define MSR_F10H_EVNTSEL_EN_SHIFT   22
-#define MSR_F10H_COUNTER_LENGTH     48
-
-#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
-#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT))
-#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
-#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1))))
-
-static unsigned int __read_mostly num_counters;
-static const u32 __read_mostly *counters;
-static const u32 __read_mostly *ctrls;
-static bool_t __read_mostly k7_counters_mirrored;
-
-/* Total size of PMU registers block (copied to/from PV(H) guest) */
-static unsigned int __read_mostly regs_sz;
-
-#define F10H_NUM_COUNTERS   4
-#define F15H_NUM_COUNTERS   6
-#define MAX_NUM_COUNTERS    F15H_NUM_COUNTERS
-
-/* PMU Counter MSRs. */
-static const u32 AMD_F10H_COUNTERS[] = {
-    MSR_K7_PERFCTR0,
-    MSR_K7_PERFCTR1,
-    MSR_K7_PERFCTR2,
-    MSR_K7_PERFCTR3
-};
-
-/* PMU Control MSRs. */
-static const u32 AMD_F10H_CTRLS[] = {
-    MSR_K7_EVNTSEL0,
-    MSR_K7_EVNTSEL1,
-    MSR_K7_EVNTSEL2,
-    MSR_K7_EVNTSEL3
-};
-
-static const u32 AMD_F15H_COUNTERS[] = {
-    MSR_AMD_FAM15H_PERFCTR0,
-    MSR_AMD_FAM15H_PERFCTR1,
-    MSR_AMD_FAM15H_PERFCTR2,
-    MSR_AMD_FAM15H_PERFCTR3,
-    MSR_AMD_FAM15H_PERFCTR4,
-    MSR_AMD_FAM15H_PERFCTR5
-};
-
-static const u32 AMD_F15H_CTRLS[] = {
-    MSR_AMD_FAM15H_EVNTSEL0,
-    MSR_AMD_FAM15H_EVNTSEL1,
-    MSR_AMD_FAM15H_EVNTSEL2,
-    MSR_AMD_FAM15H_EVNTSEL3,
-    MSR_AMD_FAM15H_EVNTSEL4,
-    MSR_AMD_FAM15H_EVNTSEL5
-};
-
-/* Bits [63:42], [39:36], 21 and 19 are reserved */
-#define CTRL_RSVD_MASK ((-1ULL & (~((1ULL << 42) - 1))) | \
-                        (0xfULL << 36) | (1ULL << 21) | (1ULL << 19))
-static uint64_t __read_mostly ctrl_rsvd[MAX_NUM_COUNTERS];
-
-/* Use private context as a flag for MSR bitmap */
-#define msr_bitmap_on(vpmu)    do {                                    \
-                                   (vpmu)->priv_context = (void *)-1L; \
-                               } while (0)
-#define msr_bitmap_off(vpmu)   do {                                    \
-                                   (vpmu)->priv_context = NULL;        \
-                               } while (0)
-#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL)
-
-static inline int get_pmu_reg_type(u32 addr, unsigned int *idx)
-{
-    if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) )
-    {
-        *idx = addr - MSR_K7_EVNTSEL0;
-        return MSR_TYPE_CTRL;
-    }
-
-    if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) )
-    {
-        *idx = addr - MSR_K7_PERFCTR0;
-        return MSR_TYPE_COUNTER;
-    }
-
-    if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) &&
-         (addr <= MSR_AMD_FAM15H_PERFCTR5 ) )
-    {
-        *idx = (addr - MSR_AMD_FAM15H_EVNTSEL0) >> 1;
-        if (addr & 1)
-            return MSR_TYPE_COUNTER;
-        else
-            return MSR_TYPE_CTRL;
-    }
-
-    /* unsupported registers */
-    return -1;
-}
-
-static inline u32 get_fam15h_addr(u32 addr)
-{
-    switch ( addr )
-    {
-    case MSR_K7_PERFCTR0:
-        return MSR_AMD_FAM15H_PERFCTR0;
-    case MSR_K7_PERFCTR1:
-        return MSR_AMD_FAM15H_PERFCTR1;
-    case MSR_K7_PERFCTR2:
-        return MSR_AMD_FAM15H_PERFCTR2;
-    case MSR_K7_PERFCTR3:
-        return MSR_AMD_FAM15H_PERFCTR3;
-    case MSR_K7_EVNTSEL0:
-        return MSR_AMD_FAM15H_EVNTSEL0;
-    case MSR_K7_EVNTSEL1:
-        return MSR_AMD_FAM15H_EVNTSEL1;
-    case MSR_K7_EVNTSEL2:
-        return MSR_AMD_FAM15H_EVNTSEL2;
-    case MSR_K7_EVNTSEL3:
-        return MSR_AMD_FAM15H_EVNTSEL3;
-    default:
-        break;
-    }
-
-    return addr;
-}
-
-static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
-{
-    unsigned i;
-    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
-    memset(&ctxt->regs[0], 0, regs_sz);
-    for ( i = 0; i < num_counters; i++ )
-        ctrl_regs[i] = ctrl_rsvd[i];
-}
-
-static void amd_vpmu_set_msr_bitmap(struct vcpu *v)
-{
-    unsigned int i;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    for ( i = 0; i < num_counters; i++ )
-    {
-        svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE);
-        svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE);
-    }
-
-    msr_bitmap_on(vpmu);
-}
-
-static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
-{
-    unsigned int i;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    for ( i = 0; i < num_counters; i++ )
-    {
-        svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW);
-        svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW);
-    }
-
-    msr_bitmap_off(vpmu);
-}
-
-static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
-    return 1;
-}
-
-static inline void context_load(struct vcpu *v)
-{
-    unsigned int i;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
-    uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
-    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
-    for ( i = 0; i < num_counters; i++ )
-    {
-        wrmsrl(counters[i], counter_regs[i]);
-        wrmsrl(ctrls[i], ctrl_regs[i]);
-    }
-}
-
-static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_amd_ctxt *ctxt;
-    uint64_t *ctrl_regs;
-    unsigned int i;
-
-    vpmu_reset(vpmu, VPMU_FROZEN);
-
-    if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-    {
-        ctxt = vpmu->context;
-        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
-        for ( i = 0; i < num_counters; i++ )
-            wrmsrl(ctrls[i], ctrl_regs[i]);
-
-        return 0;
-    }
-
-    if ( from_guest )
-    {
-        bool_t is_running = 0;
-        struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
-
-        ASSERT(!is_hvm_vcpu(v));
-
-        ctxt = vpmu->context;
-        ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
-        memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
-
-        for ( i = 0; i < num_counters; i++ )
-        {
-            if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
-            {
-                /*
-                 * Not necessary to re-init context since we should never load
-                 * it until guest provides valid values. But just to be safe.
-                 */
-                amd_vpmu_init_regs(ctxt);
-                return -EINVAL;
-            }
-
-            if ( is_pmu_enabled(ctrl_regs[i]) )
-                is_running = 1;
-        }
-
-        if ( is_running )
-            vpmu_set(vpmu, VPMU_RUNNING);
-        else
-            vpmu_reset(vpmu, VPMU_RUNNING);
-    }
-
-    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
-
-    context_load(v);
-
-    return 0;
-}
-
-static inline void context_save(struct vcpu *v)
-{
-    unsigned int i;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
-    uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
-
-    /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */
-    for ( i = 0; i < num_counters; i++ )
-        rdmsrl(counters[i], counter_regs[i]);
-}
-
-static int amd_vpmu_save(struct vcpu *v,  bool_t to_guest)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    unsigned int i;
-
-    /* Stop the counters. */
-    for ( i = 0; i < num_counters; i++ )
-        wrmsrl(ctrls[i], 0);
-
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
-    {
-        vpmu_set(vpmu, VPMU_FROZEN);
-        return 0;
-    }
-
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-        return 0;
-
-    context_save(v);
-
-    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
-         has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
-        amd_vpmu_unset_msr_bitmap(v);
-
-    if ( to_guest )
-    {
-        struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt;
-
-        ASSERT(!is_hvm_vcpu(v));
-        ctxt = vpmu->context;
-        guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
-        memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz);
-    }
-
-    return 1;
-}
-
-static void context_update(unsigned int msr, u64 msr_content)
-{
-    unsigned int i;
-    struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
-    uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
-    uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
-    if ( k7_counters_mirrored &&
-        ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) )
-    {
-        msr = get_fam15h_addr(msr);
-    }
-
-    for ( i = 0; i < num_counters; i++ )
-    {
-       if ( msr == ctrls[i] )
-       {
-           ctrl_regs[i] = msr_content;
-           return;
-       }
-        else if (msr == counters[i] )
-        {
-            counter_regs[i] = msr_content;
-            return;
-        }
-    }
-}
-
-static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
-                             uint64_t supported)
-{
-    struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    unsigned int idx = 0;
-    int type = get_pmu_reg_type(msr, &idx);
-
-    ASSERT(!supported);
-
-    if ( (type == MSR_TYPE_CTRL ) &&
-         ((msr_content & CTRL_RSVD_MASK) != ctrl_rsvd[idx]) )
-        return -EINVAL;
-
-    /* For all counters, enable guest only mode for HVM guest */
-    if ( has_hvm_container_vcpu(v) && (type == MSR_TYPE_CTRL) &&
-         !is_guest_mode(msr_content) )
-    {
-        set_guest_mode(msr_content);
-    }
-
-    /* check if the first counter is enabled */
-    if ( (type == MSR_TYPE_CTRL) &&
-        is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) )
-    {
-        if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
-            return 0;
-        vpmu_set(vpmu, VPMU_RUNNING);
-
-        if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
-             amd_vpmu_set_msr_bitmap(v);
-    }
-
-    /* stop saving & restore if guest stops first counter */
-    if ( (type == MSR_TYPE_CTRL) &&
-        (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
-    {
-        vpmu_reset(vpmu, VPMU_RUNNING);
-        if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
-             amd_vpmu_unset_msr_bitmap(v);
-        release_pmu_ownship(PMU_OWNER_HVM);
-    }
-
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
-        || vpmu_is_set(vpmu, VPMU_FROZEN) )
-    {
-        context_load(v);
-        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
-        vpmu_reset(vpmu, VPMU_FROZEN);
-    }
-
-    /* Update vpmu context immediately */
-    context_update(msr, msr_content);
-
-    /* Write to hw counters */
-    wrmsrl(msr, msr_content);
-    return 0;
-}
-
-static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
-    struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
-        || vpmu_is_set(vpmu, VPMU_FROZEN) )
-    {
-        context_load(v);
-        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
-        vpmu_reset(vpmu, VPMU_FROZEN);
-    }
-
-    rdmsrl(msr, *msr_content);
-
-    return 0;
-}
-
-static void amd_vpmu_destroy(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
-        amd_vpmu_unset_msr_bitmap(v);
-
-    xfree(vpmu->context);
-
-    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
-        release_pmu_ownship(PMU_OWNER_HVM);
-
-    vpmu_clear(vpmu);
-}
-
-/* VPMU part of the 'q' keyhandler */
-static void amd_vpmu_dump(const struct vcpu *v)
-{
-    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    const struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
-    const uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
-    const uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-    unsigned int i;
-
-    printk("    VPMU state: 0x%x ", vpmu->flags);
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
-    {
-         printk("\n");
-         return;
-    }
-
-    printk("(");
-    if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) )
-        printk("PASSIVE_DOMAIN_ALLOCATED, ");
-    if ( vpmu_is_set(vpmu, VPMU_FROZEN) )
-        printk("FROZEN, ");
-    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
-        printk("SAVE, ");
-    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
-        printk("RUNNING, ");
-    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-        printk("LOADED, ");
-    printk("ALLOCATED)\n");
-
-    for ( i = 0; i < num_counters; i++ )
-    {
-        uint64_t ctrl, cntr;
-
-        rdmsrl(ctrls[i], ctrl);
-        rdmsrl(counters[i], cntr);
-        printk("      %#x: %#lx (%#lx in HW)    %#x: %#lx (%#lx in HW)\n",
-               ctrls[i], ctrl_regs[i], ctrl,
-               counters[i], counter_regs[i], cntr);
-    }
-}
-
-struct arch_vpmu_ops amd_vpmu_ops = {
-    .do_wrmsr = amd_vpmu_do_wrmsr,
-    .do_rdmsr = amd_vpmu_do_rdmsr,
-    .do_interrupt = amd_vpmu_do_interrupt,
-    .arch_vpmu_destroy = amd_vpmu_destroy,
-    .arch_vpmu_save = amd_vpmu_save,
-    .arch_vpmu_load = amd_vpmu_load,
-    .arch_vpmu_dump = amd_vpmu_dump
-};
-
-int svm_vpmu_initialise(struct vcpu *v)
-{
-    struct xen_pmu_amd_ctxt *ctxt;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu_mode == XENPMU_MODE_OFF )
-        return 0;
-
-    if ( !counters )
-        return -EINVAL;
-
-    ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz);
-    if ( !ctxt )
-    {
-        printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
-               " PMU feature is unavailable on domain %d vcpu %d.\n",
-               v->vcpu_id, v->domain->domain_id);
-        return -ENOMEM;
-    }
-
-    ctxt->counters = sizeof(*ctxt);
-    ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
-    amd_vpmu_init_regs(ctxt);
-
-    vpmu->context = ctxt;
-    vpmu->priv_context = NULL;
-
-    if ( !is_hvm_vcpu(v) )
-    {
-        /* Copy register offsets to shared area */
-        ASSERT(vpmu->xenpmu_data);
-        memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt,
-               offsetof(struct xen_pmu_amd_ctxt, regs));
-    }
-
-    vpmu->arch_vpmu_ops = &amd_vpmu_ops;
-
-    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
-    return 0;
-}
-
-int __init amd_vpmu_init(void)
-{
-    unsigned int i;
-
-    switch ( current_cpu_data.x86 )
-    {
-    case 0x15:
-        num_counters = F15H_NUM_COUNTERS;
-        counters = AMD_F15H_COUNTERS;
-        ctrls = AMD_F15H_CTRLS;
-        k7_counters_mirrored = 1;
-        break;
-    case 0x10:
-    case 0x12:
-    case 0x14:
-    case 0x16:
-        num_counters = F10H_NUM_COUNTERS;
-        counters = AMD_F10H_COUNTERS;
-        ctrls = AMD_F10H_CTRLS;
-        k7_counters_mirrored = 0;
-        break;
-    default:
-        printk(XENLOG_WARNING "VPMU: Unsupported CPU family %#x\n",
-               current_cpu_data.x86);
-        return -EINVAL;
-    }
-
-    if ( sizeof(struct xen_pmu_data) +
-         2 * sizeof(uint64_t) * num_counters > PAGE_SIZE )
-    {
-        printk(XENLOG_WARNING
-               "VPMU: Register bank does not fit into VPMU shared page\n");
-        counters = ctrls = NULL;
-        num_counters = 0;
-        return -ENOSPC;
-    }
-
-    for ( i = 0; i < num_counters; i++ )
-    {
-        rdmsrl(ctrls[i], ctrl_rsvd[i]);
-        ctrl_rsvd[i] &= CTRL_RSVD_MASK;
-    }
-
-    regs_sz = 2 * sizeof(uint64_t) * num_counters;
-
-    return 0;
-}
-
index 32e649e0045c3d3f4ab85d7587349459defcc39e..8be5f8e0a59608f1ec7663eca7765911d85b784f 100644 (file)
 #include <asm/page.h>
 #include <asm/apic.h>
 #include <asm/io_apic.h>
+#include <asm/vpmu.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/nestedhvm.h>
-#include <asm/hvm/vpmu.h>
 #include <public/hvm/ioreq.h>
 #include <public/hvm/params.h>
 
index 373b3d91f529eb773ced06abd854a35839673b6a..04a29ce59d6c83aa6319f53142cb22a28803886e 100644 (file)
@@ -3,5 +3,4 @@ obj-y += intr.o
 obj-y += realmode.o
 obj-y += vmcs.o
 obj-y += vmx.o
-obj-y += vpmu_core2.o
 obj-y += vvmx.o
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
deleted file mode 100644 (file)
index 1206e90..0000000
+++ /dev/null
@@ -1,1041 +0,0 @@
-/*
- * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@intel.com>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/xenoprof.h>
-#include <xen/irq.h>
-#include <asm/system.h>
-#include <asm/regs.h>
-#include <asm/types.h>
-#include <asm/apic.h>
-#include <asm/traps.h>
-#include <asm/msr.h>
-#include <asm/msr-index.h>
-#include <asm/hvm/support.h>
-#include <asm/hvm/vlapic.h>
-#include <asm/hvm/vmx/vmx.h>
-#include <asm/hvm/vmx/vmcs.h>
-#include <public/sched.h>
-#include <public/hvm/save.h>
-#include <public/pmu.h>
-#include <asm/hvm/vpmu.h>
-
-/*
- * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
- * instruction.
- * cpuid 0xa - Architectural Performance Monitoring Leaf
- * Register eax
- */
-#define PMU_VERSION_SHIFT        0  /* Version ID */
-#define PMU_VERSION_BITS         8  /* 8 bits 0..7 */
-#define PMU_VERSION_MASK         (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)
-
-#define PMU_GENERAL_NR_SHIFT     8  /* Number of general pmu registers */
-#define PMU_GENERAL_NR_BITS      8  /* 8 bits 8..15 */
-#define PMU_GENERAL_NR_MASK      (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)
-
-#define PMU_GENERAL_WIDTH_SHIFT 16  /* Width of general pmu registers */
-#define PMU_GENERAL_WIDTH_BITS   8  /* 8 bits 16..23 */
-#define PMU_GENERAL_WIDTH_MASK  (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
-/* Register edx */
-#define PMU_FIXED_NR_SHIFT       0  /* Number of fixed pmu registers */
-#define PMU_FIXED_NR_BITS        5  /* 5 bits 0..4 */
-#define PMU_FIXED_NR_MASK        (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT)
-
-#define PMU_FIXED_WIDTH_SHIFT    5  /* Width of fixed pmu registers */
-#define PMU_FIXED_WIDTH_BITS     8  /* 8 bits 5..12 */
-#define PMU_FIXED_WIDTH_MASK     (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT)
-
-/* Alias registers (0x4c1) for full-width writes to PMCs */
-#define MSR_PMC_ALIAS_MASK       (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
-static bool_t __read_mostly full_width_write;
-
-/* Intel-specific VPMU features */
-#define VPMU_CPU_HAS_DS                     0x100 /* Has Debug Store */
-#define VPMU_CPU_HAS_BTS                    0x200 /* Has Branch Trace Store */
-
-/*
- * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
- * counters. 4 bits for every counter.
- */
-#define FIXED_CTR_CTRL_BITS 4
-#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
-
-#define ARCH_CNTR_ENABLED   (1ULL << 22)
-
-/* Number of general-purpose and fixed performance counters */
-static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
-
-/* Masks used for testing whether and MSR is valid */
-#define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21))
-static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
-static uint64_t __read_mostly global_ovf_ctrl_mask;
-
-/* Total size of PMU registers block (copied to/from PV(H) guest) */
-static unsigned int __read_mostly regs_sz;
-/* Offset into context of the beginning of PMU register block */
-static const unsigned int regs_off =
-        sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
-        sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
-
-/*
- * QUIRK to workaround an issue on various family 6 cpus.
- * The issue leads to endless PMC interrupt loops on the processor.
- * If the interrupt handler is running and a pmc reaches the value 0, this
- * value remains forever and it triggers immediately a new interrupt after
- * finishing the handler.
- * A workaround is to read all flagged counters and if the value is 0 write
- * 1 (or another value != 0) into it.
- * There exist no errata and the real cause of this behaviour is unknown.
- */
-bool_t __read_mostly is_pmc_quirk;
-
-static void check_pmc_quirk(void)
-{
-    if ( current_cpu_data.x86 == 6 )
-        is_pmc_quirk = 1;
-    else
-        is_pmc_quirk = 0;    
-}
-
-static void handle_pmc_quirk(u64 msr_content)
-{
-    int i;
-    u64 val;
-
-    if ( !is_pmc_quirk )
-        return;
-
-    val = msr_content;
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-    {
-        if ( val & 0x1 )
-        {
-            u64 cnt;
-            rdmsrl(MSR_P6_PERFCTR(i), cnt);
-            if ( cnt == 0 )
-                wrmsrl(MSR_P6_PERFCTR(i), 1);
-        }
-        val >>= 1;
-    }
-    val = msr_content >> 32;
-    for ( i = 0; i < fixed_pmc_cnt; i++ )
-    {
-        if ( val & 0x1 )
-        {
-            u64 cnt;
-            rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
-            if ( cnt == 0 )
-                wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
-        }
-        val >>= 1;
-    }
-}
-
-/*
- * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15]
- */
-static int core2_get_arch_pmc_count(void)
-{
-    u32 eax;
-
-    eax = cpuid_eax(0xa);
-    return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
-}
-
-/*
- * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4]
- */
-static int core2_get_fixed_pmc_count(void)
-{
-    u32 eax;
-
-    eax = cpuid_eax(0xa);
-    return MASK_EXTR(eax, PMU_FIXED_NR_MASK);
-}
-
-/* edx bits 5-12: Bit width of fixed-function performance counters  */
-static int core2_get_bitwidth_fix_count(void)
-{
-    u32 edx;
-
-    edx = cpuid_edx(0xa);
-    return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
-}
-
-static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
-{
-    u32 msr_index_pmc;
-
-    switch ( msr_index )
-    {
-    case MSR_CORE_PERF_FIXED_CTR_CTRL:
-    case MSR_IA32_DS_AREA:
-    case MSR_IA32_PEBS_ENABLE:
-        *type = MSR_TYPE_CTRL;
-        return 1;
-
-    case MSR_CORE_PERF_GLOBAL_CTRL:
-    case MSR_CORE_PERF_GLOBAL_STATUS:
-    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-        *type = MSR_TYPE_GLOBAL;
-        return 1;
-
-    default:
-
-        if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
-             (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
-        {
-            *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
-            *type = MSR_TYPE_COUNTER;
-            return 1;
-        }
-
-        if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
-             (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
-        {
-            *index = msr_index - MSR_P6_EVNTSEL(0);
-            *type = MSR_TYPE_ARCH_CTRL;
-            return 1;
-        }
-
-        msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
-        if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
-             (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
-        {
-            *type = MSR_TYPE_ARCH_COUNTER;
-            *index = msr_index_pmc - MSR_IA32_PERFCTR0;
-            return 1;
-        }
-        return 0;
-    }
-}
-
-static inline int msraddr_to_bitpos(int x)
-{
-    ASSERT(x == (x & 0x1fff));
-    return x;
-}
-
-static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap)
-{
-    int i;
-
-    /* Allow Read/Write PMU Counters MSR Directly. */
-    for ( i = 0; i < fixed_pmc_cnt; i++ )
-    {
-        clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
-        clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
-                  msr_bitmap + 0x800/BYTES_PER_LONG);
-    }
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-    {
-        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
-        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i),
-                  msr_bitmap + 0x800/BYTES_PER_LONG);
-
-        if ( full_width_write )
-        {
-            clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
-            clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
-                      msr_bitmap + 0x800/BYTES_PER_LONG);
-        }
-    }
-
-    /* Allow Read PMU Non-global Controls Directly. */
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-         clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
-
-    clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
-    clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
-    clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
-}
-
-static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap)
-{
-    int i;
-
-    for ( i = 0; i < fixed_pmc_cnt; i++ )
-    {
-        set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
-        set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
-                msr_bitmap + 0x800/BYTES_PER_LONG);
-    }
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-    {
-        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap);
-        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i),
-                msr_bitmap + 0x800/BYTES_PER_LONG);
-
-        if ( full_width_write )
-        {
-            set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
-            set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
-                      msr_bitmap + 0x800/BYTES_PER_LONG);
-        }
-    }
-
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-        set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
-
-    set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
-    set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
-    set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
-}
-
-static inline void __core2_vpmu_save(struct vcpu *v)
-{
-    int i;
-    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
-    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
-    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
-        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-
-    for ( i = 0; i < fixed_pmc_cnt; i++ )
-        rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-        rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
-
-    if ( !has_hvm_container_vcpu(v) )
-        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
-}
-
-static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( !has_hvm_container_vcpu(v) )
-        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
-        return 0;
-
-    __core2_vpmu_save(v);
-
-    /* Unset PMU MSR bitmap to trap lazy load. */
-    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
-         has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
-        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
-
-    if ( to_guest )
-    {
-        ASSERT(!is_hvm_vcpu(v));
-        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
-               vpmu->context + regs_off, regs_sz);
-    }
-
-    return 1;
-}
-
-static inline void __core2_vpmu_load(struct vcpu *v)
-{
-    unsigned int i, pmc_start;
-    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
-    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
-    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
-        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-
-    for ( i = 0; i < fixed_pmc_cnt; i++ )
-        wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
-
-    if ( full_width_write )
-        pmc_start = MSR_IA32_A_PERFCTR0;
-    else
-        pmc_start = MSR_IA32_PERFCTR0;
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-    {
-        wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
-        wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
-    }
-
-    wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
-    wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);
-    wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable);
-
-    if ( !has_hvm_container_vcpu(v) )
-    {
-        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
-        core2_vpmu_cxt->global_ovf_ctrl = 0;
-        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
-    }
-}
-
-static int core2_vpmu_verify(struct vcpu *v)
-{
-    unsigned int i;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
-    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
-    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
-        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-    uint64_t fixed_ctrl;
-    uint64_t *priv_context = vpmu->priv_context;
-    uint64_t enabled_cntrs = 0;
-
-    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
-        return -EINVAL;
-
-    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
-    if ( fixed_ctrl & fixed_ctrl_mask )
-        return -EINVAL;
-
-    for ( i = 0; i < fixed_pmc_cnt; i++ )
-    {
-        if ( fixed_counters[i] & fixed_counters_mask )
-            return -EINVAL;
-        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
-            enabled_cntrs |= (1ULL << i);
-    }
-    enabled_cntrs <<= 32;
-
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-    {
-        uint64_t control = xen_pmu_cntr_pair[i].control;
-
-        if ( control & ARCH_CTRL_MASK )
-            return -EINVAL;
-        if ( control & ARCH_CNTR_ENABLED )
-            enabled_cntrs |= (1ULL << i);
-    }
-
-    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
-         !is_canonical_address(core2_vpmu_cxt->ds_area) )
-        return -EINVAL;
-
-    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
-         (core2_vpmu_cxt->ds_area != 0) )
-        vpmu_set(vpmu, VPMU_RUNNING);
-    else
-        vpmu_reset(vpmu, VPMU_RUNNING);
-
-    *priv_context = enabled_cntrs;
-
-    return 0;
-}
-
-static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-        return 0;
-
-    if ( from_guest )
-    {
-        int ret;
-
-        ASSERT(!is_hvm_vcpu(v));
-
-        memcpy(vpmu->context + regs_off,
-               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
-               regs_sz);
-
-        ret = core2_vpmu_verify(v);
-        if ( ret )
-        {
-            /*
-             * Not necessary since we should never load the context until
-             * guest provides valid values. But just to be safe.
-             */
-            memset(vpmu->context + regs_off, 0, regs_sz);
-            return ret;
-        }
-    }
-
-    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
-
-    __core2_vpmu_load(v);
-
-    return 0;
-}
-
-static int core2_vpmu_alloc_resource(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
-    uint64_t *p = NULL;
-
-    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
-        return 0;
-
-    if ( has_hvm_container_vcpu(v) )
-    {
-        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
-            goto out_err;
-
-        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
-            goto out_err;
-        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-    }
-
-    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
-                                   sizeof(uint64_t) * fixed_pmc_cnt +
-                                   sizeof(struct xen_pmu_cntr_pair) *
-                                   arch_pmc_cnt);
-    p = xzalloc(uint64_t);
-    if ( !core2_vpmu_cxt || !p )
-        goto out_err;
-
-    core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
-    core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
-                                    sizeof(uint64_t) * fixed_pmc_cnt;
-
-    vpmu->context = core2_vpmu_cxt;
-    vpmu->priv_context = p;
-
-    if ( !is_hvm_vcpu(v) )
-    {
-        /* Copy fixed/arch register offsets to shared area */
-        ASSERT(vpmu->xenpmu_data);
-        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
-    }
-
-    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
-
-    return 1;
-
-out_err:
-    release_pmu_ownship(PMU_OWNER_HVM);
-
-    xfree(core2_vpmu_cxt);
-    xfree(p);
-
-    printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
-           v->vcpu_id, v->domain->domain_id);
-
-    return 0;
-}
-
-static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( !is_core2_vpmu_msr(msr_index, type, index) )
-        return 0;
-
-    if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
-         !core2_vpmu_alloc_resource(current) )
-        return 0;
-
-    /* Do the lazy load staff. */
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-    {
-        __core2_vpmu_load(current);
-        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
-        if ( has_hvm_container_vcpu(current) &&
-             cpu_has_vmx_msr_bitmap )
-            core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
-    }
-    return 1;
-}
-
-static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
-                               uint64_t supported)
-{
-    int i, tmp;
-    int type = -1, index = -1;
-    struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
-    uint64_t *enabled_cntrs;
-
-    if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
-    {
-        /* Special handling for BTS */
-        if ( msr == MSR_IA32_DEBUGCTLMSR )
-        {
-            supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
-                         IA32_DEBUGCTLMSR_BTINT;
-
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
-                supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
-                             IA32_DEBUGCTLMSR_BTS_OFF_USR;
-            if ( !(msr_content & ~supported) &&
-                 vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
-                return 0;
-            if ( (msr_content & supported) &&
-                 !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
-                printk(XENLOG_G_WARNING
-                       "%pv: Debug Store unsupported on this CPU\n",
-                       current);
-        }
-        return -EINVAL;
-    }
-
-    ASSERT(!supported);
-
-    if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
-        /* Writing unsupported bits to a fixed counter */
-        return -EINVAL;
-
-    core2_vpmu_cxt = vpmu->context;
-    enabled_cntrs = vpmu->priv_context;
-    switch ( msr )
-    {
-    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-        if ( msr_content & global_ovf_ctrl_mask )
-            return -EINVAL;
-        core2_vpmu_cxt->global_status &= ~msr_content;
-        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
-        return 0;
-    case MSR_CORE_PERF_GLOBAL_STATUS:
-        gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
-                 "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
-        return -EINVAL;
-    case MSR_IA32_PEBS_ENABLE:
-        if ( msr_content & 1 )
-            gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, "
-                     "which is not supported.\n");
-        core2_vpmu_cxt->pebs_enable = msr_content;
-        return 0;
-    case MSR_IA32_DS_AREA:
-        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
-        {
-            if ( !is_canonical_address(msr_content) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n",
-                         msr_content);
-                return -EINVAL;
-            }
-            core2_vpmu_cxt->ds_area = msr_content;
-            break;
-        }
-        gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
-        return 0;
-    case MSR_CORE_PERF_GLOBAL_CTRL:
-        core2_vpmu_cxt->global_ctrl = msr_content;
-        break;
-    case MSR_CORE_PERF_FIXED_CTR_CTRL:
-        if ( msr_content & fixed_ctrl_mask )
-            return -EINVAL;
-
-        if ( has_hvm_container_vcpu(v) )
-            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
-                               &core2_vpmu_cxt->global_ctrl);
-        else
-            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
-        *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
-        if ( msr_content != 0 )
-        {
-            u64 val = msr_content;
-            for ( i = 0; i < fixed_pmc_cnt; i++ )
-            {
-                if ( val & 3 )
-                    *enabled_cntrs |= (1ULL << 32) << i;
-                val >>= FIXED_CTR_CTRL_BITS;
-            }
-        }
-
-        core2_vpmu_cxt->fixed_ctrl = msr_content;
-        break;
-    default:
-        tmp = msr - MSR_P6_EVNTSEL(0);
-        if ( tmp >= 0 && tmp < arch_pmc_cnt )
-        {
-            struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
-                vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-
-            if ( msr_content & ARCH_CTRL_MASK )
-                return -EINVAL;
-
-            if ( has_hvm_container_vcpu(v) )
-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
-                                   &core2_vpmu_cxt->global_ctrl);
-            else
-                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
-
-            if ( msr_content & ARCH_CNTR_ENABLED )
-                *enabled_cntrs |= 1ULL << tmp;
-            else
-                *enabled_cntrs &= ~(1ULL << tmp);
-
-            xen_pmu_cntr_pair[tmp].control = msr_content;
-        }
-    }
-
-    if ( type != MSR_TYPE_GLOBAL )
-        wrmsrl(msr, msr_content);
-    else
-    {
-        if ( has_hvm_container_vcpu(v) )
-            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
-        else
-            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
-    }
-
-    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
-         (core2_vpmu_cxt->ds_area != 0) )
-        vpmu_set(vpmu, VPMU_RUNNING);
-    else
-        vpmu_reset(vpmu, VPMU_RUNNING);
-
-    return 0;
-}
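For reference: the MSR_CORE_PERF_FIXED_CTR_CTRL case above treats the MSR as one 4-bit control field per fixed counter and marks counter i as enabled (bit 32 + i of *enabled_cntrs) whenever either of the two low enable bits of its field is set. A minimal standalone sketch of that decoding, for illustration only and not part of this patch (fixed_ctrl_to_enabled is a hypothetical helper; FIXED_CTR_CTRL_BITS is taken to be 4, as in the loop above):

    /* Decode MSR_CORE_PERF_FIXED_CTR_CTRL into the "enabled fixed counters"
     * bitmap maintained by the code above. */
    static uint64_t fixed_ctrl_to_enabled(uint64_t ctrl, unsigned int fixed_pmc_cnt)
    {
        uint64_t enabled = 0;
        unsigned int i;

        for ( i = 0; i < fixed_pmc_cnt; i++ )
        {
            if ( ctrl & 3 )                   /* OS and/or USR enable bit set */
                enabled |= (1ULL << 32) << i;
            ctrl >>= 4;                       /* FIXED_CTR_CTRL_BITS */
        }

        return enabled;
    }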
-
-static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
-    int type = -1, index = -1;
-    struct vcpu *v = current;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
-
-    if ( core2_vpmu_msr_common_check(msr, &type, &index) )
-    {
-        core2_vpmu_cxt = vpmu->context;
-        switch ( msr )
-        {
-        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-            *msr_content = 0;
-            break;
-        case MSR_CORE_PERF_GLOBAL_STATUS:
-            *msr_content = core2_vpmu_cxt->global_status;
-            break;
-        case MSR_CORE_PERF_GLOBAL_CTRL:
-            if ( has_hvm_container_vcpu(v) )
-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
-            else
-                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
-            break;
-        default:
-            rdmsrl(msr, *msr_content);
-        }
-    }
-    else if ( msr == MSR_IA32_MISC_ENABLE )
-    {
-        /* Extension for BTS */
-        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
-            *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
-    }
-
-    return 0;
-}
-
-static void core2_vpmu_do_cpuid(unsigned int input,
-                                unsigned int *eax, unsigned int *ebx,
-                                unsigned int *ecx, unsigned int *edx)
-{
-    if ( input == 0x1 )
-    {
-        struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
-        {
-            /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */
-            *edx |= cpufeat_mask(X86_FEATURE_DS);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
-                *ecx |= cpufeat_mask(X86_FEATURE_DTES64);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
-                *ecx |= cpufeat_mask(X86_FEATURE_DSCPL);
-        }
-    }
-}
-
-/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
-static void core2_vpmu_dump(const struct vcpu *v)
-{
-    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    unsigned int i;
-    const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
-    u64 val;
-    uint64_t *fixed_counters;
-    struct xen_pmu_cntr_pair *cntr_pair;
-
-    if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
-         return;
-
-    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
-    {
-        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-            printk("    vPMU loaded\n");
-        else
-            printk("    vPMU allocated\n");
-        return;
-    }
-
-    printk("    vPMU running\n");
-
-    cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-    fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
-
-    /* Print the contents of the counter and its configuration msr. */
-    for ( i = 0; i < arch_pmc_cnt; i++ )
-        printk("      general_%d: 0x%016lx ctrl: 0x%016lx\n",
-            i, cntr_pair[i].counter, cntr_pair[i].control);
-
-    /*
-     * The configuration of each fixed counter occupies 4 bits in
-     * MSR_CORE_PERF_FIXED_CTR_CTRL.
-     */
-    val = core2_vpmu_cxt->fixed_ctrl;
-    for ( i = 0; i < fixed_pmc_cnt; i++ )
-    {
-        printk("      fixed_%d:   0x%016lx ctrl: %#lx\n",
-               i, fixed_counters[i],
-               val & FIXED_CTR_CTRL_MASK);
-        val >>= FIXED_CTR_CTRL_BITS;
-    }
-}
-
-static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
-    struct vcpu *v = current;
-    u64 msr_content;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
-
-    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
-    if ( msr_content )
-    {
-        if ( is_pmc_quirk )
-            handle_pmc_quirk(msr_content);
-        core2_vpmu_cxt->global_status |= msr_content;
-        msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1);
-        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
-    }
-    else
-    {
-        /* No PMC overflow but perhaps a Trace Message interrupt. */
-        __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
-        if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
-            return 0;
-    }
-
-    return 1;
-}
-
-static void core2_vpmu_destroy(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    xfree(vpmu->context);
-    xfree(vpmu->priv_context);
-    if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
-        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
-    release_pmu_ownship(PMU_OWNER_HVM);
-    vpmu_clear(vpmu);
-}
-
-struct arch_vpmu_ops core2_vpmu_ops = {
-    .do_wrmsr = core2_vpmu_do_wrmsr,
-    .do_rdmsr = core2_vpmu_do_rdmsr,
-    .do_interrupt = core2_vpmu_do_interrupt,
-    .do_cpuid = core2_vpmu_do_cpuid,
-    .arch_vpmu_destroy = core2_vpmu_destroy,
-    .arch_vpmu_save = core2_vpmu_save,
-    .arch_vpmu_load = core2_vpmu_load,
-    .arch_vpmu_dump = core2_vpmu_dump
-};
-
-static void core2_no_vpmu_do_cpuid(unsigned int input,
-                                unsigned int *eax, unsigned int *ebx,
-                                unsigned int *ecx, unsigned int *edx)
-{
-    /*
-     * As the vpmu is not enabled in this case, reset some bits in the
-     * architectural performance monitoring related part.
-     */
-    if ( input == 0xa )
-    {
-        *eax &= ~PMU_VERSION_MASK;
-        *eax &= ~PMU_GENERAL_NR_MASK;
-        *eax &= ~PMU_GENERAL_WIDTH_MASK;
-
-        *edx &= ~PMU_FIXED_NR_MASK;
-        *edx &= ~PMU_FIXED_WIDTH_MASK;
-    }
-}
-
-/*
- * If it is a vpmu MSR, set it to 0.
- */
-static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
-    int type = -1, index = -1;
-    if ( !is_core2_vpmu_msr(msr, &type, &index) )
-        return -EINVAL;
-    *msr_content = 0;
-    return 0;
-}
-
-/*
- * These functions are used in case vpmu is not enabled.
- */
-struct arch_vpmu_ops core2_no_vpmu_ops = {
-    .do_rdmsr = core2_no_vpmu_do_rdmsr,
-    .do_cpuid = core2_no_vpmu_do_cpuid,
-};
-
-int vmx_vpmu_initialise(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    u64 msr_content;
-    static bool_t ds_warned;
-
-    vpmu->arch_vpmu_ops = &core2_no_vpmu_ops;
-    if ( vpmu_mode == XENPMU_MODE_OFF )
-        return 0;
-
-    if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
-        return -EINVAL;
-
-    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
-        goto func_out;
-    /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
-    while ( boot_cpu_has(X86_FEATURE_DS) )
-    {
-        if ( !boot_cpu_has(X86_FEATURE_DTES64) )
-        {
-            if ( !ds_warned )
-                printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
-                       " - Debug Store disabled for guests\n");
-            break;
-        }
-        vpmu_set(vpmu, VPMU_CPU_HAS_DS);
-        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
-        if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
-        {
-            /* If BTS_UNAVAIL is set reset the DS feature. */
-            vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
-            if ( !ds_warned )
-                printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
-                       " - Debug Store disabled for guests\n");
-            break;
-        }
-
-        vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
-        if ( !ds_warned )
-        {
-            if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
-                printk(XENLOG_G_INFO
-                       "vpmu: CPU doesn't support CPL-Qualified BTS\n");
-            printk("******************************************************\n");
-            printk("** WARNING: Emulation of BTS Feature is switched on **\n");
-            printk("** Using this processor feature in a virtualized    **\n");
-            printk("** environment is not 100%% safe.                    **\n");
-            printk("** Setting the DS buffer address with wrong values  **\n");
-            printk("** may lead to hypervisor hangs or crashes.         **\n");
-            printk("** It is NOT recommended for production use!        **\n");
-            printk("******************************************************\n");
-        }
-        break;
-    }
-    ds_warned = 1;
- func_out:
-
-    /* PV domains can allocate resources immediately */
-    if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
-        return -EIO;
-
-    vpmu->arch_vpmu_ops = &core2_vpmu_ops;
-
-    return 0;
-}
-
-int __init core2_vpmu_init(void)
-{
-    u64 caps;
-
-    if ( current_cpu_data.x86 != 6 )
-    {
-        printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
-        return -EINVAL;
-    }
-
-    switch ( current_cpu_data.x86_model )
-    {
-        /* Core2: */
-        case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-        case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
-        case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
-        case 0x1d: /* six-core 45 nm xeon "Dunnington" */
-
-        case 0x2a: /* SandyBridge */
-        case 0x2d: /* SandyBridge, "Romley-EP" */
-
-        /* Nehalem: */
-        case 0x1a: /* 45 nm nehalem, "Bloomfield" */
-        case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */
-        case 0x2e: /* 45 nm nehalem-ex, "Beckton" */
-
-        /* Westmere: */
-        case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */
-        case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */
-        case 0x2f: /* 32 nm Westmere-EX */
-
-        case 0x3a: /* IvyBridge */
-        case 0x3e: /* IvyBridge EP */
-
-        /* Haswell: */
-        case 0x3c:
-        case 0x3f:
-        case 0x45:
-        case 0x46:
-
-        /* Broadwell */
-        case 0x3d:
-        case 0x4f:
-        case 0x56:
-
-        /* future: */
-        case 0x4e:
-
-        /* next gen Xeon Phi */
-        case 0x57:
-            break;
-
-        default:
-            printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n",
-                   current_cpu_data.x86_model);
-            return -EINVAL;
-    }
-
-    arch_pmc_cnt = core2_get_arch_pmc_count();
-    fixed_pmc_cnt = core2_get_fixed_pmc_count();
-    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
-    full_width_write = (caps >> 13) & 1;
-
-    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
-    fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
-    global_ovf_ctrl_mask = ~(0xC000000000000000 |
-                             (((1ULL << fixed_pmc_cnt) - 1) << 32) |
-                             ((1ULL << arch_pmc_cnt) - 1));
-
-    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
-              sizeof(uint64_t) * fixed_pmc_cnt +
-              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
-
-    check_pmc_quirk();
-
-    if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
-         sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
-    {
-        printk(XENLOG_WARNING
-               "VPMU: Register bank does not fit into VPMU share page\n");
-        arch_pmc_cnt = fixed_pmc_cnt = 0;
-        return -ENOSPC;
-    }
-
-    return 0;
-}
-
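To make the mask setup in core2_vpmu_init() above concrete, take fixed_pmc_cnt = 3, arch_pmc_cnt = 4 and a 48-bit fixed counter width (illustrative values only; the real numbers come from CPUID leaf 0xa via the core2_get_*_count() helpers):

    fixed_ctrl_mask      = ~((1ull << (3 * 4)) - 1)      = 0xfffffffffffff000
    fixed_counters_mask  = ~((1ull << 48) - 1)           = 0xffff000000000000
    global_ovf_ctrl_mask = ~(0xC000000000000000 |
                             (((1ULL << 3) - 1) << 32) | /* fixed ovf bits */
                             ((1ULL << 4) - 1))          /* arch ovf bits  */
                         = ~0xc00000070000000f           = 0x3ffffff8fffffff0

Any guest write that sets a bit covered by one of these masks is rejected with -EINVAL by core2_vpmu_do_wrmsr() above.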
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
deleted file mode 100644 (file)
index 55fe4c0..0000000
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- * vpmu.c: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@intel.com>
- */
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/xenoprof.h>
-#include <xen/event.h>
-#include <xen/guest_access.h>
-#include <asm/regs.h>
-#include <asm/types.h>
-#include <asm/msr.h>
-#include <asm/nmi.h>
-#include <asm/p2m.h>
-#include <asm/hvm/support.h>
-#include <asm/hvm/vmx/vmx.h>
-#include <asm/hvm/vmx/vmcs.h>
-#include <asm/hvm/vpmu.h>
-#include <asm/hvm/svm/svm.h>
-#include <asm/hvm/svm/vmcb.h>
-#include <asm/apic.h>
-#include <public/pmu.h>
-#include <xsm/xsm.h>
-
-#include <compat/pmu.h>
-CHECK_pmu_cntr_pair;
-CHECK_pmu_data;
-CHECK_pmu_params;
-
-/*
- * "vpmu" :     vpmu generally enabled
- * "vpmu=off" : vpmu generally disabled
- * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on.
- */
-static unsigned int __read_mostly opt_vpmu_enabled;
-unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
-unsigned int __read_mostly vpmu_features = 0;
-static void parse_vpmu_param(char *s);
-custom_param("vpmu", parse_vpmu_param);
-
-static DEFINE_SPINLOCK(vpmu_lock);
-static unsigned vpmu_count;
-
-static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
-
-static void __init parse_vpmu_param(char *s)
-{
-    switch ( parse_bool(s) )
-    {
-    case 0:
-        break;
-    default:
-        if ( !strcmp(s, "bts") )
-            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
-        else if ( *s )
-        {
-            printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
-            break;
-        }
-        /* fall through */
-    case 1:
-        /* Default VPMU mode */
-        vpmu_mode = XENPMU_MODE_SELF;
-        opt_vpmu_enabled = 1;
-        break;
-    }
-}
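For reference, and matching the option comment at the top of this file, the documented forms on the Xen command line are:

    vpmu          (enable; default mode XENPMU_MODE_SELF)
    vpmu=off      (leave the vPMU disabled, i.e. XENPMU_MODE_OFF)
    vpmu=bts      (enable and additionally switch on Intel BTS emulation)

Other values recognised by parse_bool() behave like the plain on/off forms; an unrecognised string disables the vPMU with a warning.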
-
-void vpmu_lvtpc_update(uint32_t val)
-{
-    struct vpmu_struct *vpmu;
-    struct vcpu *curr = current;
-
-    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
-        return;
-
-    vpmu = vcpu_vpmu(curr);
-
-    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
-
-    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
-    if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
-         !vpmu_is_set(vpmu, VPMU_CACHED) )
-        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
-}
-
-int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
-                uint64_t supported, bool_t is_write)
-{
-    struct vcpu *curr = current;
-    struct vpmu_struct *vpmu;
-    const struct arch_vpmu_ops *ops;
-    int ret = 0;
-
-    if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
-         ((vpmu_mode & XENPMU_MODE_ALL) &&
-          !is_hardware_domain(current->domain)) )
-         goto nop;
-
-    vpmu = vcpu_vpmu(curr);
-    ops = vpmu->arch_vpmu_ops;
-    if ( !ops )
-        goto nop;
-
-    if ( is_write && ops->do_wrmsr )
-        ret = ops->do_wrmsr(msr, *msr_content, supported);
-    else if ( !is_write && ops->do_rdmsr )
-        ret = ops->do_rdmsr(msr, msr_content);
-    else
-        goto nop;
-
-    /*
-     * We may have received a PMU interrupt while handling MSR access
-     * and since do_wr/rdmsr may load VPMU context we should save
-     * (and unload) it again.
-     */
-    if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
-        vpmu_is_set(vpmu, VPMU_CACHED) )
-    {
-        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
-        ops->arch_vpmu_save(curr, 0);
-        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
-    }
-
-    return ret;
-
- nop:
-    if ( !is_write )
-        *msr_content = 0;
-
-    return 0;
-}
-
-static inline struct vcpu *choose_hwdom_vcpu(void)
-{
-    unsigned idx;
-
-    if ( hardware_domain->max_vcpus == 0 )
-        return NULL;
-
-    idx = smp_processor_id() % hardware_domain->max_vcpus;
-
-    return hardware_domain->vcpu[idx];
-}
-
-void vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
-    struct vcpu *sampled = current, *sampling;
-    struct vpmu_struct *vpmu;
-    struct vlapic *vlapic;
-    u32 vlapic_lvtpc;
-
-    /*
-     * dom0 will handle interrupt for special domains (e.g. idle domain) or,
-     * in XENPMU_MODE_ALL, for everyone.
-     */
-    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
-         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
-    {
-        sampling = choose_hwdom_vcpu();
-        if ( !sampling )
-            return;
-    }
-    else
-        sampling = sampled;
-
-    vpmu = vcpu_vpmu(sampling);
-    if ( !vpmu->arch_vpmu_ops )
-        return;
-
-    /* PV(H) guest */
-    if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) )
-    {
-        const struct cpu_user_regs *cur_regs;
-        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
-        domid_t domid;
-
-        if ( !vpmu->xenpmu_data )
-            return;
-
-        if ( is_pvh_vcpu(sampling) &&
-             !(vpmu_mode & XENPMU_MODE_ALL) &&
-             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
-            return;
-
-        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
-            return;
-
-        /* PV guest will be reading PMU MSRs from xenpmu_data */
-        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
-        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
-        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
-
-        if ( has_hvm_container_vcpu(sampled) )
-            *flags = 0;
-        else
-            *flags = PMU_SAMPLE_PV;
-
-        if ( sampled == sampling )
-            domid = DOMID_SELF;
-        else
-            domid = sampled->domain->domain_id;
-
-        /* Store appropriate registers in xenpmu_data */
-        /* FIXME: 32-bit PVH should go here as well */
-        if ( is_pv_32bit_vcpu(sampling) )
-        {
-            /*
-             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
-             * and therefore we treat it the same way as a non-privileged
-             * PV 32-bit domain.
-             */
-            struct compat_pmu_regs *cmp;
-
-            cur_regs = guest_cpu_user_regs();
-
-            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
-            cmp->ip = cur_regs->rip;
-            cmp->sp = cur_regs->rsp;
-            cmp->flags = cur_regs->eflags;
-            cmp->ss = cur_regs->ss;
-            cmp->cs = cur_regs->cs;
-            if ( (cmp->cs & 3) > 1 )
-                *flags |= PMU_SAMPLE_USER;
-        }
-        else
-        {
-            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
-
-            if ( (vpmu_mode & XENPMU_MODE_SELF) )
-                cur_regs = guest_cpu_user_regs();
-            else if ( !guest_mode(regs) &&
-                      is_hardware_domain(sampling->domain) )
-            {
-                cur_regs = regs;
-                domid = DOMID_XEN;
-            }
-            else
-                cur_regs = guest_cpu_user_regs();
-
-            r->ip = cur_regs->rip;
-            r->sp = cur_regs->rsp;
-            r->flags = cur_regs->eflags;
-
-            if ( !has_hvm_container_vcpu(sampled) )
-            {
-                r->ss = cur_regs->ss;
-                r->cs = cur_regs->cs;
-                if ( !(sampled->arch.flags & TF_kernel_mode) )
-                    *flags |= PMU_SAMPLE_USER;
-            }
-            else
-            {
-                struct segment_register seg;
-
-                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
-                r->cs = seg.sel;
-                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
-                r->ss = seg.sel;
-                r->cpl = seg.attr.fields.dpl;
-                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
-                    *flags |= PMU_SAMPLE_REAL;
-            }
-        }
-
-        vpmu->xenpmu_data->domain_id = domid;
-        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
-        if ( is_hardware_domain(sampling->domain) )
-            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
-        else
-            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
-
-        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
-        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
-        *flags |= PMU_CACHED;
-        vpmu_set(vpmu, VPMU_CACHED);
-
-        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
-
-        return;
-    }
-
-    /* HVM guests */
-    vlapic = vcpu_vlapic(sampling);
-
-    /* We don't support (yet) HVM dom0 */
-    ASSERT(sampling == sampled);
-
-    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
-         !is_vlapic_lvtpc_enabled(vlapic) )
-        return;
-
-    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
-
-    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
-    {
-    case APIC_MODE_FIXED:
-        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
-        break;
-    case APIC_MODE_NMI:
-        sampling->nmi_pending = 1;
-        break;
-    }
-}
-
-void vpmu_do_cpuid(unsigned int input,
-                   unsigned int *eax, unsigned int *ebx,
-                   unsigned int *ecx, unsigned int *edx)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
-    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid )
-        vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx);
-}
-
-static void vpmu_save_force(void *arg)
-{
-    struct vcpu *v = (struct vcpu *)arg;
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-        return;
-
-    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
-
-    if ( vpmu->arch_vpmu_ops )
-        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
-
-    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
-
-    per_cpu(last_vcpu, smp_processor_id()) = NULL;
-}
-
-void vpmu_save(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    int pcpu = smp_processor_id();
-
-    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
-       return;
-
-    vpmu->last_pcpu = pcpu;
-    per_cpu(last_vcpu, pcpu) = v;
-
-    if ( vpmu->arch_vpmu_ops )
-        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
-            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
-
-    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
-}
-
-int vpmu_load(struct vcpu *v, bool_t from_guest)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    int pcpu = smp_processor_id();
-    struct vcpu *prev = NULL;
-
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
-        return 0;
-
-    /* First time this VCPU is running here */
-    if ( vpmu->last_pcpu != pcpu )
-    {
-        /*
-         * Get the context from the last pcpu that we ran on. Note that if
-         * another VCPU is running there it must have saved this VCPU's
-         * context before starting to run (see below).
-         * There should be no race since the remote pcpu will disable
-         * interrupts before saving the context.
-         */
-        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-        {
-            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
-                             vpmu_save_force, (void *)v, 1);
-            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
-        }
-    } 
-
-    /* Prevent forced context save from remote CPU */
-    local_irq_disable();
-
-    prev = per_cpu(last_vcpu, pcpu);
-
-    if ( prev != v && prev )
-    {
-        vpmu = vcpu_vpmu(prev);
-
-        /* Someone ran here before us */
-        vpmu_save_force(prev);
-        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
-
-        vpmu = vcpu_vpmu(v);
-    }
-
-    local_irq_enable();
-
-    /* Only load the PMU context immediately when the PMU is counting. */
-    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
-         (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
-        return 0;
-
-    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
-    {
-        int ret;
-
-        apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
-        /* Arch code needs to set VPMU_CONTEXT_LOADED */
-        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
-        if ( ret )
-        {
-            apic_write_around(APIC_LVTPC,
-                              vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
-            return ret;
-        }
-    }
-
-    return 0;
-}
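The save/load pair above is driven from the context-switch path via the vpmu_switch_from()/vpmu_switch_to() inlines declared in the relocated header; purely as an illustration of the intended call order:

    vpmu_switch_from(prev);   /* vpmu_save(prev) when a counting mode is active */
    /* ... switch the remaining vCPU state ... */
    vpmu_switch_to(next);     /* vpmu_load(next, 0); arch code reloads the MSRs */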
-
-void vpmu_initialise(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    uint8_t vendor = current_cpu_data.x86_vendor;
-    int ret;
-    bool_t is_priv_vpmu = is_hardware_domain(v->domain);
-
-    BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
-    BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
-    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
-    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
-
-    ASSERT(!vpmu->flags && !vpmu->context);
-
-    if ( !is_priv_vpmu )
-    {
-        /*
-         * Count active VPMUs so that we won't try to change vpmu_mode while
-         * they are in use.
-         * vpmu_mode can be safely updated while dom0's VPMUs are active and
-         * so we don't need to include it in the count.
-         */
-        spin_lock(&vpmu_lock);
-        vpmu_count++;
-        spin_unlock(&vpmu_lock);
-    }
-
-    switch ( vendor )
-    {
-    case X86_VENDOR_AMD:
-        ret = svm_vpmu_initialise(v);
-        break;
-
-    case X86_VENDOR_INTEL:
-        ret = vmx_vpmu_initialise(v);
-        break;
-
-    default:
-        if ( vpmu_mode != XENPMU_MODE_OFF )
-        {
-            printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
-                   "Disabling VPMU\n", vendor);
-            opt_vpmu_enabled = 0;
-            vpmu_mode = XENPMU_MODE_OFF;
-        }
-        return; /* Don't bother restoring vpmu_count, VPMU is off forever */
-    }
-
-    if ( ret )
-        printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
-
-    /* Intel needs to initialize VPMU ops even if VPMU is not in use */
-    if ( !is_priv_vpmu &&
-         (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
-          (vpmu_mode == XENPMU_MODE_ALL)) )
-    {
-        spin_lock(&vpmu_lock);
-        vpmu_count--;
-        spin_unlock(&vpmu_lock);
-    }
-}
-
-static void vpmu_clear_last(void *arg)
-{
-    if ( this_cpu(last_vcpu) == arg )
-        this_cpu(last_vcpu) = NULL;
-}
-
-void vpmu_destroy(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
-        return;
-
-    /*
-     * Need to clear last_vcpu in case it points to v.
-     * We can check here non-atomically whether it is 'v' since
-     * last_vcpu can never become 'v' again at this point.
-     * We will test it again in vpmu_clear_last() with interrupts
-     * disabled to make sure we don't clear someone else.
-     */
-    if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v )
-        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
-                         vpmu_clear_last, v, 1);
-
-    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
-    {
-        /* Unload VPMU first. This will stop counters */
-        on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
-                         vpmu_save_force, v, 1);
-        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
-    }
-
-    spin_lock(&vpmu_lock);
-    if ( !is_hardware_domain(v->domain) )
-        vpmu_count--;
-    spin_unlock(&vpmu_lock);
-}
-
-static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
-{
-    struct vcpu *v;
-    struct vpmu_struct *vpmu;
-    struct page_info *page;
-    uint64_t gfn = params->val;
-
-    if ( (vpmu_mode == XENPMU_MODE_OFF) ||
-         ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
-        return -EINVAL;
-
-    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
-        return -EINVAL;
-
-    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
-    if ( !page )
-        return -EINVAL;
-
-    if ( !get_page_type(page, PGT_writable_page) )
-    {
-        put_page(page);
-        return -EINVAL;
-    }
-
-    v = d->vcpu[params->vcpu];
-    vpmu = vcpu_vpmu(v);
-
-    spin_lock(&vpmu->vpmu_lock);
-
-    if ( v->arch.vpmu.xenpmu_data )
-    {
-        spin_unlock(&vpmu->vpmu_lock);
-        put_page_and_type(page);
-        return -EEXIST;
-    }
-
-    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
-    if ( !v->arch.vpmu.xenpmu_data )
-    {
-        spin_unlock(&vpmu->vpmu_lock);
-        put_page_and_type(page);
-        return -ENOMEM;
-    }
-
-    vpmu_initialise(v);
-
-    spin_unlock(&vpmu->vpmu_lock);
-
-    return 0;
-}
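From the guest side, XENPMU_init expects a xen_pmu_params_t carrying the interface version, the target vCPU and, in val, the GFN of a writable guest page that pvpmu_init() above maps globally as the shared xenpmu_data area. A hedged sketch of that call; the HYPERVISOR_xenpmu_op wrapper name and the shared_page_gfn variable are assumptions, not something introduced by this patch:

    xen_pmu_params_t p = {
        .version.maj = XENPMU_VER_MAJ,
        .version.min = XENPMU_VER_MIN,
        .vcpu = 0,                   /* vCPU whose shared page this is */
        .val  = shared_page_gfn,     /* GFN of a writable guest page   */
    };
    int rc = HYPERVISOR_xenpmu_op(XENPMU_init, &p);   /* wrapper name assumed */

A second call for the same vCPU fails with -EEXIST, and XENPMU_finish tears the mapping down again.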
-
-static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
-{
-    struct vcpu *v;
-    struct vpmu_struct *vpmu;
-    uint64_t mfn;
-    void *xenpmu_data;
-
-    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
-        return;
-
-    v = d->vcpu[params->vcpu];
-    if ( v != current )
-        vcpu_pause(v);
-
-    vpmu = vcpu_vpmu(v);
-    spin_lock(&vpmu->vpmu_lock);
-
-    vpmu_destroy(v);
-    xenpmu_data = vpmu->xenpmu_data;
-    vpmu->xenpmu_data = NULL;
-
-    spin_unlock(&vpmu->vpmu_lock);
-
-    if ( xenpmu_data )
-    {
-        mfn = domain_page_map_to_mfn(xenpmu_data);
-        ASSERT(mfn_valid(mfn));
-        unmap_domain_page_global(xenpmu_data);
-        put_page_and_type(mfn_to_page(mfn));
-    }
-
-    if ( v != current )
-        vcpu_unpause(v);
-}
-
-/* Dump some vpmu information on console. Used in keyhandler dump_domains(). */
-void vpmu_dump(struct vcpu *v)
-{
-    struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
-    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
-        vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
-}
-
-long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
-{
-    int ret;
-    struct vcpu *curr;
-    struct xen_pmu_params pmu_params = {.val = 0};
-    struct xen_pmu_data *xenpmu_data;
-    struct vpmu_struct *vpmu;
-
-    if ( !opt_vpmu_enabled )
-        return -EOPNOTSUPP;
-
-    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
-    if ( ret )
-        return ret;
-
-    /* Check major version when parameters are specified */
-    switch ( op )
-    {
-    case XENPMU_mode_set:
-    case XENPMU_feature_set:
-    case XENPMU_init:
-    case XENPMU_finish:
-        if ( copy_from_guest(&pmu_params, arg, 1) )
-            return -EFAULT;
-
-        if ( pmu_params.version.maj != XENPMU_VER_MAJ )
-            return -EINVAL;
-    }
-
-    switch ( op )
-    {
-    case XENPMU_mode_set:
-    {
-        if ( (pmu_params.val &
-              ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
-             (hweight64(pmu_params.val) > 1) )
-            return -EINVAL;
-
-        /* 32-bit dom0 can only sample itself. */
-        if ( is_pv_32bit_vcpu(current) &&
-             (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
-            return -EINVAL;
-
-        spin_lock(&vpmu_lock);
-
-        /*
-         * We can always safely switch between XENPMU_MODE_SELF and
-         * XENPMU_MODE_HV while other VPMUs are active.
-         */
-        if ( (vpmu_count == 0) ||
-             ((vpmu_mode ^ pmu_params.val) ==
-              (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
-            vpmu_mode = pmu_params.val;
-        else if ( vpmu_mode != pmu_params.val )
-        {
-            printk(XENLOG_WARNING
-                   "VPMU: Cannot change mode while active VPMUs exist\n");
-            ret = -EBUSY;
-        }
-
-        spin_unlock(&vpmu_lock);
-
-        break;
-    }
-
-    case XENPMU_mode_get:
-        memset(&pmu_params, 0, sizeof(pmu_params));
-        pmu_params.val = vpmu_mode;
-
-        pmu_params.version.maj = XENPMU_VER_MAJ;
-        pmu_params.version.min = XENPMU_VER_MIN;
-
-        if ( copy_to_guest(arg, &pmu_params, 1) )
-            ret = -EFAULT;
-
-        break;
-
-    case XENPMU_feature_set:
-        if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
-            return -EINVAL;
-
-        spin_lock(&vpmu_lock);
-
-        if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
-            vpmu_features = pmu_params.val;
-        else
-        {
-            printk(XENLOG_WARNING "VPMU: Cannot change features while"
-                                  " active VPMUs exist\n");
-            ret = -EBUSY;
-        }
-
-        spin_unlock(&vpmu_lock);
-
-        break;
-
-    case XENPMU_feature_get:
-        pmu_params.val = vpmu_features;
-        if ( copy_field_to_guest(arg, &pmu_params, val) )
-            ret = -EFAULT;
-
-        break;
-
-    case XENPMU_init:
-        ret = pvpmu_init(current->domain, &pmu_params);
-        break;
-
-    case XENPMU_finish:
-        pvpmu_finish(current->domain, &pmu_params);
-        break;
-
-    case XENPMU_lvtpc_set:
-        xenpmu_data = current->arch.vpmu.xenpmu_data;
-        if ( xenpmu_data != NULL )
-            vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
-        else
-            ret = -EINVAL;
-        break;
-
-    case XENPMU_flush:
-        curr = current;
-        vpmu = vcpu_vpmu(curr);
-        xenpmu_data = curr->arch.vpmu.xenpmu_data;
-        if ( xenpmu_data == NULL )
-            return -EINVAL;
-        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
-        vpmu_reset(vpmu, VPMU_CACHED);
-        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
-        if ( vpmu_load(curr, 1) )
-        {
-            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
-            vpmu_set(vpmu, VPMU_CACHED);
-            ret = -EIO;
-        }
-        break;
-
-    default:
-        ret = -EINVAL;
-    }
-
-    return ret;
-}
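The XENPMU_mode_set handler above permits switching between XENPMU_MODE_SELF and XENPMU_MODE_HV even while guests have active VPMUs, because for exactly that pair the XOR of old and new mode equals XENPMU_MODE_SELF | XENPMU_MODE_HV; every other transition requires vpmu_count to be zero. Assuming the usual single-bit mode values (the hweight64() check above already enforces at most one bit):

    SELF -> HV  :  old ^ new == SELF | HV   ->  allowed while VPMUs are active
    SELF -> ALL :  old ^ new != SELF | HV   ->  only allowed once vpmu_count == 0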
-
-static int __init vpmu_init(void)
-{
-    int vendor = current_cpu_data.x86_vendor;
-
-    if ( !opt_vpmu_enabled )
-    {
-        printk(XENLOG_INFO "VPMU: disabled\n");
-        return 0;
-    }
-
-    /* NMI watchdog uses LVTPC and HW counter */
-    if ( opt_watchdog && opt_vpmu_enabled )
-    {
-        printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
-        opt_vpmu_enabled = 0;
-        vpmu_mode = XENPMU_MODE_OFF;
-        return 0;
-    }
-
-    switch ( vendor )
-    {
-    case X86_VENDOR_AMD:
-        if ( amd_vpmu_init() )
-           vpmu_mode = XENPMU_MODE_OFF;
-        break;
-    case X86_VENDOR_INTEL:
-        if ( core2_vpmu_init() )
-           vpmu_mode = XENPMU_MODE_OFF;
-        break;
-    default:
-        printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
-               "Turning VPMU off.\n", vendor);
-        vpmu_mode = XENPMU_MODE_OFF;
-        break;
-    }
-
-    if ( vpmu_mode != XENPMU_MODE_OFF )
-        printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
-               __stringify(XENPMU_VER_MIN) "\n");
-    else
-        opt_vpmu_enabled = 0;
-
-    return 0;
-}
-__initcall(vpmu_init);
index ca429a13d8ccedc6067f09b16fe8192d2a7d147f..89649d0aafdd1a6c6db61327f9fbd2608c98fd21 100644 (file)
@@ -19,7 +19,7 @@
 #include <asm/processor.h>
 #include <asm/regs.h>
 #include <asm/current.h>
-#include <asm/hvm/vpmu.h>
+#include <asm/vpmu.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
index 2d21846463f039a8583509b48b1fda247f3d439e..37e459f2f22ef33a48f3951d1bdfdcba0ea97eee 100644 (file)
@@ -72,7 +72,7 @@
 #include <asm/apic.h>
 #include <asm/mc146818rtc.h>
 #include <asm/hpet.h>
-#include <asm/hvm/vpmu.h>
+#include <asm/vpmu.h>
 #include <public/arch-x86/cpuid.h>
 #include <xsm/xsm.h>
 
index 1104bdab47876c01d816693997979c7b232b9009..31326447f7da7148a350d386d7ad5eca622f1709 100644 (file)
@@ -19,8 +19,8 @@
 #ifndef __ASM_X86_HVM_VMX_VMCS_H__
 #define __ASM_X86_HVM_VMX_VMCS_H__
 
+#include <asm/vpmu.h>
 #include <asm/hvm/io.h>
-#include <asm/hvm/vpmu.h>
 #include <irq_vectors.h>
 
 extern void vmcs_dump_vcpu(struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
deleted file mode 100644 (file)
index 212e496..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * vpmu.h: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@intel.com>
- */
-
-#ifndef __ASM_X86_HVM_VPMU_H_
-#define __ASM_X86_HVM_VPMU_H_
-
-#include <public/pmu.h>
-
-#define vcpu_vpmu(vcpu)   (&(vcpu)->arch.vpmu)
-#define vpmu_vcpu(vpmu)   container_of((vpmu), struct vcpu, arch.vpmu)
-
-#define MSR_TYPE_COUNTER            0
-#define MSR_TYPE_CTRL               1
-#define MSR_TYPE_GLOBAL             2
-#define MSR_TYPE_ARCH_COUNTER       3
-#define MSR_TYPE_ARCH_CTRL          4
-
-/* Start of PMU register bank */
-#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \
-                                                 (uintptr_t)ctxt->offset))
-
-/* Arch specific operations shared by all vpmus */
-struct arch_vpmu_ops {
-    int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
-                    uint64_t supported);
-    int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
-    int (*do_interrupt)(struct cpu_user_regs *regs);
-    void (*do_cpuid)(unsigned int input,
-                     unsigned int *eax, unsigned int *ebx,
-                     unsigned int *ecx, unsigned int *edx);
-    void (*arch_vpmu_destroy)(struct vcpu *v);
-    int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
-    int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
-    void (*arch_vpmu_dump)(const struct vcpu *);
-};
-
-int core2_vpmu_init(void);
-int vmx_vpmu_initialise(struct vcpu *);
-int amd_vpmu_init(void);
-int svm_vpmu_initialise(struct vcpu *);
-
-struct vpmu_struct {
-    u32 flags;
-    u32 last_pcpu;
-    u32 hw_lapic_lvtpc;
-    void *context;      /* May be shared with PV guest */
-    void *priv_context; /* hypervisor-only */
-    struct arch_vpmu_ops *arch_vpmu_ops;
-    struct xen_pmu_data *xenpmu_data;
-    spinlock_t vpmu_lock;
-};
-
-/* VPMU states */
-#define VPMU_CONTEXT_ALLOCATED              0x1
-#define VPMU_CONTEXT_LOADED                 0x2
-#define VPMU_RUNNING                        0x4
-#define VPMU_CONTEXT_SAVE                   0x8   /* Force context save */
-#define VPMU_FROZEN                         0x10  /* Stop counters while VCPU is not running */
-#define VPMU_PASSIVE_DOMAIN_ALLOCATED       0x20
-/* PV(H) guests: VPMU registers are accessed by guest from shared page */
-#define VPMU_CACHED                         0x40
-
-static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
-{
-    vpmu->flags |= mask;
-}
-static inline void vpmu_reset(struct vpmu_struct *vpmu, const u32 mask)
-{
-    vpmu->flags &= ~mask;
-}
-static inline void vpmu_clear(struct vpmu_struct *vpmu)
-{
-    vpmu->flags = 0;
-}
-static inline bool_t vpmu_is_set(const struct vpmu_struct *vpmu, const u32 mask)
-{
-    return !!(vpmu->flags & mask);
-}
-static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
-                                      const u32 mask)
-{
-    return !!((vpmu->flags & mask) == mask);
-}
-
-void vpmu_lvtpc_update(uint32_t val);
-int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
-                uint64_t supported, bool_t is_write);
-void vpmu_do_interrupt(struct cpu_user_regs *regs);
-void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
-                                       unsigned int *ecx, unsigned int *edx);
-void vpmu_initialise(struct vcpu *v);
-void vpmu_destroy(struct vcpu *v);
-void vpmu_save(struct vcpu *v);
-int vpmu_load(struct vcpu *v, bool_t from_guest);
-void vpmu_dump(struct vcpu *v);
-
-static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
-                                uint64_t supported)
-{
-    return vpmu_do_msr(msr, &msr_content, supported, 1);
-}
-static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
-    return vpmu_do_msr(msr, msr_content, 0, 0);
-}
-
-extern int acquire_pmu_ownership(int pmu_ownership);
-extern void release_pmu_ownership(int pmu_ownership);
-
-extern unsigned int vpmu_mode;
-extern unsigned int vpmu_features;
-
-/* Context switch */
-static inline void vpmu_switch_from(struct vcpu *prev)
-{
-    if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
-        vpmu_save(prev);
-}
-
-static inline void vpmu_switch_to(struct vcpu *next)
-{
-    if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
-        vpmu_load(next, 0);
-}
-
-#endif /* __ASM_X86_HVM_VPMU_H_*/
-
diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h
new file mode 100644 (file)
index 0000000..212e496
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * vpmu.h: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_H_
+#define __ASM_X86_HVM_VPMU_H_
+
+#include <public/pmu.h>
+
+#define vcpu_vpmu(vcpu)   (&(vcpu)->arch.vpmu)
+#define vpmu_vcpu(vpmu)   container_of((vpmu), struct vcpu, arch.vpmu)
+
+#define MSR_TYPE_COUNTER            0
+#define MSR_TYPE_CTRL               1
+#define MSR_TYPE_GLOBAL             2
+#define MSR_TYPE_ARCH_COUNTER       3
+#define MSR_TYPE_ARCH_CTRL          4
+
+/* Start of PMU register bank */
+#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \
+                                                 (uintptr_t)ctxt->offset))
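vpmu_reg_pointer() turns one of the offset fields stored at the start of a context structure (fixed_counters or arch_counters in struct xen_pmu_intel_ctxt, for example) into a pointer to the register bank that follows it. Usage as it appears in the Intel code being moved, shown here only for illustration:

    struct xen_pmu_cntr_pair *cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);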
+
+/* Arch specific operations shared by all vpmus */
+struct arch_vpmu_ops {
+    int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
+                    uint64_t supported);
+    int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
+    int (*do_interrupt)(struct cpu_user_regs *regs);
+    void (*do_cpuid)(unsigned int input,
+                     unsigned int *eax, unsigned int *ebx,
+                     unsigned int *ecx, unsigned int *edx);
+    void (*arch_vpmu_destroy)(struct vcpu *v);
+    int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
+    int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
+    void (*arch_vpmu_dump)(const struct vcpu *);
+};
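A vendor backend plugs in by filling one of these structures and pointing vcpu_vpmu(v)->arch_vpmu_ops at it from its *_vpmu_initialise() hook, as the Intel code above does. A minimal read-only skeleton in the spirit of core2_no_vpmu_ops, for illustration only (the noop_* names are hypothetical):

    static int noop_do_rdmsr(unsigned int msr, uint64_t *msr_content)
    {
        *msr_content = 0;
        return 0;
    }

    static struct arch_vpmu_ops noop_vpmu_ops = {
        .do_rdmsr = noop_do_rdmsr,
    };

    /* ... in the vendor initialise hook: */
    vcpu_vpmu(v)->arch_vpmu_ops = &noop_vpmu_ops;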
+
+int core2_vpmu_init(void);
+int vmx_vpmu_initialise(struct vcpu *);
+int amd_vpmu_init(void);
+int svm_vpmu_initialise(struct vcpu *);
+
+struct vpmu_struct {
+    u32 flags;
+    u32 last_pcpu;
+    u32 hw_lapic_lvtpc;
+    void *context;      /* May be shared with PV guest */
+    void *priv_context; /* hypervisor-only */
+    struct arch_vpmu_ops *arch_vpmu_ops;
+    struct xen_pmu_data *xenpmu_data;
+    spinlock_t vpmu_lock;
+};
+
+/* VPMU states */
+#define VPMU_CONTEXT_ALLOCATED              0x1
+#define VPMU_CONTEXT_LOADED                 0x2
+#define VPMU_RUNNING                        0x4
+#define VPMU_CONTEXT_SAVE                   0x8   /* Force context save */
+#define VPMU_FROZEN                         0x10  /* Stop counters while VCPU is not running */
+#define VPMU_PASSIVE_DOMAIN_ALLOCATED       0x20
+/* PV(H) guests: VPMU registers are accessed by guest from shared page */
+#define VPMU_CACHED                         0x40
+
+static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
+{
+    vpmu->flags |= mask;
+}
+static inline void vpmu_reset(struct vpmu_struct *vpmu, const u32 mask)
+{
+    vpmu->flags &= ~mask;
+}
+static inline void vpmu_clear(struct vpmu_struct *vpmu)
+{
+    vpmu->flags = 0;
+}
+static inline bool_t vpmu_is_set(const struct vpmu_struct *vpmu, const u32 mask)
+{
+    return !!(vpmu->flags & mask);
+}
+static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
+                                      const u32 mask)
+{
+    return !!((vpmu->flags & mask) == mask);
+}
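These helpers only manipulate the VPMU_* state bits in vpmu->flags; a typical sequence from the code being moved, repeated here for illustration:

    vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
    if ( vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
    {
        /* safe to save this vCPU's hardware counter state */
    }
    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);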
+
+void vpmu_lvtpc_update(uint32_t val);
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+                uint64_t supported, bool_t is_write);
+void vpmu_do_interrupt(struct cpu_user_regs *regs);
+void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
+                                       unsigned int *ecx, unsigned int *edx);
+void vpmu_initialise(struct vcpu *v);
+void vpmu_destroy(struct vcpu *v);
+void vpmu_save(struct vcpu *v);
+int vpmu_load(struct vcpu *v, bool_t from_guest);
+void vpmu_dump(struct vcpu *v);
+
+static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+                                uint64_t supported)
+{
+    return vpmu_do_msr(msr, &msr_content, supported, 1);
+}
+static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+    return vpmu_do_msr(msr, msr_content, 0, 0);
+}
+
+extern int acquire_pmu_ownership(int pmu_ownership);
+extern void release_pmu_ownership(int pmu_ownership);
+
+extern unsigned int vpmu_mode;
+extern unsigned int vpmu_features;
+
+/* Context switch */
+static inline void vpmu_switch_from(struct vcpu *prev)
+{
+    if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+        vpmu_save(prev);
+}
+
+static inline void vpmu_switch_to(struct vcpu *next)
+{
+    if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+        vpmu_load(next, 0);
+}
+
+#endif /* __ASM_X86_HVM_VPMU_H_*/
+