obj-y += intel.o
obj-y += intel_cacheinfo.o
obj-y += mwait-idle.o
+obj-y += vpmu.o vpmu_amd.o vpmu_intel.o
--- /dev/null
+/*
+ * vpmu.c: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/xenoprof.h>
+#include <xen/event.h>
+#include <xen/guest_access.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/nmi.h>
+#include <asm/p2m.h>
+#include <asm/vpmu.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/svm/svm.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/apic.h>
+#include <public/pmu.h>
+#include <xsm/xsm.h>
+
+#include <compat/pmu.h>
+CHECK_pmu_cntr_pair;
+CHECK_pmu_data;
+CHECK_pmu_params;
+
+/*
+ * "vpmu" : vpmu generally enabled
+ * "vpmu=off" : vpmu generally disabled
+ * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on.
+ */
+static unsigned int __read_mostly opt_vpmu_enabled;
+unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
+unsigned int __read_mostly vpmu_features = 0;
+static void parse_vpmu_param(char *s);
+custom_param("vpmu", parse_vpmu_param);
+
+static DEFINE_SPINLOCK(vpmu_lock);
+static unsigned vpmu_count;
+
+static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
+
+static void __init parse_vpmu_param(char *s)
+{
+ switch ( parse_bool(s) )
+ {
+ case 0:
+ break;
+ default:
+ if ( !strcmp(s, "bts") )
+ vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
+ else if ( *s )
+ {
+ printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
+ break;
+ }
+ /* fall through */
+ case 1:
+ /* Default VPMU mode */
+ vpmu_mode = XENPMU_MODE_SELF;
+ opt_vpmu_enabled = 1;
+ break;
+ }
+}
+
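+/*
+ * Mirror a guest LVTPC write into the host local APIC: the vector is always
+ * Xen's PMU_APIC_VECTOR, only the mask bit is taken from the guest value.
+ */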
+void vpmu_lvtpc_update(uint32_t val)
+{
+ struct vpmu_struct *vpmu;
+ struct vcpu *curr = current;
+
+ if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
+ return;
+
+ vpmu = vcpu_vpmu(curr);
+
+ vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+
+ /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
+ if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
+ !vpmu_is_set(vpmu, VPMU_CACHED) )
+ apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+}
+
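+/*
+ * Dispatch a guest PMU MSR access to the vendor-specific handler. If the
+ * VPMU is off, or the access comes from a non-hardware domain while in
+ * XENPMU_MODE_ALL, writes are dropped and reads return 0.
+ */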
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+ uint64_t supported, bool_t is_write)
+{
+ struct vcpu *curr = current;
+ struct vpmu_struct *vpmu;
+ const struct arch_vpmu_ops *ops;
+ int ret = 0;
+
+ if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
+ ((vpmu_mode & XENPMU_MODE_ALL) &&
+ !is_hardware_domain(current->domain)) )
+ goto nop;
+
+ vpmu = vcpu_vpmu(curr);
+ ops = vpmu->arch_vpmu_ops;
+ if ( !ops )
+ goto nop;
+
+ if ( is_write && ops->do_wrmsr )
+ ret = ops->do_wrmsr(msr, *msr_content, supported);
+ else if ( !is_write && ops->do_rdmsr )
+ ret = ops->do_rdmsr(msr, msr_content);
+ else
+ goto nop;
+
+ /*
+     * We may have received a PMU interrupt while handling the MSR access,
+     * and since do_wrmsr/do_rdmsr may have loaded the VPMU context we
+     * should save (and unload) it again.
+ */
+ if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
+ vpmu_is_set(vpmu, VPMU_CACHED) )
+ {
+ vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+ ops->arch_vpmu_save(curr, 0);
+ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+ }
+
+ return ret;
+
+ nop:
+ if ( !is_write )
+ *msr_content = 0;
+
+ return 0;
+}
+
+static inline struct vcpu *choose_hwdom_vcpu(void)
+{
+ unsigned idx;
+
+ if ( hardware_domain->max_vcpus == 0 )
+ return NULL;
+
+ idx = smp_processor_id() % hardware_domain->max_vcpus;
+
+ return hardware_domain->vcpu[idx];
+}
+
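+/*
+ * PMU interrupt handler. Samples for PV(H) guests (and, in XENPMU_MODE_ALL,
+ * for everybody) are written to the shared xenpmu_data page and signalled
+ * with VIRQ_XENPMU; for HVM guests the interrupt is forwarded through the
+ * virtual local APIC.
+ */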
+void vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+ struct vcpu *sampled = current, *sampling;
+ struct vpmu_struct *vpmu;
+ struct vlapic *vlapic;
+ u32 vlapic_lvtpc;
+
+ /*
+     * dom0 will handle the interrupt for special domains (e.g. the idle
+     * domain) or, in XENPMU_MODE_ALL, for everyone.
+ */
+ if ( (vpmu_mode & XENPMU_MODE_ALL) ||
+ (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
+ {
+ sampling = choose_hwdom_vcpu();
+ if ( !sampling )
+ return;
+ }
+ else
+ sampling = sampled;
+
+ vpmu = vcpu_vpmu(sampling);
+ if ( !vpmu->arch_vpmu_ops )
+ return;
+
+ /* PV(H) guest */
+ if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) )
+ {
+ const struct cpu_user_regs *cur_regs;
+ uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
+ domid_t domid;
+
+ if ( !vpmu->xenpmu_data )
+ return;
+
+ if ( is_pvh_vcpu(sampling) &&
+ !(vpmu_mode & XENPMU_MODE_ALL) &&
+ !vpmu->arch_vpmu_ops->do_interrupt(regs) )
+ return;
+
+ if ( vpmu_is_set(vpmu, VPMU_CACHED) )
+ return;
+
+ /* PV guest will be reading PMU MSRs from xenpmu_data */
+ vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+ vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
+ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
+
+ if ( has_hvm_container_vcpu(sampled) )
+ *flags = 0;
+ else
+ *flags = PMU_SAMPLE_PV;
+
+ if ( sampled == sampling )
+ domid = DOMID_SELF;
+ else
+ domid = sampled->domain->domain_id;
+
+ /* Store appropriate registers in xenpmu_data */
+ /* FIXME: 32-bit PVH should go here as well */
+ if ( is_pv_32bit_vcpu(sampling) )
+ {
+ /*
+ * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+ * and therefore we treat it the same way as a non-privileged
+ * PV 32-bit domain.
+ */
+ struct compat_pmu_regs *cmp;
+
+ cur_regs = guest_cpu_user_regs();
+
+ cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
+ cmp->ip = cur_regs->rip;
+ cmp->sp = cur_regs->rsp;
+ cmp->flags = cur_regs->eflags;
+ cmp->ss = cur_regs->ss;
+ cmp->cs = cur_regs->cs;
+ if ( (cmp->cs & 3) > 1 )
+ *flags |= PMU_SAMPLE_USER;
+ }
+ else
+ {
+ struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
+
+ if ( (vpmu_mode & XENPMU_MODE_SELF) )
+ cur_regs = guest_cpu_user_regs();
+ else if ( !guest_mode(regs) &&
+ is_hardware_domain(sampling->domain) )
+ {
+ cur_regs = regs;
+ domid = DOMID_XEN;
+ }
+ else
+ cur_regs = guest_cpu_user_regs();
+
+ r->ip = cur_regs->rip;
+ r->sp = cur_regs->rsp;
+ r->flags = cur_regs->eflags;
+
+ if ( !has_hvm_container_vcpu(sampled) )
+ {
+ r->ss = cur_regs->ss;
+ r->cs = cur_regs->cs;
+ if ( !(sampled->arch.flags & TF_kernel_mode) )
+ *flags |= PMU_SAMPLE_USER;
+ }
+ else
+ {
+ struct segment_register seg;
+
+ hvm_get_segment_register(sampled, x86_seg_cs, &seg);
+ r->cs = seg.sel;
+ hvm_get_segment_register(sampled, x86_seg_ss, &seg);
+ r->ss = seg.sel;
+ r->cpl = seg.attr.fields.dpl;
+ if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
+ *flags |= PMU_SAMPLE_REAL;
+ }
+ }
+
+ vpmu->xenpmu_data->domain_id = domid;
+ vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
+ if ( is_hardware_domain(sampling->domain) )
+ vpmu->xenpmu_data->pcpu_id = smp_processor_id();
+ else
+ vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
+
+ vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
+ apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+ *flags |= PMU_CACHED;
+ vpmu_set(vpmu, VPMU_CACHED);
+
+ send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
+
+ return;
+ }
+
+ /* HVM guests */
+ vlapic = vcpu_vlapic(sampling);
+
+ /* We don't support (yet) HVM dom0 */
+ ASSERT(sampling == sampled);
+
+ if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
+ !is_vlapic_lvtpc_enabled(vlapic) )
+ return;
+
+ vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+
+ switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
+ {
+ case APIC_MODE_FIXED:
+ vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+ break;
+ case APIC_MODE_NMI:
+ sampling->nmi_pending = 1;
+ break;
+ }
+}
+
+void vpmu_do_cpuid(unsigned int input,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+ if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid )
+ vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx);
+}
+
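+/*
+ * Force-save the VPMU context of the vcpu passed in arg on the local pcpu
+ * and clear this pcpu's last_vcpu; run with interrupts off, either directly
+ * or as an on_selected_cpus() callback.
+ */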
+static void vpmu_save_force(void *arg)
+{
+ struct vcpu *v = (struct vcpu *)arg;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ return;
+
+ vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+
+ if ( vpmu->arch_vpmu_ops )
+ (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
+
+ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
+
+ per_cpu(last_vcpu, smp_processor_id()) = NULL;
+}
+
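+/*
+ * Save the loaded VPMU state of v and mask the local PMU LVT entry; the
+ * vendor hook decides whether the context remains marked as loaded.
+ */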
+void vpmu_save(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ int pcpu = smp_processor_id();
+
+ if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
+ return;
+
+ vpmu->last_pcpu = pcpu;
+ per_cpu(last_vcpu, pcpu) = v;
+
+ if ( vpmu->arch_vpmu_ops )
+ if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
+ vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+
+ apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+}
+
+int vpmu_load(struct vcpu *v, bool_t from_guest)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ int pcpu = smp_processor_id();
+ struct vcpu *prev = NULL;
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ return 0;
+
+ /* First time this VCPU is running here */
+ if ( vpmu->last_pcpu != pcpu )
+ {
+ /*
+         * Get the context from the last pcpu that we ran on. Note that if
+         * another vCPU is running there it must have saved this vCPU's
+         * context before starting to run (see below).
+         * There should be no race since the remote pcpu will disable
+         * interrupts before saving the context.
+ */
+ if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ {
+ on_selected_cpus(cpumask_of(vpmu->last_pcpu),
+ vpmu_save_force, (void *)v, 1);
+ vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+ }
+ }
+
+ /* Prevent forced context save from remote CPU */
+ local_irq_disable();
+
+ prev = per_cpu(last_vcpu, pcpu);
+
+ if ( prev != v && prev )
+ {
+ vpmu = vcpu_vpmu(prev);
+
+ /* Someone ran here before us */
+ vpmu_save_force(prev);
+ vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+
+ vpmu = vcpu_vpmu(v);
+ }
+
+ local_irq_enable();
+
+    /* Only when the PMU is counting do we load its context immediately. */
+ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+ (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
+ return 0;
+
+ if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
+ {
+ int ret;
+
+ apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
+ /* Arch code needs to set VPMU_CONTEXT_LOADED */
+ ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
+ if ( ret )
+ {
+ apic_write_around(APIC_LVTPC,
+ vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
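+/*
+ * Per-vcpu VPMU setup: dispatch to the vendor-specific initialiser and keep
+ * vpmu_count up to date for non-hardware domains.
+ */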
+void vpmu_initialise(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ uint8_t vendor = current_cpu_data.x86_vendor;
+ int ret;
+ bool_t is_priv_vpmu = is_hardware_domain(v->domain);
+
+ BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
+ BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
+ BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
+ BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
+
+ ASSERT(!vpmu->flags && !vpmu->context);
+
+ if ( !is_priv_vpmu )
+ {
+ /*
+ * Count active VPMUs so that we won't try to change vpmu_mode while
+ * they are in use.
+         * vpmu_mode can be safely updated while dom0's VPMUs are active and
+         * so we don't need to include them in the count.
+ */
+ spin_lock(&vpmu_lock);
+ vpmu_count++;
+ spin_unlock(&vpmu_lock);
+ }
+
+ switch ( vendor )
+ {
+ case X86_VENDOR_AMD:
+ ret = svm_vpmu_initialise(v);
+ break;
+
+ case X86_VENDOR_INTEL:
+ ret = vmx_vpmu_initialise(v);
+ break;
+
+ default:
+ if ( vpmu_mode != XENPMU_MODE_OFF )
+ {
+ printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
+ "Disabling VPMU\n", vendor);
+ opt_vpmu_enabled = 0;
+ vpmu_mode = XENPMU_MODE_OFF;
+ }
+ return; /* Don't bother restoring vpmu_count, VPMU is off forever */
+ }
+
+ if ( ret )
+ printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
+
+ /* Intel needs to initialize VPMU ops even if VPMU is not in use */
+ if ( !is_priv_vpmu &&
+ (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
+ (vpmu_mode == XENPMU_MODE_ALL)) )
+ {
+ spin_lock(&vpmu_lock);
+ vpmu_count--;
+ spin_unlock(&vpmu_lock);
+ }
+}
+
+static void vpmu_clear_last(void *arg)
+{
+ if ( this_cpu(last_vcpu) == arg )
+ this_cpu(last_vcpu) = NULL;
+}
+
+void vpmu_destroy(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ return;
+
+ /*
+ * Need to clear last_vcpu in case it points to v.
+ * We can check here non-atomically whether it is 'v' since
+ * last_vcpu can never become 'v' again at this point.
+ * We will test it again in vpmu_clear_last() with interrupts
+ * disabled to make sure we don't clear someone else.
+ */
+ if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v )
+ on_selected_cpus(cpumask_of(vpmu->last_pcpu),
+ vpmu_clear_last, v, 1);
+
+ if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
+ {
+ /* Unload VPMU first. This will stop counters */
+ on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
+ vpmu_save_force, v, 1);
+ vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
+ }
+
+ spin_lock(&vpmu_lock);
+ if ( !is_hardware_domain(v->domain) )
+ vpmu_count--;
+ spin_unlock(&vpmu_lock);
+}
+
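+/*
+ * XENPMU_init: map the guest page at GFN params->val as this vcpu's shared
+ * xen_pmu_data area and initialise its VPMU.
+ */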
+static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
+{
+ struct vcpu *v;
+ struct vpmu_struct *vpmu;
+ struct page_info *page;
+ uint64_t gfn = params->val;
+
+ if ( (vpmu_mode == XENPMU_MODE_OFF) ||
+ ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
+ return -EINVAL;
+
+ if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
+ return -EINVAL;
+
+ page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+ if ( !page )
+ return -EINVAL;
+
+ if ( !get_page_type(page, PGT_writable_page) )
+ {
+ put_page(page);
+ return -EINVAL;
+ }
+
+ v = d->vcpu[params->vcpu];
+ vpmu = vcpu_vpmu(v);
+
+ spin_lock(&vpmu->vpmu_lock);
+
+ if ( v->arch.vpmu.xenpmu_data )
+ {
+ spin_unlock(&vpmu->vpmu_lock);
+ put_page_and_type(page);
+ return -EEXIST;
+ }
+
+ v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
+ if ( !v->arch.vpmu.xenpmu_data )
+ {
+ spin_unlock(&vpmu->vpmu_lock);
+ put_page_and_type(page);
+ return -ENOMEM;
+ }
+
+ vpmu_initialise(v);
+
+ spin_unlock(&vpmu->vpmu_lock);
+
+ return 0;
+}
+
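+/* XENPMU_finish: destroy the VPMU and unmap/release the xen_pmu_data page. */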
+static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
+{
+ struct vcpu *v;
+ struct vpmu_struct *vpmu;
+ uint64_t mfn;
+ void *xenpmu_data;
+
+ if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
+ return;
+
+ v = d->vcpu[params->vcpu];
+ if ( v != current )
+ vcpu_pause(v);
+
+ vpmu = vcpu_vpmu(v);
+ spin_lock(&vpmu->vpmu_lock);
+
+ vpmu_destroy(v);
+ xenpmu_data = vpmu->xenpmu_data;
+ vpmu->xenpmu_data = NULL;
+
+ spin_unlock(&vpmu->vpmu_lock);
+
+ if ( xenpmu_data )
+ {
+ mfn = domain_page_map_to_mfn(xenpmu_data);
+ ASSERT(mfn_valid(mfn));
+ unmap_domain_page_global(xenpmu_data);
+ put_page_and_type(mfn_to_page(mfn));
+ }
+
+ if ( v != current )
+ vcpu_unpause(v);
+}
+
+/* Dump some vpmu information on console. Used in keyhandler dump_domains(). */
+void vpmu_dump(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
+ vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
+}
+
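+/* Hypercall handler for the XENPMU_* operations defined in public/pmu.h. */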
+long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
+{
+ int ret;
+ struct vcpu *curr;
+ struct xen_pmu_params pmu_params = {.val = 0};
+ struct xen_pmu_data *xenpmu_data;
+ struct vpmu_struct *vpmu;
+
+ if ( !opt_vpmu_enabled )
+ return -EOPNOTSUPP;
+
+ ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
+ if ( ret )
+ return ret;
+
+ /* Check major version when parameters are specified */
+ switch ( op )
+ {
+ case XENPMU_mode_set:
+ case XENPMU_feature_set:
+ case XENPMU_init:
+ case XENPMU_finish:
+ if ( copy_from_guest(&pmu_params, arg, 1) )
+ return -EFAULT;
+
+ if ( pmu_params.version.maj != XENPMU_VER_MAJ )
+ return -EINVAL;
+ }
+
+ switch ( op )
+ {
+ case XENPMU_mode_set:
+ {
+ if ( (pmu_params.val &
+ ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
+ (hweight64(pmu_params.val) > 1) )
+ return -EINVAL;
+
+ /* 32-bit dom0 can only sample itself. */
+ if ( is_pv_32bit_vcpu(current) &&
+ (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
+ return -EINVAL;
+
+ spin_lock(&vpmu_lock);
+
+ /*
+ * We can always safely switch between XENPMU_MODE_SELF and
+ * XENPMU_MODE_HV while other VPMUs are active.
+ */
+ if ( (vpmu_count == 0) ||
+ ((vpmu_mode ^ pmu_params.val) ==
+ (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
+ vpmu_mode = pmu_params.val;
+ else if ( vpmu_mode != pmu_params.val )
+ {
+ printk(XENLOG_WARNING
+ "VPMU: Cannot change mode while active VPMUs exist\n");
+ ret = -EBUSY;
+ }
+
+ spin_unlock(&vpmu_lock);
+
+ break;
+ }
+
+ case XENPMU_mode_get:
+ memset(&pmu_params, 0, sizeof(pmu_params));
+ pmu_params.val = vpmu_mode;
+
+ pmu_params.version.maj = XENPMU_VER_MAJ;
+ pmu_params.version.min = XENPMU_VER_MIN;
+
+ if ( copy_to_guest(arg, &pmu_params, 1) )
+ ret = -EFAULT;
+
+ break;
+
+ case XENPMU_feature_set:
+ if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
+ return -EINVAL;
+
+ spin_lock(&vpmu_lock);
+
+ if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
+ vpmu_features = pmu_params.val;
+ else
+ {
+ printk(XENLOG_WARNING "VPMU: Cannot change features while"
+ " active VPMUs exist\n");
+ ret = -EBUSY;
+ }
+
+ spin_unlock(&vpmu_lock);
+
+ break;
+
+ case XENPMU_feature_get:
+ pmu_params.val = vpmu_features;
+ if ( copy_field_to_guest(arg, &pmu_params, val) )
+ ret = -EFAULT;
+
+ break;
+
+ case XENPMU_init:
+ ret = pvpmu_init(current->domain, &pmu_params);
+ break;
+
+ case XENPMU_finish:
+ pvpmu_finish(current->domain, &pmu_params);
+ break;
+
+ case XENPMU_lvtpc_set:
+ xenpmu_data = current->arch.vpmu.xenpmu_data;
+ if ( xenpmu_data != NULL )
+ vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+ else
+ ret = -EINVAL;
+ break;
+
+ case XENPMU_flush:
+ curr = current;
+ vpmu = vcpu_vpmu(curr);
+ xenpmu_data = curr->arch.vpmu.xenpmu_data;
+ if ( xenpmu_data == NULL )
+ return -EINVAL;
+ xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
+ vpmu_reset(vpmu, VPMU_CACHED);
+ vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
+ if ( vpmu_load(curr, 1) )
+ {
+ xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
+ vpmu_set(vpmu, VPMU_CACHED);
+ ret = -EIO;
+ }
+        break;
+
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int __init vpmu_init(void)
+{
+ int vendor = current_cpu_data.x86_vendor;
+
+ if ( !opt_vpmu_enabled )
+ {
+ printk(XENLOG_INFO "VPMU: disabled\n");
+ return 0;
+ }
+
+ /* NMI watchdog uses LVTPC and HW counter */
+ if ( opt_watchdog && opt_vpmu_enabled )
+ {
+ printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
+ opt_vpmu_enabled = 0;
+ vpmu_mode = XENPMU_MODE_OFF;
+ return 0;
+ }
+
+ switch ( vendor )
+ {
+ case X86_VENDOR_AMD:
+ if ( amd_vpmu_init() )
+ vpmu_mode = XENPMU_MODE_OFF;
+ break;
+ case X86_VENDOR_INTEL:
+ if ( core2_vpmu_init() )
+ vpmu_mode = XENPMU_MODE_OFF;
+ break;
+ default:
+ printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
+ "Turning VPMU off.\n", vendor);
+ vpmu_mode = XENPMU_MODE_OFF;
+ break;
+ }
+
+ if ( vpmu_mode != XENPMU_MODE_OFF )
+ printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
+ __stringify(XENPMU_VER_MIN) "\n");
+ else
+ opt_vpmu_enabled = 0;
+
+ return 0;
+}
+__initcall(vpmu_init);
--- /dev/null
+/*
+ * vpmu_amd.c: AMD specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2010, Advanced Micro Devices, Inc.
+ * Parts of this code are Copyright (c) 2007, Intel Corporation
+ *
+ * Author: Wei Wang <wei.wang2@amd.com>
+ * Tested by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/xenoprof.h>
+#include <xen/hvm/save.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <asm/apic.h>
+#include <asm/vpmu.h>
+#include <asm/hvm/vlapic.h>
+#include <public/pmu.h>
+
+#define MSR_F10H_EVNTSEL_GO_SHIFT 40
+#define MSR_F10H_EVNTSEL_EN_SHIFT 22
+#define MSR_F10H_COUNTER_LENGTH 48
+
+#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
+#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT))
+#define set_guest_mode(msr) ((msr) |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
+#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1))))
+
+static unsigned int __read_mostly num_counters;
+static const u32 __read_mostly *counters;
+static const u32 __read_mostly *ctrls;
+static bool_t __read_mostly k7_counters_mirrored;
+
+/* Total size of the PMU register block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+
+#define F10H_NUM_COUNTERS 4
+#define F15H_NUM_COUNTERS 6
+#define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
+
+/* PMU Counter MSRs. */
+static const u32 AMD_F10H_COUNTERS[] = {
+ MSR_K7_PERFCTR0,
+ MSR_K7_PERFCTR1,
+ MSR_K7_PERFCTR2,
+ MSR_K7_PERFCTR3
+};
+
+/* PMU Control MSRs. */
+static const u32 AMD_F10H_CTRLS[] = {
+ MSR_K7_EVNTSEL0,
+ MSR_K7_EVNTSEL1,
+ MSR_K7_EVNTSEL2,
+ MSR_K7_EVNTSEL3
+};
+
+static const u32 AMD_F15H_COUNTERS[] = {
+ MSR_AMD_FAM15H_PERFCTR0,
+ MSR_AMD_FAM15H_PERFCTR1,
+ MSR_AMD_FAM15H_PERFCTR2,
+ MSR_AMD_FAM15H_PERFCTR3,
+ MSR_AMD_FAM15H_PERFCTR4,
+ MSR_AMD_FAM15H_PERFCTR5
+};
+
+static const u32 AMD_F15H_CTRLS[] = {
+ MSR_AMD_FAM15H_EVNTSEL0,
+ MSR_AMD_FAM15H_EVNTSEL1,
+ MSR_AMD_FAM15H_EVNTSEL2,
+ MSR_AMD_FAM15H_EVNTSEL3,
+ MSR_AMD_FAM15H_EVNTSEL4,
+ MSR_AMD_FAM15H_EVNTSEL5
+};
+
+/* Bits [63:42], [39:36], 21 and 19 are reserved */
+#define CTRL_RSVD_MASK ((-1ULL & (~((1ULL << 42) - 1))) | \
+ (0xfULL << 36) | (1ULL << 21) | (1ULL << 19))
+static uint64_t __read_mostly ctrl_rsvd[MAX_NUM_COUNTERS];
+
+/* Use private context as a flag for MSR bitmap */
+#define msr_bitmap_on(vpmu) do { \
+ (vpmu)->priv_context = (void *)-1L; \
+ } while (0)
+#define msr_bitmap_off(vpmu) do { \
+ (vpmu)->priv_context = NULL; \
+ } while (0)
+#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL)
+
+static inline int get_pmu_reg_type(u32 addr, unsigned int *idx)
+{
+ if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) )
+ {
+ *idx = addr - MSR_K7_EVNTSEL0;
+ return MSR_TYPE_CTRL;
+ }
+
+ if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) )
+ {
+ *idx = addr - MSR_K7_PERFCTR0;
+ return MSR_TYPE_COUNTER;
+ }
+
+ if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) &&
+ (addr <= MSR_AMD_FAM15H_PERFCTR5 ) )
+ {
+ *idx = (addr - MSR_AMD_FAM15H_EVNTSEL0) >> 1;
+        if ( addr & 1 )
+ return MSR_TYPE_COUNTER;
+ else
+ return MSR_TYPE_CTRL;
+ }
+
+ /* unsupported registers */
+ return -1;
+}
+
+static inline u32 get_fam15h_addr(u32 addr)
+{
+ switch ( addr )
+ {
+ case MSR_K7_PERFCTR0:
+ return MSR_AMD_FAM15H_PERFCTR0;
+ case MSR_K7_PERFCTR1:
+ return MSR_AMD_FAM15H_PERFCTR1;
+ case MSR_K7_PERFCTR2:
+ return MSR_AMD_FAM15H_PERFCTR2;
+ case MSR_K7_PERFCTR3:
+ return MSR_AMD_FAM15H_PERFCTR3;
+ case MSR_K7_EVNTSEL0:
+ return MSR_AMD_FAM15H_EVNTSEL0;
+ case MSR_K7_EVNTSEL1:
+ return MSR_AMD_FAM15H_EVNTSEL1;
+ case MSR_K7_EVNTSEL2:
+ return MSR_AMD_FAM15H_EVNTSEL2;
+ case MSR_K7_EVNTSEL3:
+ return MSR_AMD_FAM15H_EVNTSEL3;
+ default:
+ break;
+ }
+
+ return addr;
+}
+
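+/*
+ * Reset the register block: zero everything, then seed the control registers
+ * with the reserved bits read from hardware at boot.
+ */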
+static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
+{
+ unsigned i;
+ uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+ memset(&ctxt->regs[0], 0, regs_sz);
+ for ( i = 0; i < num_counters; i++ )
+ ctrl_regs[i] = ctrl_rsvd[i];
+}
+
+static void amd_vpmu_set_msr_bitmap(struct vcpu *v)
+{
+ unsigned int i;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE);
+ svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE);
+ }
+
+ msr_bitmap_on(vpmu);
+}
+
+static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
+{
+ unsigned int i;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW);
+ svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW);
+ }
+
+ msr_bitmap_off(vpmu);
+}
+
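+/*
+ * Nothing family-specific to check here: every PMU interrupt is reported to
+ * the caller as a valid one.
+ */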
+static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+ return 1;
+}
+
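+/* Write the saved counter and control values back into the hardware MSRs. */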
+static inline void context_load(struct vcpu *v)
+{
+ unsigned int i;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+ uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ wrmsrl(counters[i], counter_regs[i]);
+ wrmsrl(ctrls[i], ctrl_regs[i]);
+ }
+}
+
+static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_amd_ctxt *ctxt;
+ uint64_t *ctrl_regs;
+ unsigned int i;
+
+ vpmu_reset(vpmu, VPMU_FROZEN);
+
+ if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ {
+ ctxt = vpmu->context;
+ ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+ for ( i = 0; i < num_counters; i++ )
+ wrmsrl(ctrls[i], ctrl_regs[i]);
+
+ return 0;
+ }
+
+ if ( from_guest )
+ {
+ bool_t is_running = 0;
+ struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+
+ ASSERT(!is_hvm_vcpu(v));
+
+ ctxt = vpmu->context;
+ ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+ memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
+ {
+ /*
+ * Not necessary to re-init context since we should never load
+ * it until guest provides valid values. But just to be safe.
+ */
+ amd_vpmu_init_regs(ctxt);
+ return -EINVAL;
+ }
+
+ if ( is_pmu_enabled(ctrl_regs[i]) )
+ is_running = 1;
+ }
+
+ if ( is_running )
+ vpmu_set(vpmu, VPMU_RUNNING);
+ else
+ vpmu_reset(vpmu, VPMU_RUNNING);
+ }
+
+ vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+
+ context_load(v);
+
+ return 0;
+}
+
+static inline void context_save(struct vcpu *v)
+{
+ unsigned int i;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+
+ /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */
+ for ( i = 0; i < num_counters; i++ )
+ rdmsrl(counters[i], counter_regs[i]);
+}
+
+static int amd_vpmu_save(struct vcpu *v, bool_t to_guest)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ unsigned int i;
+
+ /* Stop the counters. */
+ for ( i = 0; i < num_counters; i++ )
+ wrmsrl(ctrls[i], 0);
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
+ {
+ vpmu_set(vpmu, VPMU_FROZEN);
+ return 0;
+ }
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ return 0;
+
+ context_save(v);
+
+ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
+ has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+ amd_vpmu_unset_msr_bitmap(v);
+
+ if ( to_guest )
+ {
+ struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt;
+
+ ASSERT(!is_hvm_vcpu(v));
+ ctxt = vpmu->context;
+ guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
+ memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz);
+ }
+
+ return 1;
+}
+
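+/*
+ * Reflect an MSR write into the software context, translating legacy K7 MSR
+ * addresses to their family 15h equivalents where the counters are mirrored.
+ */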
+static void context_update(unsigned int msr, u64 msr_content)
+{
+ unsigned int i;
+ struct vcpu *v = current;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+ uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+
+ if ( k7_counters_mirrored &&
+ ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) )
+ {
+ msr = get_fam15h_addr(msr);
+ }
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ if ( msr == ctrls[i] )
+ {
+ ctrl_regs[i] = msr_content;
+ return;
+ }
+        else if ( msr == counters[i] )
+ {
+ counter_regs[i] = msr_content;
+ return;
+ }
+ }
+}
+
+static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+ uint64_t supported)
+{
+ struct vcpu *v = current;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ unsigned int idx = 0;
+ int type = get_pmu_reg_type(msr, &idx);
+
+ ASSERT(!supported);
+
+ if ( (type == MSR_TYPE_CTRL ) &&
+ ((msr_content & CTRL_RSVD_MASK) != ctrl_rsvd[idx]) )
+ return -EINVAL;
+
+ /* For all counters, enable guest only mode for HVM guest */
+ if ( has_hvm_container_vcpu(v) && (type == MSR_TYPE_CTRL) &&
+ !is_guest_mode(msr_content) )
+ {
+ set_guest_mode(msr_content);
+ }
+
+    /* Check if a counter is being enabled while the vPMU is not running */
+ if ( (type == MSR_TYPE_CTRL) &&
+ is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) )
+ {
+ if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
+ return 0;
+ vpmu_set(vpmu, VPMU_RUNNING);
+
+ if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+ amd_vpmu_set_msr_bitmap(v);
+ }
+
+    /* Stop saving & restoring if the guest stops a counter */
+ if ( (type == MSR_TYPE_CTRL) &&
+ (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
+ {
+ vpmu_reset(vpmu, VPMU_RUNNING);
+ if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+ amd_vpmu_unset_msr_bitmap(v);
+ release_pmu_ownship(PMU_OWNER_HVM);
+ }
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
+ || vpmu_is_set(vpmu, VPMU_FROZEN) )
+ {
+ context_load(v);
+ vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+ vpmu_reset(vpmu, VPMU_FROZEN);
+ }
+
+ /* Update vpmu context immediately */
+ context_update(msr, msr_content);
+
+ /* Write to hw counters */
+ wrmsrl(msr, msr_content);
+ return 0;
+}
+
+static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+ struct vcpu *v = current;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
+ || vpmu_is_set(vpmu, VPMU_FROZEN) )
+ {
+ context_load(v);
+ vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+ vpmu_reset(vpmu, VPMU_FROZEN);
+ }
+
+ rdmsrl(msr, *msr_content);
+
+ return 0;
+}
+
+static void amd_vpmu_destroy(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
+ amd_vpmu_unset_msr_bitmap(v);
+
+ xfree(vpmu->context);
+
+ if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
+ release_pmu_ownship(PMU_OWNER_HVM);
+
+ vpmu_clear(vpmu);
+}
+
+/* VPMU part of the 'q' keyhandler */
+static void amd_vpmu_dump(const struct vcpu *v)
+{
+ const struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ const struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ const uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+ const uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
+ unsigned int i;
+
+ printk(" VPMU state: 0x%x ", vpmu->flags);
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ {
+ printk("\n");
+ return;
+ }
+
+ printk("(");
+ if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) )
+ printk("PASSIVE_DOMAIN_ALLOCATED, ");
+ if ( vpmu_is_set(vpmu, VPMU_FROZEN) )
+ printk("FROZEN, ");
+ if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
+ printk("SAVE, ");
+ if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
+ printk("RUNNING, ");
+ if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ printk("LOADED, ");
+ printk("ALLOCATED)\n");
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ uint64_t ctrl, cntr;
+
+ rdmsrl(ctrls[i], ctrl);
+ rdmsrl(counters[i], cntr);
+ printk(" %#x: %#lx (%#lx in HW) %#x: %#lx (%#lx in HW)\n",
+ ctrls[i], ctrl_regs[i], ctrl,
+ counters[i], counter_regs[i], cntr);
+ }
+}
+
+struct arch_vpmu_ops amd_vpmu_ops = {
+ .do_wrmsr = amd_vpmu_do_wrmsr,
+ .do_rdmsr = amd_vpmu_do_rdmsr,
+ .do_interrupt = amd_vpmu_do_interrupt,
+ .arch_vpmu_destroy = amd_vpmu_destroy,
+ .arch_vpmu_save = amd_vpmu_save,
+ .arch_vpmu_load = amd_vpmu_load,
+ .arch_vpmu_dump = amd_vpmu_dump
+};
+
+int svm_vpmu_initialise(struct vcpu *v)
+{
+ struct xen_pmu_amd_ctxt *ctxt;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( vpmu_mode == XENPMU_MODE_OFF )
+ return 0;
+
+ if ( !counters )
+ return -EINVAL;
+
+ ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz);
+ if ( !ctxt )
+ {
+        printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
+               "PMU feature is unavailable on domain %d vcpu %d.\n",
+               v->domain->domain_id, v->vcpu_id);
+ return -ENOMEM;
+ }
+
+ ctxt->counters = sizeof(*ctxt);
+ ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
+ amd_vpmu_init_regs(ctxt);
+
+ vpmu->context = ctxt;
+ vpmu->priv_context = NULL;
+
+ if ( !is_hvm_vcpu(v) )
+ {
+ /* Copy register offsets to shared area */
+ ASSERT(vpmu->xenpmu_data);
+ memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt,
+ offsetof(struct xen_pmu_amd_ctxt, regs));
+ }
+
+ vpmu->arch_vpmu_ops = &amd_vpmu_ops;
+
+ vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
+ return 0;
+}
+
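+/*
+ * Boot-time setup: select the counter/control MSR tables for this CPU family
+ * and record the reserved control bits currently set in hardware.
+ */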
+int __init amd_vpmu_init(void)
+{
+ unsigned int i;
+
+ switch ( current_cpu_data.x86 )
+ {
+ case 0x15:
+ num_counters = F15H_NUM_COUNTERS;
+ counters = AMD_F15H_COUNTERS;
+ ctrls = AMD_F15H_CTRLS;
+ k7_counters_mirrored = 1;
+ break;
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x16:
+ num_counters = F10H_NUM_COUNTERS;
+ counters = AMD_F10H_COUNTERS;
+ ctrls = AMD_F10H_CTRLS;
+ k7_counters_mirrored = 0;
+ break;
+ default:
+ printk(XENLOG_WARNING "VPMU: Unsupported CPU family %#x\n",
+ current_cpu_data.x86);
+ return -EINVAL;
+ }
+
+ if ( sizeof(struct xen_pmu_data) +
+ 2 * sizeof(uint64_t) * num_counters > PAGE_SIZE )
+ {
+ printk(XENLOG_WARNING
+ "VPMU: Register bank does not fit into VPMU shared page\n");
+ counters = ctrls = NULL;
+ num_counters = 0;
+ return -ENOSPC;
+ }
+
+ for ( i = 0; i < num_counters; i++ )
+ {
+ rdmsrl(ctrls[i], ctrl_rsvd[i]);
+ ctrl_rsvd[i] &= CTRL_RSVD_MASK;
+ }
+
+ regs_sz = 2 * sizeof(uint64_t) * num_counters;
+
+ return 0;
+}
+
--- /dev/null
+/*
+ * vpmu_intel.c: CORE 2 specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/xenoprof.h>
+#include <xen/irq.h>
+#include <asm/system.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/apic.h>
+#include <asm/traps.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+#include <asm/vpmu.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <public/pmu.h>
+
+/*
+ * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
+ * instruction.
+ * cpuid 0xa - Architectural Performance Monitoring Leaf
+ * Register eax
+ */
+#define PMU_VERSION_SHIFT 0 /* Version ID */
+#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */
+#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)
+
+#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */
+#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */
+#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)
+
+#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */
+#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */
+#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
+/* Register edx */
+#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */
+#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */
+#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT)
+
+#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */
+#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */
+#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT)
+
+/* Alias registers (0x4c1) for full-width writes to PMCs */
+#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
+static bool_t __read_mostly full_width_write;
+
+/* Intel-specific VPMU features */
+#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */
+#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */
+
+/*
+ * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
+ * counters. 4 bits for every counter.
+ */
+#define FIXED_CTR_CTRL_BITS 4
+#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
+
+#define ARCH_CNTR_ENABLED (1ULL << 22)
+
+/* Number of general-purpose and fixed performance counters */
+static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
+
+/* Masks used for testing whether an MSR is valid */
+#define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21))
+static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
+static uint64_t __read_mostly global_ovf_ctrl_mask;
+
+/* Total size of the PMU register block (copied to/from PV(H) guest) */
+static unsigned int __read_mostly regs_sz;
+/* Offset into context of the beginning of PMU register block */
+static const unsigned int regs_off =
+ sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
+ sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
+
+/*
+ * QUIRK to work around an issue on various family 6 CPUs.
+ * The issue leads to endless PMC interrupt loops on the processor.
+ * If the interrupt handler is running and a PMC reaches the value 0, this
+ * value remains forever and immediately triggers a new interrupt once the
+ * handler finishes.
+ * The workaround is to read all flagged counters and, if a value is 0, to
+ * write 1 (or another value != 0) into it.
+ * No erratum exists and the real cause of this behaviour is unknown.
+ */
+bool_t __read_mostly is_pmc_quirk;
+
+static void check_pmc_quirk(void)
+{
+ if ( current_cpu_data.x86 == 6 )
+ is_pmc_quirk = 1;
+ else
+ is_pmc_quirk = 0;
+}
+
+static void handle_pmc_quirk(u64 msr_content)
+{
+ int i;
+ u64 val;
+
+ if ( !is_pmc_quirk )
+ return;
+
+ val = msr_content;
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ {
+ if ( val & 0x1 )
+ {
+ u64 cnt;
+ rdmsrl(MSR_P6_PERFCTR(i), cnt);
+ if ( cnt == 0 )
+ wrmsrl(MSR_P6_PERFCTR(i), 1);
+ }
+ val >>= 1;
+ }
+ val = msr_content >> 32;
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ {
+ if ( val & 0x1 )
+ {
+ u64 cnt;
+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
+ if ( cnt == 0 )
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
+ }
+ val >>= 1;
+ }
+}
+
+/*
+ * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15]
+ */
+static int core2_get_arch_pmc_count(void)
+{
+ u32 eax;
+
+ eax = cpuid_eax(0xa);
+ return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
+}
+
+/*
+ * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4]
+ */
+static int core2_get_fixed_pmc_count(void)
+{
+    u32 edx;
+
+    edx = cpuid_edx(0xa);
+    return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
+}
+
+/* edx bits 5-12: Bit width of fixed-function performance counters */
+static int core2_get_bitwidth_fix_count(void)
+{
+ u32 edx;
+
+ edx = cpuid_edx(0xa);
+ return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
+}
+
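+/*
+ * Classify an MSR: returns 1 and sets *type (and, where applicable, *index)
+ * if the MSR belongs to the virtualised Intel PMU, 0 otherwise.
+ */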
+static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
+{
+ u32 msr_index_pmc;
+
+ switch ( msr_index )
+ {
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ case MSR_IA32_DS_AREA:
+ case MSR_IA32_PEBS_ENABLE:
+ *type = MSR_TYPE_CTRL;
+ return 1;
+
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ *type = MSR_TYPE_GLOBAL;
+ return 1;
+
+ default:
+
+ if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
+ (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
+ {
+ *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
+ *type = MSR_TYPE_COUNTER;
+ return 1;
+ }
+
+ if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
+ (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
+ {
+ *index = msr_index - MSR_P6_EVNTSEL(0);
+ *type = MSR_TYPE_ARCH_CTRL;
+ return 1;
+ }
+
+ msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
+ if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
+ (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
+ {
+ *type = MSR_TYPE_ARCH_COUNTER;
+ *index = msr_index_pmc - MSR_IA32_PERFCTR0;
+ return 1;
+ }
+ return 0;
+ }
+}
+
+static inline int msraddr_to_bitpos(int x)
+{
+ ASSERT(x == (x & 0x1fff));
+ return x;
+}
+
+static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap)
+{
+ int i;
+
+ /* Allow Read/Write PMU Counters MSR Directly. */
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ {
+ clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
+ clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
+ msr_bitmap + 0x800/BYTES_PER_LONG);
+ }
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ {
+ clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+ clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i),
+ msr_bitmap + 0x800/BYTES_PER_LONG);
+
+ if ( full_width_write )
+ {
+ clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
+ clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
+ msr_bitmap + 0x800/BYTES_PER_LONG);
+ }
+ }
+
+ /* Allow Read PMU Non-global Controls Directly. */
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
+
+ clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
+ clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
+ clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
+}
+
+static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap)
+{
+ int i;
+
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ {
+ set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
+ set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
+ msr_bitmap + 0x800/BYTES_PER_LONG);
+ }
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ {
+ set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap);
+ set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i),
+ msr_bitmap + 0x800/BYTES_PER_LONG);
+
+ if ( full_width_write )
+ {
+ set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
+ set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
+ msr_bitmap + 0x800/BYTES_PER_LONG);
+ }
+ }
+
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
+
+ set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
+ set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
+ set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
+}
+
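+/*
+ * Read the hardware fixed and arch counters (and, for PV guests, the global
+ * status) into the software context.
+ */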
+static inline void __core2_vpmu_save(struct vcpu *v)
+{
+ int i;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
+
+ if ( !has_hvm_container_vcpu(v) )
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
+}
+
+static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( !has_hvm_container_vcpu(v) )
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
+ return 0;
+
+ __core2_vpmu_save(v);
+
+ /* Unset PMU MSR bitmap to trap lazy load. */
+ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
+ has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
+ core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+
+ if ( to_guest )
+ {
+ ASSERT(!is_hvm_vcpu(v));
+ memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
+ vpmu->context + regs_off, regs_sz);
+ }
+
+ return 1;
+}
+
+static inline void __core2_vpmu_load(struct vcpu *v)
+{
+ unsigned int i, pmc_start;
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
+
+ if ( full_width_write )
+ pmc_start = MSR_IA32_A_PERFCTR0;
+ else
+ pmc_start = MSR_IA32_PERFCTR0;
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ {
+ wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
+ wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
+ }
+
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
+ wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);
+ wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable);
+
+ if ( !has_hvm_container_vcpu(v) )
+ {
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
+ core2_vpmu_cxt->global_ovf_ctrl = 0;
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+ }
+}
+
+static int core2_vpmu_verify(struct vcpu *v)
+{
+ unsigned int i;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+ uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+ uint64_t fixed_ctrl;
+ uint64_t *priv_context = vpmu->priv_context;
+ uint64_t enabled_cntrs = 0;
+
+ if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
+ return -EINVAL;
+
+ fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
+ if ( fixed_ctrl & fixed_ctrl_mask )
+ return -EINVAL;
+
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ {
+ if ( fixed_counters[i] & fixed_counters_mask )
+ return -EINVAL;
+ if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
+ enabled_cntrs |= (1ULL << i);
+ }
+ enabled_cntrs <<= 32;
+
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ {
+ uint64_t control = xen_pmu_cntr_pair[i].control;
+
+ if ( control & ARCH_CTRL_MASK )
+ return -EINVAL;
+ if ( control & ARCH_CNTR_ENABLED )
+ enabled_cntrs |= (1ULL << i);
+ }
+
+ if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
+ !is_canonical_address(core2_vpmu_cxt->ds_area) )
+ return -EINVAL;
+
+ if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
+ (core2_vpmu_cxt->ds_area != 0) )
+ vpmu_set(vpmu, VPMU_RUNNING);
+ else
+ vpmu_reset(vpmu, VPMU_RUNNING);
+
+ *priv_context = enabled_cntrs;
+
+ return 0;
+}
+
+static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ return 0;
+
+ if ( from_guest )
+ {
+ int ret;
+
+ ASSERT(!is_hvm_vcpu(v));
+
+ memcpy(vpmu->context + regs_off,
+ (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
+ regs_sz);
+
+ ret = core2_vpmu_verify(v);
+ if ( ret )
+ {
+ /*
+ * Not necessary since we should never load the context until
+ * guest provides valid values. But just to be safe.
+ */
+ memset(vpmu->context + regs_off, 0, regs_sz);
+ return ret;
+ }
+ }
+
+ vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+
+ __core2_vpmu_load(v);
+
+ return 0;
+}
+
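+/*
+ * Allocate the Intel VPMU context (fixed + arch counter block) and, for
+ * vcpus with an HVM container, arrange for MSR_CORE_PERF_GLOBAL_CTRL to be
+ * switched via the VMCS MSR lists. Returns 1 on success, 0 on failure.
+ */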
+static int core2_vpmu_alloc_resource(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
+ uint64_t *p = NULL;
+
+ if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
+ return 0;
+
+ if ( has_hvm_container_vcpu(v) )
+ {
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+ goto out_err;
+
+ if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
+ goto out_err;
+ vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ }
+
+ core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
+ sizeof(uint64_t) * fixed_pmc_cnt +
+ sizeof(struct xen_pmu_cntr_pair) *
+ arch_pmc_cnt);
+ p = xzalloc(uint64_t);
+ if ( !core2_vpmu_cxt || !p )
+ goto out_err;
+
+ core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
+ core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
+ sizeof(uint64_t) * fixed_pmc_cnt;
+
+ vpmu->context = core2_vpmu_cxt;
+ vpmu->priv_context = p;
+
+ if ( !is_hvm_vcpu(v) )
+ {
+ /* Copy fixed/arch register offsets to shared area */
+ ASSERT(vpmu->xenpmu_data);
+ memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
+ }
+
+ vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
+
+ return 1;
+
+out_err:
+ release_pmu_ownship(PMU_OWNER_HVM);
+
+ xfree(core2_vpmu_cxt);
+ xfree(p);
+
+ printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
+ v->vcpu_id, v->domain->domain_id);
+
+ return 0;
+}
+
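+/*
+ * Common checks for a PMU MSR access: classify the MSR, allocate the context
+ * on first use and lazily load it (opening the MSR bitmap where one is in
+ * use).
+ */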
+static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+ if ( !is_core2_vpmu_msr(msr_index, type, index) )
+ return 0;
+
+ if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
+ !core2_vpmu_alloc_resource(current) )
+ return 0;
+
+    /* Do the lazy load stuff. */
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ {
+ __core2_vpmu_load(current);
+ vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+ if ( has_hvm_container_vcpu(current) &&
+ cpu_has_vmx_msr_bitmap )
+ core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
+ }
+ return 1;
+}
+
+static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+ uint64_t supported)
+{
+ int i, tmp;
+ int type = -1, index = -1;
+ struct vcpu *v = current;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
+ uint64_t *enabled_cntrs;
+
+ if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
+ {
+ /* Special handling for BTS */
+ if ( msr == MSR_IA32_DEBUGCTLMSR )
+ {
+ supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
+ IA32_DEBUGCTLMSR_BTINT;
+
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+ supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
+ IA32_DEBUGCTLMSR_BTS_OFF_USR;
+ if ( !(msr_content & ~supported) &&
+ vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+ return 0;
+ if ( (msr_content & supported) &&
+ !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+ printk(XENLOG_G_WARNING
+ "%pv: Debug Store unsupported on this CPU\n",
+ current);
+ }
+ return -EINVAL;
+ }
+
+ ASSERT(!supported);
+
+ if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
+ /* Writing unsupported bits to a fixed counter */
+ return -EINVAL;
+
+ core2_vpmu_cxt = vpmu->context;
+ enabled_cntrs = vpmu->priv_context;
+ switch ( msr )
+ {
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ if ( msr_content & global_ovf_ctrl_mask )
+ return -EINVAL;
+ core2_vpmu_cxt->global_status &= ~msr_content;
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
+ return 0;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
+ "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
+ return -EINVAL;
+ case MSR_IA32_PEBS_ENABLE:
+ if ( msr_content & 1 )
+ gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, "
+ "which is not supported.\n");
+ core2_vpmu_cxt->pebs_enable = msr_content;
+ return 0;
+ case MSR_IA32_DS_AREA:
+ if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
+ {
+ if ( !is_canonical_address(msr_content) )
+ {
+                gdprintk(XENLOG_WARNING,
+                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "\n",
+                         msr_content);
+ return -EINVAL;
+ }
+ core2_vpmu_cxt->ds_area = msr_content;
+ break;
+ }
+ gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
+ return 0;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ core2_vpmu_cxt->global_ctrl = msr_content;
+ break;
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ if ( msr_content & fixed_ctrl_mask )
+ return -EINVAL;
+
+ if ( has_hvm_container_vcpu(v) )
+ vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+ &core2_vpmu_cxt->global_ctrl);
+ else
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+ *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
+ if ( msr_content != 0 )
+ {
+ u64 val = msr_content;
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ {
+ if ( val & 3 )
+ *enabled_cntrs |= (1ULL << 32) << i;
+ val >>= FIXED_CTR_CTRL_BITS;
+ }
+ }
+
+ core2_vpmu_cxt->fixed_ctrl = msr_content;
+ break;
+ default:
+ tmp = msr - MSR_P6_EVNTSEL(0);
+ if ( tmp >= 0 && tmp < arch_pmc_cnt )
+ {
+ struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
+ vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+
+ if ( msr_content & ARCH_CTRL_MASK )
+ return -EINVAL;
+
+ if ( has_hvm_container_vcpu(v) )
+ vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+ &core2_vpmu_cxt->global_ctrl);
+ else
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+
+ if ( msr_content & ARCH_CNTR_ENABLED )
+ *enabled_cntrs |= 1ULL << tmp;
+ else
+ *enabled_cntrs &= ~(1ULL << tmp);
+
+ xen_pmu_cntr_pair[tmp].control = msr_content;
+ }
+ }
+
+ if ( type != MSR_TYPE_GLOBAL )
+ wrmsrl(msr, msr_content);
+ else
+ {
+ if ( has_hvm_container_vcpu(v) )
+ vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ else
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ }
+
+ if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
+ (core2_vpmu_cxt->ds_area != 0) )
+ vpmu_set(vpmu, VPMU_RUNNING);
+ else
+ vpmu_reset(vpmu, VPMU_RUNNING);
+
+ return 0;
+}
+
+static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+ int type = -1, index = -1;
+ struct vcpu *v = current;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
+
+ if ( core2_vpmu_msr_common_check(msr, &type, &index) )
+ {
+ core2_vpmu_cxt = vpmu->context;
+ switch ( msr )
+ {
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ *msr_content = 0;
+ break;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ *msr_content = core2_vpmu_cxt->global_status;
+ break;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ if ( has_hvm_container_vcpu(v) )
+ vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+ else
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
+ break;
+ default:
+ rdmsrl(msr, *msr_content);
+ }
+ }
+ else if ( msr == MSR_IA32_MISC_ENABLE )
+ {
+ /* Extension for BTS */
+ if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+ *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
+ }
+
+ return 0;
+}
+
+static void core2_vpmu_do_cpuid(unsigned int input,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+    if ( input == 0x1 )
+ {
+ struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+ if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
+ {
+ /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */
+ *edx |= cpufeat_mask(X86_FEATURE_DS);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
+ *ecx |= cpufeat_mask(X86_FEATURE_DTES64);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+ *ecx |= cpufeat_mask(X86_FEATURE_DSCPL);
+ }
+ }
+}
+
+/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
+static void core2_vpmu_dump(const struct vcpu *v)
+{
+ const struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ unsigned int i;
+ const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
+ u64 val;
+ uint64_t *fixed_counters;
+ struct xen_pmu_cntr_pair *cntr_pair;
+
+ if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ return;
+
+ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
+ {
+ if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+ printk(" vPMU loaded\n");
+ else
+ printk(" vPMU allocated\n");
+ return;
+ }
+
+ printk(" vPMU running\n");
+
+ cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
+ fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
+
+ /* Print the contents of the counter and its configuration msr. */
+ for ( i = 0; i < arch_pmc_cnt; i++ )
+ printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n",
+ i, cntr_pair[i].counter, cntr_pair[i].control);
+
+ /*
+ * The configuration of the fixed counter is 4 bits each in the
+ * MSR_CORE_PERF_FIXED_CTR_CTRL.
+ */
+ val = core2_vpmu_cxt->fixed_ctrl;
+ for ( i = 0; i < fixed_pmc_cnt; i++ )
+ {
+ printk(" fixed_%d: 0x%016lx ctrl: %#lx\n",
+ i, fixed_counters[i],
+ val & FIXED_CTR_CTRL_MASK);
+ val >>= FIXED_CTR_CTRL_BITS;
+ }
+}
+
+static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+ u64 msr_content;
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
+
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
+ if ( msr_content )
+ {
+ if ( is_pmc_quirk )
+ handle_pmc_quirk(msr_content);
+ core2_vpmu_cxt->global_status |= msr_content;
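+ /*
+ * Acknowledge all overflow sources: every general-purpose counter bit,
+ * the three fixed-counter bits (32-34) and the CondChgd/OvfBuffer bits
+ * (63:62).
+ */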
+ msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
+ }
+ else
+ {
+ /* No PMC overflow but perhaps a Trace Message interrupt. */
+ __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
+ if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
+ return 0;
+ }
+
+ return 1;
+}
+
+static void core2_vpmu_destroy(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ xfree(vpmu->context);
+ xfree(vpmu->priv_context);
+ if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
+ core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+ release_pmu_ownship(PMU_OWNER_HVM);
+ vpmu_clear(vpmu);
+}
+
+struct arch_vpmu_ops core2_vpmu_ops = {
+ .do_wrmsr = core2_vpmu_do_wrmsr,
+ .do_rdmsr = core2_vpmu_do_rdmsr,
+ .do_interrupt = core2_vpmu_do_interrupt,
+ .do_cpuid = core2_vpmu_do_cpuid,
+ .arch_vpmu_destroy = core2_vpmu_destroy,
+ .arch_vpmu_save = core2_vpmu_save,
+ .arch_vpmu_load = core2_vpmu_load,
+ .arch_vpmu_dump = core2_vpmu_dump
+};
+
+static void core2_no_vpmu_do_cpuid(unsigned int input,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /*
+ * Since the vPMU is not enabled in this case, clear the bits that
+ * describe architectural performance monitoring support.
+ */
+ if ( input == 0xa )
+ {
+ *eax &= ~PMU_VERSION_MASK;
+ *eax &= ~PMU_GENERAL_NR_MASK;
+ *eax &= ~PMU_GENERAL_WIDTH_MASK;
+
+ *edx &= ~PMU_FIXED_NR_MASK;
+ *edx &= ~PMU_FIXED_WIDTH_MASK;
+ }
+}
+
+/*
+ * If it's a vPMU MSR, report its value as 0.
+ */
+static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+ int type = -1, index = -1;
+ if ( !is_core2_vpmu_msr(msr, &type, &index) )
+ return -EINVAL;
+ *msr_content = 0;
+ return 0;
+}
+
+/*
+ * These functions are used in case vpmu is not enabled.
+ */
+struct arch_vpmu_ops core2_no_vpmu_ops = {
+ .do_rdmsr = core2_no_vpmu_do_rdmsr,
+ .do_cpuid = core2_no_vpmu_do_cpuid,
+};
+
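+/*
+ * Per-vCPU initialisation: start out with the "no vPMU" handlers, check
+ * the host prerequisites for BTS emulation (DS, DTES64, BTS_UNAVAIL), and
+ * allocate the register context up front for PV vCPUs; non-PV vCPUs
+ * allocate it lazily on first PMU MSR access.
+ */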
+int vmx_vpmu_initialise(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ u64 msr_content;
+ static bool_t ds_warned;
+
+ vpmu->arch_vpmu_ops = &core2_no_vpmu_ops;
+ if ( vpmu_mode == XENPMU_MODE_OFF )
+ return 0;
+
+ if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
+ return -EINVAL;
+
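+ /*
+ * The Debug Store / BTS setup below is only attempted when the
+ * XENPMU_FEATURE_INTEL_BTS feature flag has been enabled.
+ */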
+ if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
+ goto func_out;
+ /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
+ while ( boot_cpu_has(X86_FEATURE_DS) )
+ {
+ if ( !boot_cpu_has(X86_FEATURE_DTES64) )
+ {
+ if ( !ds_warned )
+ printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
+ " - Debug Store disabled for guests\n");
+ break;
+ }
+ vpmu_set(vpmu, VPMU_CPU_HAS_DS);
+ rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+ if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
+ {
+ /* If BTS_UNAVAIL is set reset the DS feature. */
+ vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
+ if ( !ds_warned )
+ printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
+ " - Debug Store disabled for guests\n");
+ break;
+ }
+
+ vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
+ if ( !ds_warned )
+ {
+ if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
+ printk(XENLOG_G_INFO
+ "vpmu: CPU doesn't support CPL-Qualified BTS\n");
+ printk("******************************************************\n");
+ printk("** WARNING: Emulation of BTS Feature is switched on **\n");
+ printk("** Using this processor feature in a virtualized **\n");
+ printk("** environment is not 100%% safe. **\n");
+ printk("** Setting the DS buffer address with wrong values **\n");
+ printk("** may lead to hypervisor hangs or crashes. **\n");
+ printk("** It is NOT recommended for production use! **\n");
+ printk("******************************************************\n");
+ }
+ break;
+ }
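+ /* Emit the DS/BTS diagnostics above only once per host. */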
+ ds_warned = 1;
+ func_out:
+
+ /* PV domains can allocate resources immediately */
+ if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
+ return -EIO;
+
+ vpmu->arch_vpmu_ops = &core2_vpmu_ops;
+
+ return 0;
+}
+
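+/*
+ * Boot-time initialisation: restrict vPMU to known family 6 models, pull
+ * the counter counts and widths from CPUID leaf 0xa, precompute the
+ * reserved-bit masks used to validate guest MSR writes, and make sure the
+ * register bank fits in the shared PMU page.
+ */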
+int __init core2_vpmu_init(void)
+{
+ u64 caps;
+
+ if ( current_cpu_data.x86 != 6 )
+ {
+ printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
+ return -EINVAL;
+ }
+
+ switch ( current_cpu_data.x86_model )
+ {
+ /* Core2: */
+ case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+ case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+ case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+ case 0x1d: /* six-core 45 nm xeon "Dunnington" */
+
+ case 0x2a: /* SandyBridge */
+ case 0x2d: /* SandyBridge, "Romley-EP" */
+
+ /* Nehalem: */
+ case 0x1a: /* 45 nm nehalem, "Bloomfield" */
+ case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */
+ case 0x2e: /* 45 nm nehalem-ex, "Beckton" */
+
+ /* Westmere: */
+ case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */
+ case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */
+ case 0x2f: /* 32 nm Westmere-EX */
+
+ case 0x3a: /* IvyBridge */
+ case 0x3e: /* IvyBridge EP */
+
+ /* Haswell: */
+ case 0x3c:
+ case 0x3f:
+ case 0x45:
+ case 0x46:
+
+ /* Broadwell */
+ case 0x3d:
+ case 0x4f:
+ case 0x56:
+
+ /* future: */
+ case 0x4e:
+
+ /* next gen Xeon Phi */
+ case 0x57:
+ break;
+
+ default:
+ printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n",
+ current_cpu_data.x86_model);
+ return -EINVAL;
+ }
+
+ arch_pmc_cnt = core2_get_arch_pmc_count();
+ fixed_pmc_cnt = core2_get_fixed_pmc_count();
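+ /*
+ * Bit 13 (FW_WRITE) of IA32_PERF_CAPABILITIES advertises full-width
+ * counter writes via the MSR_IA32_A_PERFCTR0 alias range.
+ */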
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
+ full_width_write = (caps >> 13) & 1;
+
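+ /*
+ * Bits set in these masks are treated as reserved - guest writes with
+ * any of them set are rejected with -EINVAL.
+ */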
+ fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
+ fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
+ global_ovf_ctrl_mask = ~(0xC000000000000000 |
+ (((1ULL << fixed_pmc_cnt) - 1) << 32) |
+ ((1ULL << arch_pmc_cnt) - 1));
+
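+ /*
+ * Size of the variable-length register block (fixed plus general-purpose
+ * counters) that gets copied to/from the PV(H) guest's shared PMU area.
+ */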
+ regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
+ sizeof(uint64_t) * fixed_pmc_cnt +
+ sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
+
+ check_pmc_quirk();
+
+ if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
+ sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
+ {
+ printk(XENLOG_WARNING
+ "VPMU: Register bank does not fit into VPMU share page\n");
+ arch_pmc_cnt = fixed_pmc_cnt = 0;
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
obj-y += vmsi.o
obj-y += vpic.o
obj-y += vpt.o
-obj-y += vpmu.o
obj-y += svm.o
obj-y += svmdebug.o
obj-y += vmcb.o
-obj-y += vpmu.o
+++ /dev/null
-/*
- * vpmu.c: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2010, Advanced Micro Devices, Inc.
- * Parts of this code are Copyright (c) 2007, Intel Corporation
- *
- * Author: Wei Wang <wei.wang2@amd.com>
- * Tested by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/xenoprof.h>
-#include <xen/hvm/save.h>
-#include <xen/sched.h>
-#include <xen/irq.h>
-#include <asm/apic.h>
-#include <asm/hvm/vlapic.h>
-#include <asm/hvm/vpmu.h>
-#include <public/pmu.h>
-
-#define MSR_F10H_EVNTSEL_GO_SHIFT 40
-#define MSR_F10H_EVNTSEL_EN_SHIFT 22
-#define MSR_F10H_COUNTER_LENGTH 48
-
-#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
-#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT))
-#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT))
-#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1))))
-
-static unsigned int __read_mostly num_counters;
-static const u32 __read_mostly *counters;
-static const u32 __read_mostly *ctrls;
-static bool_t __read_mostly k7_counters_mirrored;
-
-/* Total size of PMU registers block (copied to/from PV(H) guest) */
-static unsigned int __read_mostly regs_sz;
-
-#define F10H_NUM_COUNTERS 4
-#define F15H_NUM_COUNTERS 6
-#define MAX_NUM_COUNTERS F15H_NUM_COUNTERS
-
-/* PMU Counter MSRs. */
-static const u32 AMD_F10H_COUNTERS[] = {
- MSR_K7_PERFCTR0,
- MSR_K7_PERFCTR1,
- MSR_K7_PERFCTR2,
- MSR_K7_PERFCTR3
-};
-
-/* PMU Control MSRs. */
-static const u32 AMD_F10H_CTRLS[] = {
- MSR_K7_EVNTSEL0,
- MSR_K7_EVNTSEL1,
- MSR_K7_EVNTSEL2,
- MSR_K7_EVNTSEL3
-};
-
-static const u32 AMD_F15H_COUNTERS[] = {
- MSR_AMD_FAM15H_PERFCTR0,
- MSR_AMD_FAM15H_PERFCTR1,
- MSR_AMD_FAM15H_PERFCTR2,
- MSR_AMD_FAM15H_PERFCTR3,
- MSR_AMD_FAM15H_PERFCTR4,
- MSR_AMD_FAM15H_PERFCTR5
-};
-
-static const u32 AMD_F15H_CTRLS[] = {
- MSR_AMD_FAM15H_EVNTSEL0,
- MSR_AMD_FAM15H_EVNTSEL1,
- MSR_AMD_FAM15H_EVNTSEL2,
- MSR_AMD_FAM15H_EVNTSEL3,
- MSR_AMD_FAM15H_EVNTSEL4,
- MSR_AMD_FAM15H_EVNTSEL5
-};
-
-/* Bits [63:42], [39:36], 21 and 19 are reserved */
-#define CTRL_RSVD_MASK ((-1ULL & (~((1ULL << 42) - 1))) | \
- (0xfULL << 36) | (1ULL << 21) | (1ULL << 19))
-static uint64_t __read_mostly ctrl_rsvd[MAX_NUM_COUNTERS];
-
-/* Use private context as a flag for MSR bitmap */
-#define msr_bitmap_on(vpmu) do { \
- (vpmu)->priv_context = (void *)-1L; \
- } while (0)
-#define msr_bitmap_off(vpmu) do { \
- (vpmu)->priv_context = NULL; \
- } while (0)
-#define is_msr_bitmap_on(vpmu) ((vpmu)->priv_context != NULL)
-
-static inline int get_pmu_reg_type(u32 addr, unsigned int *idx)
-{
- if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) )
- {
- *idx = addr - MSR_K7_EVNTSEL0;
- return MSR_TYPE_CTRL;
- }
-
- if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) )
- {
- *idx = addr - MSR_K7_PERFCTR0;
- return MSR_TYPE_COUNTER;
- }
-
- if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) &&
- (addr <= MSR_AMD_FAM15H_PERFCTR5 ) )
- {
- *idx = (addr - MSR_AMD_FAM15H_EVNTSEL0) >> 1;
- if (addr & 1)
- return MSR_TYPE_COUNTER;
- else
- return MSR_TYPE_CTRL;
- }
-
- /* unsupported registers */
- return -1;
-}
-
-static inline u32 get_fam15h_addr(u32 addr)
-{
- switch ( addr )
- {
- case MSR_K7_PERFCTR0:
- return MSR_AMD_FAM15H_PERFCTR0;
- case MSR_K7_PERFCTR1:
- return MSR_AMD_FAM15H_PERFCTR1;
- case MSR_K7_PERFCTR2:
- return MSR_AMD_FAM15H_PERFCTR2;
- case MSR_K7_PERFCTR3:
- return MSR_AMD_FAM15H_PERFCTR3;
- case MSR_K7_EVNTSEL0:
- return MSR_AMD_FAM15H_EVNTSEL0;
- case MSR_K7_EVNTSEL1:
- return MSR_AMD_FAM15H_EVNTSEL1;
- case MSR_K7_EVNTSEL2:
- return MSR_AMD_FAM15H_EVNTSEL2;
- case MSR_K7_EVNTSEL3:
- return MSR_AMD_FAM15H_EVNTSEL3;
- default:
- break;
- }
-
- return addr;
-}
-
-static void amd_vpmu_init_regs(struct xen_pmu_amd_ctxt *ctxt)
-{
- unsigned i;
- uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
- memset(&ctxt->regs[0], 0, regs_sz);
- for ( i = 0; i < num_counters; i++ )
- ctrl_regs[i] = ctrl_rsvd[i];
-}
-
-static void amd_vpmu_set_msr_bitmap(struct vcpu *v)
-{
- unsigned int i;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- for ( i = 0; i < num_counters; i++ )
- {
- svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE);
- svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE);
- }
-
- msr_bitmap_on(vpmu);
-}
-
-static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
-{
- unsigned int i;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- for ( i = 0; i < num_counters; i++ )
- {
- svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW);
- svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW);
- }
-
- msr_bitmap_off(vpmu);
-}
-
-static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
- return 1;
-}
-
-static inline void context_load(struct vcpu *v)
-{
- unsigned int i;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
- uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
- uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
- for ( i = 0; i < num_counters; i++ )
- {
- wrmsrl(counters[i], counter_regs[i]);
- wrmsrl(ctrls[i], ctrl_regs[i]);
- }
-}
-
-static int amd_vpmu_load(struct vcpu *v, bool_t from_guest)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_amd_ctxt *ctxt;
- uint64_t *ctrl_regs;
- unsigned int i;
-
- vpmu_reset(vpmu, VPMU_FROZEN);
-
- if ( !from_guest && vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- {
- ctxt = vpmu->context;
- ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
- for ( i = 0; i < num_counters; i++ )
- wrmsrl(ctrls[i], ctrl_regs[i]);
-
- return 0;
- }
-
- if ( from_guest )
- {
- bool_t is_running = 0;
- struct xen_pmu_amd_ctxt *guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
-
- ASSERT(!is_hvm_vcpu(v));
-
- ctxt = vpmu->context;
- ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
- memcpy(&ctxt->regs[0], &guest_ctxt->regs[0], regs_sz);
-
- for ( i = 0; i < num_counters; i++ )
- {
- if ( (ctrl_regs[i] & CTRL_RSVD_MASK) != ctrl_rsvd[i] )
- {
- /*
- * Not necessary to re-init context since we should never load
- * it until guest provides valid values. But just to be safe.
- */
- amd_vpmu_init_regs(ctxt);
- return -EINVAL;
- }
-
- if ( is_pmu_enabled(ctrl_regs[i]) )
- is_running = 1;
- }
-
- if ( is_running )
- vpmu_set(vpmu, VPMU_RUNNING);
- else
- vpmu_reset(vpmu, VPMU_RUNNING);
- }
-
- vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
-
- context_load(v);
-
- return 0;
-}
-
-static inline void context_save(struct vcpu *v)
-{
- unsigned int i;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
- uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
-
- /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */
- for ( i = 0; i < num_counters; i++ )
- rdmsrl(counters[i], counter_regs[i]);
-}
-
-static int amd_vpmu_save(struct vcpu *v, bool_t to_guest)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- unsigned int i;
-
- /* Stop the counters. */
- for ( i = 0; i < num_counters; i++ )
- wrmsrl(ctrls[i], 0);
-
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
- {
- vpmu_set(vpmu, VPMU_FROZEN);
- return 0;
- }
-
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- return 0;
-
- context_save(v);
-
- if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
- has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
- amd_vpmu_unset_msr_bitmap(v);
-
- if ( to_guest )
- {
- struct xen_pmu_amd_ctxt *guest_ctxt, *ctxt;
-
- ASSERT(!is_hvm_vcpu(v));
- ctxt = vpmu->context;
- guest_ctxt = &vpmu->xenpmu_data->pmu.c.amd;
- memcpy(&guest_ctxt->regs[0], &ctxt->regs[0], regs_sz);
- }
-
- return 1;
-}
-
-static void context_update(unsigned int msr, u64 msr_content)
-{
- unsigned int i;
- struct vcpu *v = current;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
- uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
- uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
-
- if ( k7_counters_mirrored &&
- ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) )
- {
- msr = get_fam15h_addr(msr);
- }
-
- for ( i = 0; i < num_counters; i++ )
- {
- if ( msr == ctrls[i] )
- {
- ctrl_regs[i] = msr_content;
- return;
- }
- else if (msr == counters[i] )
- {
- counter_regs[i] = msr_content;
- return;
- }
- }
-}
-
-static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
- uint64_t supported)
-{
- struct vcpu *v = current;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- unsigned int idx = 0;
- int type = get_pmu_reg_type(msr, &idx);
-
- ASSERT(!supported);
-
- if ( (type == MSR_TYPE_CTRL ) &&
- ((msr_content & CTRL_RSVD_MASK) != ctrl_rsvd[idx]) )
- return -EINVAL;
-
- /* For all counters, enable guest only mode for HVM guest */
- if ( has_hvm_container_vcpu(v) && (type == MSR_TYPE_CTRL) &&
- !is_guest_mode(msr_content) )
- {
- set_guest_mode(msr_content);
- }
-
- /* check if the first counter is enabled */
- if ( (type == MSR_TYPE_CTRL) &&
- is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) )
- {
- if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
- return 0;
- vpmu_set(vpmu, VPMU_RUNNING);
-
- if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
- amd_vpmu_set_msr_bitmap(v);
- }
-
- /* stop saving & restore if guest stops first counter */
- if ( (type == MSR_TYPE_CTRL) &&
- (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
- {
- vpmu_reset(vpmu, VPMU_RUNNING);
- if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
- amd_vpmu_unset_msr_bitmap(v);
- release_pmu_ownship(PMU_OWNER_HVM);
- }
-
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
- || vpmu_is_set(vpmu, VPMU_FROZEN) )
- {
- context_load(v);
- vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
- vpmu_reset(vpmu, VPMU_FROZEN);
- }
-
- /* Update vpmu context immediately */
- context_update(msr, msr_content);
-
- /* Write to hw counters */
- wrmsrl(msr, msr_content);
- return 0;
-}
-
-static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
- struct vcpu *v = current;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED)
- || vpmu_is_set(vpmu, VPMU_FROZEN) )
- {
- context_load(v);
- vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
- vpmu_reset(vpmu, VPMU_FROZEN);
- }
-
- rdmsrl(msr, *msr_content);
-
- return 0;
-}
-
-static void amd_vpmu_destroy(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( has_hvm_container_vcpu(v) && is_msr_bitmap_on(vpmu) )
- amd_vpmu_unset_msr_bitmap(v);
-
- xfree(vpmu->context);
-
- if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
- release_pmu_ownship(PMU_OWNER_HVM);
-
- vpmu_clear(vpmu);
-}
-
-/* VPMU part of the 'q' keyhandler */
-static void amd_vpmu_dump(const struct vcpu *v)
-{
- const struct vpmu_struct *vpmu = vcpu_vpmu(v);
- const struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
- const uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
- const uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls);
- unsigned int i;
-
- printk(" VPMU state: 0x%x ", vpmu->flags);
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
- {
- printk("\n");
- return;
- }
-
- printk("(");
- if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) )
- printk("PASSIVE_DOMAIN_ALLOCATED, ");
- if ( vpmu_is_set(vpmu, VPMU_FROZEN) )
- printk("FROZEN, ");
- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
- printk("SAVE, ");
- if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
- printk("RUNNING, ");
- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- printk("LOADED, ");
- printk("ALLOCATED)\n");
-
- for ( i = 0; i < num_counters; i++ )
- {
- uint64_t ctrl, cntr;
-
- rdmsrl(ctrls[i], ctrl);
- rdmsrl(counters[i], cntr);
- printk(" %#x: %#lx (%#lx in HW) %#x: %#lx (%#lx in HW)\n",
- ctrls[i], ctrl_regs[i], ctrl,
- counters[i], counter_regs[i], cntr);
- }
-}
-
-struct arch_vpmu_ops amd_vpmu_ops = {
- .do_wrmsr = amd_vpmu_do_wrmsr,
- .do_rdmsr = amd_vpmu_do_rdmsr,
- .do_interrupt = amd_vpmu_do_interrupt,
- .arch_vpmu_destroy = amd_vpmu_destroy,
- .arch_vpmu_save = amd_vpmu_save,
- .arch_vpmu_load = amd_vpmu_load,
- .arch_vpmu_dump = amd_vpmu_dump
-};
-
-int svm_vpmu_initialise(struct vcpu *v)
-{
- struct xen_pmu_amd_ctxt *ctxt;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( vpmu_mode == XENPMU_MODE_OFF )
- return 0;
-
- if ( !counters )
- return -EINVAL;
-
- ctxt = xmalloc_bytes(sizeof(*ctxt) + regs_sz);
- if ( !ctxt )
- {
- printk(XENLOG_G_WARNING "Insufficient memory for PMU, "
- " PMU feature is unavailable on domain %d vcpu %d.\n",
- v->vcpu_id, v->domain->domain_id);
- return -ENOMEM;
- }
-
- ctxt->counters = sizeof(*ctxt);
- ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * num_counters;
- amd_vpmu_init_regs(ctxt);
-
- vpmu->context = ctxt;
- vpmu->priv_context = NULL;
-
- if ( !is_hvm_vcpu(v) )
- {
- /* Copy register offsets to shared area */
- ASSERT(vpmu->xenpmu_data);
- memcpy(&vpmu->xenpmu_data->pmu.c.amd, ctxt,
- offsetof(struct xen_pmu_amd_ctxt, regs));
- }
-
- vpmu->arch_vpmu_ops = &amd_vpmu_ops;
-
- vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
- return 0;
-}
-
-int __init amd_vpmu_init(void)
-{
- unsigned int i;
-
- switch ( current_cpu_data.x86 )
- {
- case 0x15:
- num_counters = F15H_NUM_COUNTERS;
- counters = AMD_F15H_COUNTERS;
- ctrls = AMD_F15H_CTRLS;
- k7_counters_mirrored = 1;
- break;
- case 0x10:
- case 0x12:
- case 0x14:
- case 0x16:
- num_counters = F10H_NUM_COUNTERS;
- counters = AMD_F10H_COUNTERS;
- ctrls = AMD_F10H_CTRLS;
- k7_counters_mirrored = 0;
- break;
- default:
- printk(XENLOG_WARNING "VPMU: Unsupported CPU family %#x\n",
- current_cpu_data.x86);
- return -EINVAL;
- }
-
- if ( sizeof(struct xen_pmu_data) +
- 2 * sizeof(uint64_t) * num_counters > PAGE_SIZE )
- {
- printk(XENLOG_WARNING
- "VPMU: Register bank does not fit into VPMU shared page\n");
- counters = ctrls = NULL;
- num_counters = 0;
- return -ENOSPC;
- }
-
- for ( i = 0; i < num_counters; i++ )
- {
- rdmsrl(ctrls[i], ctrl_rsvd[i]);
- ctrl_rsvd[i] &= CTRL_RSVD_MASK;
- }
-
- regs_sz = 2 * sizeof(uint64_t) * num_counters;
-
- return 0;
-}
-
#include <asm/page.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
+#include <asm/vpmu.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/nestedhvm.h>
-#include <asm/hvm/vpmu.h>
#include <public/hvm/ioreq.h>
#include <public/hvm/params.h>
obj-y += realmode.o
obj-y += vmcs.o
obj-y += vmx.o
-obj-y += vpmu_core2.o
obj-y += vvmx.o
+++ /dev/null
-/*
- * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@intel.com>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/xenoprof.h>
-#include <xen/irq.h>
-#include <asm/system.h>
-#include <asm/regs.h>
-#include <asm/types.h>
-#include <asm/apic.h>
-#include <asm/traps.h>
-#include <asm/msr.h>
-#include <asm/msr-index.h>
-#include <asm/hvm/support.h>
-#include <asm/hvm/vlapic.h>
-#include <asm/hvm/vmx/vmx.h>
-#include <asm/hvm/vmx/vmcs.h>
-#include <public/sched.h>
-#include <public/hvm/save.h>
-#include <public/pmu.h>
-#include <asm/hvm/vpmu.h>
-
-/*
- * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
- * instruction.
- * cpuid 0xa - Architectural Performance Monitoring Leaf
- * Register eax
- */
-#define PMU_VERSION_SHIFT 0 /* Version ID */
-#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */
-#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)
-
-#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */
-#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */
-#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)
-
-#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */
-#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */
-#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
-/* Register edx */
-#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */
-#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */
-#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT)
-
-#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */
-#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */
-#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT)
-
-/* Alias registers (0x4c1) for full-width writes to PMCs */
-#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
-static bool_t __read_mostly full_width_write;
-
-/* Intel-specific VPMU features */
-#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */
-#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */
-
-/*
- * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
- * counters. 4 bits for every counter.
- */
-#define FIXED_CTR_CTRL_BITS 4
-#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
-
-#define ARCH_CNTR_ENABLED (1ULL << 22)
-
-/* Number of general-purpose and fixed performance counters */
-static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
-
-/* Masks used for testing whether and MSR is valid */
-#define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21))
-static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
-static uint64_t __read_mostly global_ovf_ctrl_mask;
-
-/* Total size of PMU registers block (copied to/from PV(H) guest) */
-static unsigned int __read_mostly regs_sz;
-/* Offset into context of the beginning of PMU register block */
-static const unsigned int regs_off =
- sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
- sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
-
-/*
- * QUIRK to workaround an issue on various family 6 cpus.
- * The issue leads to endless PMC interrupt loops on the processor.
- * If the interrupt handler is running and a pmc reaches the value 0, this
- * value remains forever and it triggers immediately a new interrupt after
- * finishing the handler.
- * A workaround is to read all flagged counters and if the value is 0 write
- * 1 (or another value != 0) into it.
- * There exist no errata and the real cause of this behaviour is unknown.
- */
-bool_t __read_mostly is_pmc_quirk;
-
-static void check_pmc_quirk(void)
-{
- if ( current_cpu_data.x86 == 6 )
- is_pmc_quirk = 1;
- else
- is_pmc_quirk = 0;
-}
-
-static void handle_pmc_quirk(u64 msr_content)
-{
- int i;
- u64 val;
-
- if ( !is_pmc_quirk )
- return;
-
- val = msr_content;
- for ( i = 0; i < arch_pmc_cnt; i++ )
- {
- if ( val & 0x1 )
- {
- u64 cnt;
- rdmsrl(MSR_P6_PERFCTR(i), cnt);
- if ( cnt == 0 )
- wrmsrl(MSR_P6_PERFCTR(i), 1);
- }
- val >>= 1;
- }
- val = msr_content >> 32;
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- {
- if ( val & 0x1 )
- {
- u64 cnt;
- rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
- if ( cnt == 0 )
- wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
- }
- val >>= 1;
- }
-}
-
-/*
- * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15]
- */
-static int core2_get_arch_pmc_count(void)
-{
- u32 eax;
-
- eax = cpuid_eax(0xa);
- return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
-}
-
-/*
- * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4]
- */
-static int core2_get_fixed_pmc_count(void)
-{
- u32 eax;
-
- eax = cpuid_eax(0xa);
- return MASK_EXTR(eax, PMU_FIXED_NR_MASK);
-}
-
-/* edx bits 5-12: Bit width of fixed-function performance counters */
-static int core2_get_bitwidth_fix_count(void)
-{
- u32 edx;
-
- edx = cpuid_edx(0xa);
- return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
-}
-
-static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
-{
- u32 msr_index_pmc;
-
- switch ( msr_index )
- {
- case MSR_CORE_PERF_FIXED_CTR_CTRL:
- case MSR_IA32_DS_AREA:
- case MSR_IA32_PEBS_ENABLE:
- *type = MSR_TYPE_CTRL;
- return 1;
-
- case MSR_CORE_PERF_GLOBAL_CTRL:
- case MSR_CORE_PERF_GLOBAL_STATUS:
- case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- *type = MSR_TYPE_GLOBAL;
- return 1;
-
- default:
-
- if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
- (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
- {
- *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
- *type = MSR_TYPE_COUNTER;
- return 1;
- }
-
- if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
- (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
- {
- *index = msr_index - MSR_P6_EVNTSEL(0);
- *type = MSR_TYPE_ARCH_CTRL;
- return 1;
- }
-
- msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
- if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
- (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
- {
- *type = MSR_TYPE_ARCH_COUNTER;
- *index = msr_index_pmc - MSR_IA32_PERFCTR0;
- return 1;
- }
- return 0;
- }
-}
-
-static inline int msraddr_to_bitpos(int x)
-{
- ASSERT(x == (x & 0x1fff));
- return x;
-}
-
-static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap)
-{
- int i;
-
- /* Allow Read/Write PMU Counters MSR Directly. */
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- {
- clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
- clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
- msr_bitmap + 0x800/BYTES_PER_LONG);
- }
- for ( i = 0; i < arch_pmc_cnt; i++ )
- {
- clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
- clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i),
- msr_bitmap + 0x800/BYTES_PER_LONG);
-
- if ( full_width_write )
- {
- clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
- clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
- msr_bitmap + 0x800/BYTES_PER_LONG);
- }
- }
-
- /* Allow Read PMU Non-global Controls Directly. */
- for ( i = 0; i < arch_pmc_cnt; i++ )
- clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
-
- clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
- clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
- clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
-}
-
-static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap)
-{
- int i;
-
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- {
- set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap);
- set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i),
- msr_bitmap + 0x800/BYTES_PER_LONG);
- }
- for ( i = 0; i < arch_pmc_cnt; i++ )
- {
- set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap);
- set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i),
- msr_bitmap + 0x800/BYTES_PER_LONG);
-
- if ( full_width_write )
- {
- set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap);
- set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i),
- msr_bitmap + 0x800/BYTES_PER_LONG);
- }
- }
-
- for ( i = 0; i < arch_pmc_cnt; i++ )
- set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL(i)), msr_bitmap);
-
- set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap);
- set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap);
- set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap);
-}
-
-static inline void __core2_vpmu_save(struct vcpu *v)
-{
- int i;
- struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
- uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
- struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
- vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
- for ( i = 0; i < arch_pmc_cnt; i++ )
- rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
-
- if ( !has_hvm_container_vcpu(v) )
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
-}
-
-static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( !has_hvm_container_vcpu(v) )
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
- if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
- return 0;
-
- __core2_vpmu_save(v);
-
- /* Unset PMU MSR bitmap to trap lazy load. */
- if ( !vpmu_is_set(vpmu, VPMU_RUNNING) &&
- has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
- core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
-
- if ( to_guest )
- {
- ASSERT(!is_hvm_vcpu(v));
- memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
- vpmu->context + regs_off, regs_sz);
- }
-
- return 1;
-}
-
-static inline void __core2_vpmu_load(struct vcpu *v)
-{
- unsigned int i, pmc_start;
- struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
- uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
- struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
- vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
-
- if ( full_width_write )
- pmc_start = MSR_IA32_A_PERFCTR0;
- else
- pmc_start = MSR_IA32_PERFCTR0;
- for ( i = 0; i < arch_pmc_cnt; i++ )
- {
- wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
- wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
- }
-
- wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
- wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);
- wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable);
-
- if ( !has_hvm_container_vcpu(v) )
- {
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
- core2_vpmu_cxt->global_ovf_ctrl = 0;
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
- }
-}
-
-static int core2_vpmu_verify(struct vcpu *v)
-{
- unsigned int i;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
- uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
- struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
- vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
- uint64_t fixed_ctrl;
- uint64_t *priv_context = vpmu->priv_context;
- uint64_t enabled_cntrs = 0;
-
- if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
- return -EINVAL;
-
- fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
- if ( fixed_ctrl & fixed_ctrl_mask )
- return -EINVAL;
-
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- {
- if ( fixed_counters[i] & fixed_counters_mask )
- return -EINVAL;
- if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
- enabled_cntrs |= (1ULL << i);
- }
- enabled_cntrs <<= 32;
-
- for ( i = 0; i < arch_pmc_cnt; i++ )
- {
- uint64_t control = xen_pmu_cntr_pair[i].control;
-
- if ( control & ARCH_CTRL_MASK )
- return -EINVAL;
- if ( control & ARCH_CNTR_ENABLED )
- enabled_cntrs |= (1ULL << i);
- }
-
- if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) &&
- !is_canonical_address(core2_vpmu_cxt->ds_area) )
- return -EINVAL;
-
- if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
- (core2_vpmu_cxt->ds_area != 0) )
- vpmu_set(vpmu, VPMU_RUNNING);
- else
- vpmu_reset(vpmu, VPMU_RUNNING);
-
- *priv_context = enabled_cntrs;
-
- return 0;
-}
-
-static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- return 0;
-
- if ( from_guest )
- {
- int ret;
-
- ASSERT(!is_hvm_vcpu(v));
-
- memcpy(vpmu->context + regs_off,
- (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
- regs_sz);
-
- ret = core2_vpmu_verify(v);
- if ( ret )
- {
- /*
- * Not necessary since we should never load the context until
- * guest provides valid values. But just to be safe.
- */
- memset(vpmu->context + regs_off, 0, regs_sz);
- return ret;
- }
- }
-
- vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
-
- __core2_vpmu_load(v);
-
- return 0;
-}
-
-static int core2_vpmu_alloc_resource(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
- uint64_t *p = NULL;
-
- if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
- return 0;
-
- if ( has_hvm_container_vcpu(v) )
- {
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
- goto out_err;
-
- if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
- goto out_err;
- vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- }
-
- core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
- sizeof(uint64_t) * fixed_pmc_cnt +
- sizeof(struct xen_pmu_cntr_pair) *
- arch_pmc_cnt);
- p = xzalloc(uint64_t);
- if ( !core2_vpmu_cxt || !p )
- goto out_err;
-
- core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
- core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
- sizeof(uint64_t) * fixed_pmc_cnt;
-
- vpmu->context = core2_vpmu_cxt;
- vpmu->priv_context = p;
-
- if ( !is_hvm_vcpu(v) )
- {
- /* Copy fixed/arch register offsets to shared area */
- ASSERT(vpmu->xenpmu_data);
- memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
- }
-
- vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
-
- return 1;
-
-out_err:
- release_pmu_ownship(PMU_OWNER_HVM);
-
- xfree(core2_vpmu_cxt);
- xfree(p);
-
- printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
- v->vcpu_id, v->domain->domain_id);
-
- return 0;
-}
-
-static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
- if ( !is_core2_vpmu_msr(msr_index, type, index) )
- return 0;
-
- if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
- !core2_vpmu_alloc_resource(current) )
- return 0;
-
- /* Do the lazy load staff. */
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- {
- __core2_vpmu_load(current);
- vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
- if ( has_hvm_container_vcpu(current) &&
- cpu_has_vmx_msr_bitmap )
- core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
- }
- return 1;
-}
-
-static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
- uint64_t supported)
-{
- int i, tmp;
- int type = -1, index = -1;
- struct vcpu *v = current;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
- uint64_t *enabled_cntrs;
-
- if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
- {
- /* Special handling for BTS */
- if ( msr == MSR_IA32_DEBUGCTLMSR )
- {
- supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
- IA32_DEBUGCTLMSR_BTINT;
-
- if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
- supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
- IA32_DEBUGCTLMSR_BTS_OFF_USR;
- if ( !(msr_content & ~supported) &&
- vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
- return 0;
- if ( (msr_content & supported) &&
- !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
- printk(XENLOG_G_WARNING
- "%pv: Debug Store unsupported on this CPU\n",
- current);
- }
- return -EINVAL;
- }
-
- ASSERT(!supported);
-
- if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
- /* Writing unsupported bits to a fixed counter */
- return -EINVAL;
-
- core2_vpmu_cxt = vpmu->context;
- enabled_cntrs = vpmu->priv_context;
- switch ( msr )
- {
- case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if ( msr_content & global_ovf_ctrl_mask )
- return -EINVAL;
- core2_vpmu_cxt->global_status &= ~msr_content;
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
- return 0;
- case MSR_CORE_PERF_GLOBAL_STATUS:
- gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
- "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
- return -EINVAL;
- case MSR_IA32_PEBS_ENABLE:
- if ( msr_content & 1 )
- gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, "
- "which is not supported.\n");
- core2_vpmu_cxt->pebs_enable = msr_content;
- return 0;
- case MSR_IA32_DS_AREA:
- if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
- {
- if ( !is_canonical_address(msr_content) )
- {
- gdprintk(XENLOG_WARNING,
- "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n",
- msr_content);
- return -EINVAL;
- }
- core2_vpmu_cxt->ds_area = msr_content;
- break;
- }
- gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
- return 0;
- case MSR_CORE_PERF_GLOBAL_CTRL:
- core2_vpmu_cxt->global_ctrl = msr_content;
- break;
- case MSR_CORE_PERF_FIXED_CTR_CTRL:
- if ( msr_content & fixed_ctrl_mask )
- return -EINVAL;
-
- if ( has_hvm_container_vcpu(v) )
- vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
- &core2_vpmu_cxt->global_ctrl);
- else
- rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
- *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
- if ( msr_content != 0 )
- {
- u64 val = msr_content;
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- {
- if ( val & 3 )
- *enabled_cntrs |= (1ULL << 32) << i;
- val >>= FIXED_CTR_CTRL_BITS;
- }
- }
-
- core2_vpmu_cxt->fixed_ctrl = msr_content;
- break;
- default:
- tmp = msr - MSR_P6_EVNTSEL(0);
- if ( tmp >= 0 && tmp < arch_pmc_cnt )
- {
- struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
- vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
-
- if ( msr_content & ARCH_CTRL_MASK )
- return -EINVAL;
-
- if ( has_hvm_container_vcpu(v) )
- vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
- &core2_vpmu_cxt->global_ctrl);
- else
- rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
-
- if ( msr_content & ARCH_CNTR_ENABLED )
- *enabled_cntrs |= 1ULL << tmp;
- else
- *enabled_cntrs &= ~(1ULL << tmp);
-
- xen_pmu_cntr_pair[tmp].control = msr_content;
- }
- }
-
- if ( type != MSR_TYPE_GLOBAL )
- wrmsrl(msr, msr_content);
- else
- {
- if ( has_hvm_container_vcpu(v) )
- vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
- else
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
- }
-
- if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
- (core2_vpmu_cxt->ds_area != 0) )
- vpmu_set(vpmu, VPMU_RUNNING);
- else
- vpmu_reset(vpmu, VPMU_RUNNING);
-
- return 0;
-}
-
-static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
- int type = -1, index = -1;
- struct vcpu *v = current;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
-
- if ( core2_vpmu_msr_common_check(msr, &type, &index) )
- {
- core2_vpmu_cxt = vpmu->context;
- switch ( msr )
- {
- case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- *msr_content = 0;
- break;
- case MSR_CORE_PERF_GLOBAL_STATUS:
- *msr_content = core2_vpmu_cxt->global_status;
- break;
- case MSR_CORE_PERF_GLOBAL_CTRL:
- if ( has_hvm_container_vcpu(v) )
- vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
- else
- rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
- break;
- default:
- rdmsrl(msr, *msr_content);
- }
- }
- else if ( msr == MSR_IA32_MISC_ENABLE )
- {
- /* Extension for BTS */
- if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
- *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
- }
-
- return 0;
-}
-
-static void core2_vpmu_do_cpuid(unsigned int input,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- if (input == 0x1)
- {
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
- if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
- {
- /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */
- *edx |= cpufeat_mask(X86_FEATURE_DS);
- if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
- *ecx |= cpufeat_mask(X86_FEATURE_DTES64);
- if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
- *ecx |= cpufeat_mask(X86_FEATURE_DSCPL);
- }
- }
-}
-
-/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
-static void core2_vpmu_dump(const struct vcpu *v)
-{
- const struct vpmu_struct *vpmu = vcpu_vpmu(v);
- unsigned int i;
- const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
- u64 val;
- uint64_t *fixed_counters;
- struct xen_pmu_cntr_pair *cntr_pair;
-
- if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
- return;
-
- if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
- {
- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- printk(" vPMU loaded\n");
- else
- printk(" vPMU allocated\n");
- return;
- }
-
- printk(" vPMU running\n");
-
- cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
- fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
-
- /* Print the contents of the counter and its configuration msr. */
- for ( i = 0; i < arch_pmc_cnt; i++ )
- printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n",
- i, cntr_pair[i].counter, cntr_pair[i].control);
-
- /*
- * The configuration of the fixed counter is 4 bits each in the
- * MSR_CORE_PERF_FIXED_CTR_CTRL.
- */
- val = core2_vpmu_cxt->fixed_ctrl;
- for ( i = 0; i < fixed_pmc_cnt; i++ )
- {
- printk(" fixed_%d: 0x%016lx ctrl: %#lx\n",
- i, fixed_counters[i],
- val & FIXED_CTR_CTRL_MASK);
- val >>= FIXED_CTR_CTRL_BITS;
- }
-}
-
-static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
- struct vcpu *v = current;
- u64 msr_content;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
-
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
- if ( msr_content )
- {
- if ( is_pmc_quirk )
- handle_pmc_quirk(msr_content);
- core2_vpmu_cxt->global_status |= msr_content;
- msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1);
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
- }
- else
- {
- /* No PMC overflow but perhaps a Trace Message interrupt. */
- __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
- if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
- return 0;
- }
-
- return 1;
-}
-
-static void core2_vpmu_destroy(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- xfree(vpmu->context);
- xfree(vpmu->priv_context);
- if ( has_hvm_container_vcpu(v) && cpu_has_vmx_msr_bitmap )
- core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
- release_pmu_ownship(PMU_OWNER_HVM);
- vpmu_clear(vpmu);
-}
-
-struct arch_vpmu_ops core2_vpmu_ops = {
- .do_wrmsr = core2_vpmu_do_wrmsr,
- .do_rdmsr = core2_vpmu_do_rdmsr,
- .do_interrupt = core2_vpmu_do_interrupt,
- .do_cpuid = core2_vpmu_do_cpuid,
- .arch_vpmu_destroy = core2_vpmu_destroy,
- .arch_vpmu_save = core2_vpmu_save,
- .arch_vpmu_load = core2_vpmu_load,
- .arch_vpmu_dump = core2_vpmu_dump
-};
-
-static void core2_no_vpmu_do_cpuid(unsigned int input,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /*
- * As in this case the vpmu is not enabled reset some bits in the
- * architectural performance monitoring related part.
- */
- if ( input == 0xa )
- {
- *eax &= ~PMU_VERSION_MASK;
- *eax &= ~PMU_GENERAL_NR_MASK;
- *eax &= ~PMU_GENERAL_WIDTH_MASK;
-
- *edx &= ~PMU_FIXED_NR_MASK;
- *edx &= ~PMU_FIXED_WIDTH_MASK;
- }
-}
-
-/*
- * If its a vpmu msr set it to 0.
- */
-static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
- int type = -1, index = -1;
- if ( !is_core2_vpmu_msr(msr, &type, &index) )
- return -EINVAL;
- *msr_content = 0;
- return 0;
-}
-
-/*
- * These functions are used in case vpmu is not enabled.
- */
-struct arch_vpmu_ops core2_no_vpmu_ops = {
- .do_rdmsr = core2_no_vpmu_do_rdmsr,
- .do_cpuid = core2_no_vpmu_do_cpuid,
-};
-
-int vmx_vpmu_initialise(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- u64 msr_content;
- static bool_t ds_warned;
-
- vpmu->arch_vpmu_ops = &core2_no_vpmu_ops;
- if ( vpmu_mode == XENPMU_MODE_OFF )
- return 0;
-
- if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
- return -EINVAL;
-
- if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
- goto func_out;
- /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
- while ( boot_cpu_has(X86_FEATURE_DS) )
- {
- if ( !boot_cpu_has(X86_FEATURE_DTES64) )
- {
- if ( !ds_warned )
- printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
- " - Debug Store disabled for guests\n");
- break;
- }
- vpmu_set(vpmu, VPMU_CPU_HAS_DS);
- rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
- if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
- {
- /* If BTS_UNAVAIL is set reset the DS feature. */
- vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
- if ( !ds_warned )
- printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
- " - Debug Store disabled for guests\n");
- break;
- }
-
- vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
- if ( !ds_warned )
- {
- if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
- printk(XENLOG_G_INFO
- "vpmu: CPU doesn't support CPL-Qualified BTS\n");
- printk("******************************************************\n");
- printk("** WARNING: Emulation of BTS Feature is switched on **\n");
- printk("** Using this processor feature in a virtualized **\n");
- printk("** environment is not 100%% safe. **\n");
- printk("** Setting the DS buffer address with wrong values **\n");
- printk("** may lead to hypervisor hangs or crashes. **\n");
- printk("** It is NOT recommended for production use! **\n");
- printk("******************************************************\n");
- }
- break;
- }
- ds_warned = 1;
- func_out:
-
- /* PV domains can allocate resources immediately */
- if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
- return -EIO;
-
- vpmu->arch_vpmu_ops = &core2_vpmu_ops;
-
- return 0;
-}
-
-int __init core2_vpmu_init(void)
-{
- u64 caps;
-
- if ( current_cpu_data.x86 != 6 )
- {
- printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
- return -EINVAL;
- }
-
- switch ( current_cpu_data.x86_model )
- {
- /* Core2: */
- case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
- case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
- case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
- case 0x1d: /* six-core 45 nm xeon "Dunnington" */
-
- case 0x2a: /* SandyBridge */
- case 0x2d: /* SandyBridge, "Romley-EP" */
-
- /* Nehalem: */
- case 0x1a: /* 45 nm nehalem, "Bloomfield" */
- case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */
- case 0x2e: /* 45 nm nehalem-ex, "Beckton" */
-
- /* Westmere: */
- case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */
- case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */
- case 0x2f: /* 32 nm Westmere-EX */
-
- case 0x3a: /* IvyBridge */
- case 0x3e: /* IvyBridge EP */
-
- /* Haswell: */
- case 0x3c:
- case 0x3f:
- case 0x45:
- case 0x46:
-
- /* Broadwell */
- case 0x3d:
- case 0x4f:
- case 0x56:
-
- /* future: */
- case 0x4e:
-
- /* next gen Xeon Phi */
- case 0x57:
- break;
-
- default:
- printk(XENLOG_WARNING "VPMU: Unsupported CPU model %#x\n",
- current_cpu_data.x86_model);
- return -EINVAL;
- }
-
- arch_pmc_cnt = core2_get_arch_pmc_count();
- fixed_pmc_cnt = core2_get_fixed_pmc_count();
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
- full_width_write = (caps >> 13) & 1;
-
- fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
- fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
- global_ovf_ctrl_mask = ~(0xC000000000000000 |
- (((1ULL << fixed_pmc_cnt) - 1) << 32) |
- ((1ULL << arch_pmc_cnt) - 1));
-
- regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
- sizeof(uint64_t) * fixed_pmc_cnt +
- sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
-
- check_pmc_quirk();
-
- if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
- sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
- {
- printk(XENLOG_WARNING
- "VPMU: Register bank does not fit into VPMU share page\n");
- arch_pmc_cnt = fixed_pmc_cnt = 0;
- return -ENOSPC;
- }
-
- return 0;
-}
-
+++ /dev/null
-/*
- * vpmu.c: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@intel.com>
- */
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/xenoprof.h>
-#include <xen/event.h>
-#include <xen/guest_access.h>
-#include <asm/regs.h>
-#include <asm/types.h>
-#include <asm/msr.h>
-#include <asm/nmi.h>
-#include <asm/p2m.h>
-#include <asm/hvm/support.h>
-#include <asm/hvm/vmx/vmx.h>
-#include <asm/hvm/vmx/vmcs.h>
-#include <asm/hvm/vpmu.h>
-#include <asm/hvm/svm/svm.h>
-#include <asm/hvm/svm/vmcb.h>
-#include <asm/apic.h>
-#include <public/pmu.h>
-#include <xsm/xsm.h>
-
-#include <compat/pmu.h>
-CHECK_pmu_cntr_pair;
-CHECK_pmu_data;
-CHECK_pmu_params;
-
-/*
- * "vpmu" : vpmu generally enabled
- * "vpmu=off" : vpmu generally disabled
- * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on.
- */
-static unsigned int __read_mostly opt_vpmu_enabled;
-unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
-unsigned int __read_mostly vpmu_features = 0;
-static void parse_vpmu_param(char *s);
-custom_param("vpmu", parse_vpmu_param);
-
-static DEFINE_SPINLOCK(vpmu_lock);
-static unsigned vpmu_count;
-
-static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
-
-static void __init parse_vpmu_param(char *s)
-{
- switch ( parse_bool(s) )
- {
- case 0:
- break;
- default:
- if ( !strcmp(s, "bts") )
- vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
- else if ( *s )
- {
- printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
- break;
- }
- /* fall through */
- case 1:
- /* Default VPMU mode */
- vpmu_mode = XENPMU_MODE_SELF;
- opt_vpmu_enabled = 1;
- break;
- }
-}
-
-void vpmu_lvtpc_update(uint32_t val)
-{
- struct vpmu_struct *vpmu;
- struct vcpu *curr = current;
-
- if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
- return;
-
- vpmu = vcpu_vpmu(curr);
-
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
-
- /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
- if ( is_hvm_vcpu(curr) || !vpmu->xenpmu_data ||
- !vpmu_is_set(vpmu, VPMU_CACHED) )
- apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
-}
-
-int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
- uint64_t supported, bool_t is_write)
-{
- struct vcpu *curr = current;
- struct vpmu_struct *vpmu;
- const struct arch_vpmu_ops *ops;
- int ret = 0;
-
- if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
- ((vpmu_mode & XENPMU_MODE_ALL) &&
- !is_hardware_domain(current->domain)) )
- goto nop;
-
- vpmu = vcpu_vpmu(curr);
- ops = vpmu->arch_vpmu_ops;
- if ( !ops )
- goto nop;
-
- if ( is_write && ops->do_wrmsr )
- ret = ops->do_wrmsr(msr, *msr_content, supported);
- else if ( !is_write && ops->do_rdmsr )
- ret = ops->do_rdmsr(msr, msr_content);
- else
- goto nop;
-
- /*
- * We may have received a PMU interrupt while handling MSR access
- * and since do_wr/rdmsr may load VPMU context we should save
- * (and unload) it again.
- */
- if ( !is_hvm_vcpu(curr) && vpmu->xenpmu_data &&
- vpmu_is_set(vpmu, VPMU_CACHED) )
- {
- vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
- ops->arch_vpmu_save(curr, 0);
- vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
- }
-
- return ret;
-
- nop:
- if ( !is_write )
- *msr_content = 0;
-
- return 0;
-}
-
-static inline struct vcpu *choose_hwdom_vcpu(void)
-{
- unsigned idx;
-
- if ( hardware_domain->max_vcpus == 0 )
- return NULL;
-
- idx = smp_processor_id() % hardware_domain->max_vcpus;
-
- return hardware_domain->vcpu[idx];
-}
-
-void vpmu_do_interrupt(struct cpu_user_regs *regs)
-{
- struct vcpu *sampled = current, *sampling;
- struct vpmu_struct *vpmu;
- struct vlapic *vlapic;
- u32 vlapic_lvtpc;
-
- /*
- * dom0 will handle interrupt for special domains (e.g. idle domain) or,
- * in XENPMU_MODE_ALL, for everyone.
- */
- if ( (vpmu_mode & XENPMU_MODE_ALL) ||
- (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
- {
- sampling = choose_hwdom_vcpu();
- if ( !sampling )
- return;
- }
- else
- sampling = sampled;
-
- vpmu = vcpu_vpmu(sampling);
- if ( !vpmu->arch_vpmu_ops )
- return;
-
- /* PV(H) guest */
- if ( !is_hvm_vcpu(sampling) || (vpmu_mode & XENPMU_MODE_ALL) )
- {
- const struct cpu_user_regs *cur_regs;
- uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
- domid_t domid;
-
- if ( !vpmu->xenpmu_data )
- return;
-
- if ( is_pvh_vcpu(sampling) &&
- !(vpmu_mode & XENPMU_MODE_ALL) &&
- !vpmu->arch_vpmu_ops->do_interrupt(regs) )
- return;
-
- if ( vpmu_is_set(vpmu, VPMU_CACHED) )
- return;
-
- /* PV guest will be reading PMU MSRs from xenpmu_data */
- vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
- vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
- vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
-
- if ( has_hvm_container_vcpu(sampled) )
- *flags = 0;
- else
- *flags = PMU_SAMPLE_PV;
-
- if ( sampled == sampling )
- domid = DOMID_SELF;
- else
- domid = sampled->domain->domain_id;
-
- /* Store appropriate registers in xenpmu_data */
- /* FIXME: 32-bit PVH should go here as well */
- if ( is_pv_32bit_vcpu(sampling) )
- {
- /*
- * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
- * and therefore we treat it the same way as a non-privileged
- * PV 32-bit domain.
- */
- struct compat_pmu_regs *cmp;
-
- cur_regs = guest_cpu_user_regs();
-
- cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
- cmp->ip = cur_regs->rip;
- cmp->sp = cur_regs->rsp;
- cmp->flags = cur_regs->eflags;
- cmp->ss = cur_regs->ss;
- cmp->cs = cur_regs->cs;
- if ( (cmp->cs & 3) > 1 )
- *flags |= PMU_SAMPLE_USER;
- }
- else
- {
- struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
-
- if ( (vpmu_mode & XENPMU_MODE_SELF) )
- cur_regs = guest_cpu_user_regs();
- else if ( !guest_mode(regs) &&
- is_hardware_domain(sampling->domain) )
- {
- cur_regs = regs;
- domid = DOMID_XEN;
- }
- else
- cur_regs = guest_cpu_user_regs();
-
- r->ip = cur_regs->rip;
- r->sp = cur_regs->rsp;
- r->flags = cur_regs->eflags;
-
- if ( !has_hvm_container_vcpu(sampled) )
- {
- r->ss = cur_regs->ss;
- r->cs = cur_regs->cs;
- if ( !(sampled->arch.flags & TF_kernel_mode) )
- *flags |= PMU_SAMPLE_USER;
- }
- else
- {
- struct segment_register seg;
-
- hvm_get_segment_register(sampled, x86_seg_cs, &seg);
- r->cs = seg.sel;
- hvm_get_segment_register(sampled, x86_seg_ss, &seg);
- r->ss = seg.sel;
- r->cpl = seg.attr.fields.dpl;
- if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
- *flags |= PMU_SAMPLE_REAL;
- }
- }
-
- vpmu->xenpmu_data->domain_id = domid;
- vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
- if ( is_hardware_domain(sampling->domain) )
- vpmu->xenpmu_data->pcpu_id = smp_processor_id();
- else
- vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;
-
- vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
- apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
- *flags |= PMU_CACHED;
- vpmu_set(vpmu, VPMU_CACHED);
-
- send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
-
- return;
- }
-
- /* HVM guests */
- vlapic = vcpu_vlapic(sampling);
-
- /* We don't support (yet) HVM dom0 */
- ASSERT(sampling == sampled);
-
- if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
- !is_vlapic_lvtpc_enabled(vlapic) )
- return;
-
- vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
-
- switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
- {
- case APIC_MODE_FIXED:
- vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
- break;
- case APIC_MODE_NMI:
- sampling->nmi_pending = 1;
- break;
- }
-}
-
-void vpmu_do_cpuid(unsigned int input,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(current);
-
- if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid )
- vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx);
-}
-
-static void vpmu_save_force(void *arg)
-{
- struct vcpu *v = (struct vcpu *)arg;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- return;
-
- vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
-
- if ( vpmu->arch_vpmu_ops )
- (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);
-
- vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
-
- per_cpu(last_vcpu, smp_processor_id()) = NULL;
-}
-
-void vpmu_save(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- int pcpu = smp_processor_id();
-
- if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
- return;
-
- vpmu->last_pcpu = pcpu;
- per_cpu(last_vcpu, pcpu) = v;
-
- if ( vpmu->arch_vpmu_ops )
- if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
-
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
-}
-
-int vpmu_load(struct vcpu *v, bool_t from_guest)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- int pcpu = smp_processor_id();
- struct vcpu *prev = NULL;
-
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
- return 0;
-
- /* First time this VCPU is running here */
- if ( vpmu->last_pcpu != pcpu )
- {
- /*
- * Get the context from last pcpu that we ran on. Note that if another
- * VCPU is running there, it must have saved this VCPU's context before
- * starting to run (see below).
- * There should be no race since remote pcpu will disable interrupts
- * before saving the context.
- */
- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
- {
- on_selected_cpus(cpumask_of(vpmu->last_pcpu),
- vpmu_save_force, (void *)v, 1);
- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
- }
- }
-
- /* Prevent forced context save from remote CPU */
- local_irq_disable();
-
- prev = per_cpu(last_vcpu, pcpu);
-
- if ( prev != v && prev )
- {
- vpmu = vcpu_vpmu(prev);
-
- /* Someone ran here before us */
- vpmu_save_force(prev);
- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
-
- vpmu = vcpu_vpmu(v);
- }
-
- local_irq_enable();
-
- /* Only when the PMU is counting do we load the PMU context immediately. */
- if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
- (!is_hvm_vcpu(vpmu_vcpu(vpmu)) && vpmu_is_set(vpmu, VPMU_CACHED)) )
- return 0;
-
- if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
- {
- int ret;
-
- apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
- /* Arch code needs to set VPMU_CONTEXT_LOADED */
- ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
- if ( ret )
- {
- apic_write_around(APIC_LVTPC,
- vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
- return ret;
- }
- }
-
- return 0;
-}
-
-void vpmu_initialise(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
- uint8_t vendor = current_cpu_data.x86_vendor;
- int ret;
- bool_t is_priv_vpmu = is_hardware_domain(v->domain);
-
- BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
- BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
- BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
- BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);
-
- ASSERT(!vpmu->flags && !vpmu->context);
-
- if ( !is_priv_vpmu )
- {
- /*
- * Count active VPMUs so that we won't try to change vpmu_mode while
- * they are in use.
- * vpmu_mode can be safely updated while dom0's VPMUs are active and
- * so we don't need to include it in the count.
- */
- spin_lock(&vpmu_lock);
- vpmu_count++;
- spin_unlock(&vpmu_lock);
- }
-
- switch ( vendor )
- {
- case X86_VENDOR_AMD:
- ret = svm_vpmu_initialise(v);
- break;
-
- case X86_VENDOR_INTEL:
- ret = vmx_vpmu_initialise(v);
- break;
-
- default:
- if ( vpmu_mode != XENPMU_MODE_OFF )
- {
- printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
- "Disabling VPMU\n", vendor);
- opt_vpmu_enabled = 0;
- vpmu_mode = XENPMU_MODE_OFF;
- }
- return; /* Don't bother restoring vpmu_count, VPMU is off forever */
- }
-
- if ( ret )
- printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);
-
- /* Intel needs to initialize VPMU ops even if VPMU is not in use */
- if ( !is_priv_vpmu &&
- (ret || (vpmu_mode == XENPMU_MODE_OFF) ||
- (vpmu_mode == XENPMU_MODE_ALL)) )
- {
- spin_lock(&vpmu_lock);
- vpmu_count--;
- spin_unlock(&vpmu_lock);
- }
-}
-
-static void vpmu_clear_last(void *arg)
-{
- if ( this_cpu(last_vcpu) == arg )
- this_cpu(last_vcpu) = NULL;
-}
-
-void vpmu_destroy(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
- return;
-
- /*
- * Need to clear last_vcpu in case it points to v.
- * We can check here non-atomically whether it is 'v' since
- * last_vcpu can never become 'v' again at this point.
- * We will test it again in vpmu_clear_last() with interrupts
- * disabled to make sure we don't clear someone else.
- */
- if ( per_cpu(last_vcpu, vpmu->last_pcpu) == v )
- on_selected_cpus(cpumask_of(vpmu->last_pcpu),
- vpmu_clear_last, v, 1);
-
- if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
- {
- /* Unload VPMU first. This will stop counters */
- on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
- vpmu_save_force, v, 1);
- vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
- }
-
- spin_lock(&vpmu_lock);
- if ( !is_hardware_domain(v->domain) )
- vpmu_count--;
- spin_unlock(&vpmu_lock);
-}
-
-static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
-{
- struct vcpu *v;
- struct vpmu_struct *vpmu;
- struct page_info *page;
- uint64_t gfn = params->val;
-
- if ( (vpmu_mode == XENPMU_MODE_OFF) ||
- ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
- return -EINVAL;
-
- if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
- return -EINVAL;
-
- page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
- if ( !page )
- return -EINVAL;
-
- if ( !get_page_type(page, PGT_writable_page) )
- {
- put_page(page);
- return -EINVAL;
- }
-
- v = d->vcpu[params->vcpu];
- vpmu = vcpu_vpmu(v);
-
- spin_lock(&vpmu->vpmu_lock);
-
- if ( v->arch.vpmu.xenpmu_data )
- {
- spin_unlock(&vpmu->vpmu_lock);
- put_page_and_type(page);
- return -EEXIST;
- }
-
- v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
- if ( !v->arch.vpmu.xenpmu_data )
- {
- spin_unlock(&vpmu->vpmu_lock);
- put_page_and_type(page);
- return -ENOMEM;
- }
-
- vpmu_initialise(v);
-
- spin_unlock(&vpmu->vpmu_lock);
-
- return 0;
-}
-
-static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
-{
- struct vcpu *v;
- struct vpmu_struct *vpmu;
- uint64_t mfn;
- void *xenpmu_data;
-
- if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
- return;
-
- v = d->vcpu[params->vcpu];
- if ( v != current )
- vcpu_pause(v);
-
- vpmu = vcpu_vpmu(v);
- spin_lock(&vpmu->vpmu_lock);
-
- vpmu_destroy(v);
- xenpmu_data = vpmu->xenpmu_data;
- vpmu->xenpmu_data = NULL;
-
- spin_unlock(&vpmu->vpmu_lock);
-
- if ( xenpmu_data )
- {
- mfn = domain_page_map_to_mfn(xenpmu_data);
- ASSERT(mfn_valid(mfn));
- unmap_domain_page_global(xenpmu_data);
- put_page_and_type(mfn_to_page(mfn));
- }
-
- if ( v != current )
- vcpu_unpause(v);
-}
-
-/* Dump some vpmu information on console. Used in keyhandler dump_domains(). */
-void vpmu_dump(struct vcpu *v)
-{
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
-
- if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
- vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
-}
-
-long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
-{
- int ret;
- struct vcpu *curr;
- struct xen_pmu_params pmu_params = {.val = 0};
- struct xen_pmu_data *xenpmu_data;
- struct vpmu_struct *vpmu;
-
- if ( !opt_vpmu_enabled )
- return -EOPNOTSUPP;
-
- ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
- if ( ret )
- return ret;
-
- /* Check major version when parameters are specified */
- switch ( op )
- {
- case XENPMU_mode_set:
- case XENPMU_feature_set:
- case XENPMU_init:
- case XENPMU_finish:
- if ( copy_from_guest(&pmu_params, arg, 1) )
- return -EFAULT;
-
- if ( pmu_params.version.maj != XENPMU_VER_MAJ )
- return -EINVAL;
- }
-
- switch ( op )
- {
- case XENPMU_mode_set:
- {
- if ( (pmu_params.val &
- ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
- (hweight64(pmu_params.val) > 1) )
- return -EINVAL;
-
- /* 32-bit dom0 can only sample itself. */
- if ( is_pv_32bit_vcpu(current) &&
- (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
- return -EINVAL;
-
- spin_lock(&vpmu_lock);
-
- /*
- * We can always safely switch between XENPMU_MODE_SELF and
- * XENPMU_MODE_HV while other VPMUs are active.
- */
- if ( (vpmu_count == 0) ||
- ((vpmu_mode ^ pmu_params.val) ==
- (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
- vpmu_mode = pmu_params.val;
- else if ( vpmu_mode != pmu_params.val )
- {
- printk(XENLOG_WARNING
- "VPMU: Cannot change mode while active VPMUs exist\n");
- ret = -EBUSY;
- }
-
- spin_unlock(&vpmu_lock);
-
- break;
- }
-
- case XENPMU_mode_get:
- memset(&pmu_params, 0, sizeof(pmu_params));
- pmu_params.val = vpmu_mode;
-
- pmu_params.version.maj = XENPMU_VER_MAJ;
- pmu_params.version.min = XENPMU_VER_MIN;
-
- if ( copy_to_guest(arg, &pmu_params, 1) )
- ret = -EFAULT;
-
- break;
-
- case XENPMU_feature_set:
- if ( pmu_params.val & ~XENPMU_FEATURE_INTEL_BTS )
- return -EINVAL;
-
- spin_lock(&vpmu_lock);
-
- if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
- vpmu_features = pmu_params.val;
- else
- {
- printk(XENLOG_WARNING "VPMU: Cannot change features while"
- " active VPMUs exist\n");
- ret = -EBUSY;
- }
-
- spin_unlock(&vpmu_lock);
-
- break;
-
- case XENPMU_feature_get:
- pmu_params.val = vpmu_features;
- if ( copy_field_to_guest(arg, &pmu_params, val) )
- ret = -EFAULT;
-
- break;
-
- case XENPMU_init:
- ret = pvpmu_init(current->domain, &pmu_params);
- break;
-
- case XENPMU_finish:
- pvpmu_finish(current->domain, &pmu_params);
- break;
-
- case XENPMU_lvtpc_set:
- xenpmu_data = current->arch.vpmu.xenpmu_data;
- if ( xenpmu_data != NULL )
- vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
- else
- ret = -EINVAL;
- break;
-
- case XENPMU_flush:
- curr = current;
- vpmu = vcpu_vpmu(curr);
- xenpmu_data = curr->arch.vpmu.xenpmu_data;
- if ( xenpmu_data == NULL )
- return -EINVAL;
- xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
- vpmu_reset(vpmu, VPMU_CACHED);
- vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
- if ( vpmu_load(curr, 1) )
- {
- xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
- vpmu_set(vpmu, VPMU_CACHED);
- ret = -EIO;
- }
- break;
-
- default:
- ret = -EINVAL;
- }
-
- return ret;
-}
-
-static int __init vpmu_init(void)
-{
- int vendor = current_cpu_data.x86_vendor;
-
- if ( !opt_vpmu_enabled )
- {
- printk(XENLOG_INFO "VPMU: disabled\n");
- return 0;
- }
-
- /* NMI watchdog uses LVTPC and HW counter */
- if ( opt_watchdog && opt_vpmu_enabled )
- {
- printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
- opt_vpmu_enabled = 0;
- vpmu_mode = XENPMU_MODE_OFF;
- return 0;
- }
-
- switch ( vendor )
- {
- case X86_VENDOR_AMD:
- if ( amd_vpmu_init() )
- vpmu_mode = XENPMU_MODE_OFF;
- break;
- case X86_VENDOR_INTEL:
- if ( core2_vpmu_init() )
- vpmu_mode = XENPMU_MODE_OFF;
- break;
- default:
- printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
- "Turning VPMU off.\n", vendor);
- vpmu_mode = XENPMU_MODE_OFF;
- break;
- }
-
- if ( vpmu_mode != XENPMU_MODE_OFF )
- printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
- __stringify(XENPMU_VER_MIN) "\n");
- else
- opt_vpmu_enabled = 0;
-
- return 0;
-}
-__initcall(vpmu_init);
#include <asm/processor.h>
#include <asm/regs.h>
#include <asm/current.h>
-#include <asm/hvm/vpmu.h>
+#include <asm/vpmu.h>
#include "op_x86_model.h"
#include "op_counter.h"
#include <asm/apic.h>
#include <asm/mc146818rtc.h>
#include <asm/hpet.h>
-#include <asm/hvm/vpmu.h>
+#include <asm/vpmu.h>
#include <public/arch-x86/cpuid.h>
#include <xsm/xsm.h>
#ifndef __ASM_X86_HVM_VMX_VMCS_H__
#define __ASM_X86_HVM_VMX_VMCS_H__
+#include <asm/vpmu.h>
#include <asm/hvm/io.h>
-#include <asm/hvm/vpmu.h>
#include <irq_vectors.h>
extern void vmcs_dump_vcpu(struct vcpu *v);
+++ /dev/null
-/*
- * vpmu.h: PMU virtualization for HVM domain.
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Author: Haitao Shan <haitao.shan@intel.com>
- */
-
-#ifndef __ASM_X86_HVM_VPMU_H_
-#define __ASM_X86_HVM_VPMU_H_
-
-#include <public/pmu.h>
-
-#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu)
-#define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu)
-
-#define MSR_TYPE_COUNTER 0
-#define MSR_TYPE_CTRL 1
-#define MSR_TYPE_GLOBAL 2
-#define MSR_TYPE_ARCH_COUNTER 3
-#define MSR_TYPE_ARCH_CTRL 4
-
-/* Start of PMU register bank */
-#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \
- (uintptr_t)ctxt->offset))
-
-/* Arch specific operations shared by all vpmus */
-struct arch_vpmu_ops {
- int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
- uint64_t supported);
- int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
- int (*do_interrupt)(struct cpu_user_regs *regs);
- void (*do_cpuid)(unsigned int input,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx);
- void (*arch_vpmu_destroy)(struct vcpu *v);
- int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
- int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
- void (*arch_vpmu_dump)(const struct vcpu *);
-};
-
-int core2_vpmu_init(void);
-int vmx_vpmu_initialise(struct vcpu *);
-int amd_vpmu_init(void);
-int svm_vpmu_initialise(struct vcpu *);
-
-struct vpmu_struct {
- u32 flags;
- u32 last_pcpu;
- u32 hw_lapic_lvtpc;
- void *context; /* May be shared with PV guest */
- void *priv_context; /* hypervisor-only */
- struct arch_vpmu_ops *arch_vpmu_ops;
- struct xen_pmu_data *xenpmu_data;
- spinlock_t vpmu_lock;
-};
-
-/* VPMU states */
-#define VPMU_CONTEXT_ALLOCATED 0x1
-#define VPMU_CONTEXT_LOADED 0x2
-#define VPMU_RUNNING 0x4
-#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */
-#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */
-#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20
-/* PV(H) guests: VPMU registers are accessed by guest from shared page */
-#define VPMU_CACHED 0x40
-
-static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
-{
- vpmu->flags |= mask;
-}
-static inline void vpmu_reset(struct vpmu_struct *vpmu, const u32 mask)
-{
- vpmu->flags &= ~mask;
-}
-static inline void vpmu_clear(struct vpmu_struct *vpmu)
-{
- vpmu->flags = 0;
-}
-static inline bool_t vpmu_is_set(const struct vpmu_struct *vpmu, const u32 mask)
-{
- return !!(vpmu->flags & mask);
-}
-static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
- const u32 mask)
-{
- return !!((vpmu->flags & mask) == mask);
-}
-
-void vpmu_lvtpc_update(uint32_t val);
-int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
- uint64_t supported, bool_t is_write);
-void vpmu_do_interrupt(struct cpu_user_regs *regs);
-void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx);
-void vpmu_initialise(struct vcpu *v);
-void vpmu_destroy(struct vcpu *v);
-void vpmu_save(struct vcpu *v);
-int vpmu_load(struct vcpu *v, bool_t from_guest);
-void vpmu_dump(struct vcpu *v);
-
-static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
- uint64_t supported)
-{
- return vpmu_do_msr(msr, &msr_content, supported, 1);
-}
-static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
- return vpmu_do_msr(msr, msr_content, 0, 0);
-}
-
-extern int acquire_pmu_ownership(int pmu_ownership);
-extern void release_pmu_ownership(int pmu_ownership);
-
-extern unsigned int vpmu_mode;
-extern unsigned int vpmu_features;
-
-/* Context switch */
-static inline void vpmu_switch_from(struct vcpu *prev)
-{
- if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
- vpmu_save(prev);
-}
-
-static inline void vpmu_switch_to(struct vcpu *next)
-{
- if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
- vpmu_load(next, 0);
-}
-
-#endif /* __ASM_X86_HVM_VPMU_H_*/
-
--- /dev/null
+/*
+ * vpmu.h: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_H_
+#define __ASM_X86_HVM_VPMU_H_
+
+#include <public/pmu.h>
+
+#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu)
+#define vpmu_vcpu(vpmu) container_of((vpmu), struct vcpu, arch.vpmu)
+
+#define MSR_TYPE_COUNTER 0
+#define MSR_TYPE_CTRL 1
+#define MSR_TYPE_GLOBAL 2
+#define MSR_TYPE_ARCH_COUNTER 3
+#define MSR_TYPE_ARCH_CTRL 4
+
+/* Start of PMU register bank */
+#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \
+ (uintptr_t)ctxt->offset))
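+/*
+ * The macro above resolves a register bank whose location is kept as a byte
+ * offset inside the shared PMU context.  A sketch of typical use by a vendor
+ * backend (the field name is illustrative, not part of this header):
+ *
+ *     struct xen_pmu_amd_ctxt *ctxt = vpmu->context;
+ *     uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters);
+ */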
+
+/* Arch specific operations shared by all vpmus */
+struct arch_vpmu_ops {
+ int (*do_wrmsr)(unsigned int msr, uint64_t msr_content,
+ uint64_t supported);
+ int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
+ int (*do_interrupt)(struct cpu_user_regs *regs);
+ void (*do_cpuid)(unsigned int input,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx);
+ void (*arch_vpmu_destroy)(struct vcpu *v);
+ int (*arch_vpmu_save)(struct vcpu *v, bool_t to_guest);
+ int (*arch_vpmu_load)(struct vcpu *v, bool_t from_guest);
+ void (*arch_vpmu_dump)(const struct vcpu *);
+};
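+/*
+ * Each vendor backend is expected to provide a statically defined ops table
+ * and install it from its initialise hook, roughly along these lines (names
+ * are illustrative, not a definitive interface):
+ *
+ *     static struct arch_vpmu_ops amd_vpmu_ops = {
+ *         .do_wrmsr = amd_vpmu_do_wrmsr,
+ *         .do_rdmsr = amd_vpmu_do_rdmsr,
+ *         ...
+ *     };
+ *     vcpu_vpmu(v)->arch_vpmu_ops = &amd_vpmu_ops;
+ */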
+
+int core2_vpmu_init(void);
+int vmx_vpmu_initialise(struct vcpu *);
+int amd_vpmu_init(void);
+int svm_vpmu_initialise(struct vcpu *);
+
+struct vpmu_struct {
+ u32 flags;
+ u32 last_pcpu;
+ u32 hw_lapic_lvtpc;
+ void *context; /* May be shared with PV guest */
+ void *priv_context; /* hypervisor-only */
+ struct arch_vpmu_ops *arch_vpmu_ops;
+ struct xen_pmu_data *xenpmu_data;
+ spinlock_t vpmu_lock;
+};
+
+/* VPMU states */
+#define VPMU_CONTEXT_ALLOCATED 0x1
+#define VPMU_CONTEXT_LOADED 0x2
+#define VPMU_RUNNING 0x4
+#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */
+#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */
+#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20
+/* PV(H) guests: VPMU registers are accessed by guest from shared page */
+#define VPMU_CACHED 0x40
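+/*
+ * A rough sketch of how these flags are used (the arch backends own the
+ * details): VPMU_CONTEXT_ALLOCATED is set once per-vCPU state exists and
+ * VPMU_RUNNING while counters are armed; VPMU_CONTEXT_LOADED tracks whether
+ * that state is currently live in hardware; VPMU_CONTEXT_SAVE forces a full
+ * save even when the PMU is stopped; VPMU_CACHED marks a PV(H) sample parked
+ * in the shared page until the guest issues XENPMU_flush.
+ */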
+
+static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
+{
+ vpmu->flags |= mask;
+}
+static inline void vpmu_reset(struct vpmu_struct *vpmu, const u32 mask)
+{
+ vpmu->flags &= ~mask;
+}
+static inline void vpmu_clear(struct vpmu_struct *vpmu)
+{
+ vpmu->flags = 0;
+}
+static inline bool_t vpmu_is_set(const struct vpmu_struct *vpmu, const u32 mask)
+{
+ return !!(vpmu->flags & mask);
+}
+static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
+ const u32 mask)
+{
+ return !!((vpmu->flags & mask) == mask);
+}
+
+void vpmu_lvtpc_update(uint32_t val);
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+ uint64_t supported, bool_t is_write);
+void vpmu_do_interrupt(struct cpu_user_regs *regs);
+void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx);
+void vpmu_initialise(struct vcpu *v);
+void vpmu_destroy(struct vcpu *v);
+void vpmu_save(struct vcpu *v);
+int vpmu_load(struct vcpu *v, bool_t from_guest);
+void vpmu_dump(struct vcpu *v);
+
+static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+ uint64_t supported)
+{
+ return vpmu_do_msr(msr, &msr_content, supported, 1);
+}
+static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+ return vpmu_do_msr(msr, msr_content, 0, 0);
+}
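+/*
+ * Thin wrappers intended for the MSR intercept paths; a caller handling a
+ * guest PMU MSR read might use them along these lines (sketch only):
+ *
+ *     if ( vpmu_do_rdmsr(msr, &msr_content) )
+ *         goto gp_fault;
+ */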
+
+extern int acquire_pmu_ownership(int pmu_ownership);
+extern void release_pmu_ownership(int pmu_ownership);
+
+extern unsigned int vpmu_mode;
+extern unsigned int vpmu_features;
+
+/* Context switch */
+static inline void vpmu_switch_from(struct vcpu *prev)
+{
+ if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+ vpmu_save(prev);
+}
+
+static inline void vpmu_switch_to(struct vcpu *next)
+{
+ if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
+ vpmu_load(next, 0);
+}
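+/*
+ * Both helpers are meant to be invoked from the context-switch path so that
+ * the outgoing vCPU's counters are saved and the incoming vCPU's context is
+ * reloaded.  Neither does anything under XENPMU_MODE_OFF or XENPMU_MODE_ALL,
+ * so in the latter mode the (hardware domain's) profiling state is left in
+ * place across switches.
+ */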
+
+#endif /* __ASM_X86_HVM_VPMU_H_ */
+