From: Keir Fraser
Date: Mon, 22 Dec 2008 08:12:33 +0000 (+0000)
Subject: Enable CMCI for Intel CPUs
X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~14026^2~3
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=8c9a4d8e4c8688f0306eb043e883ed5a589cbbe3;p=xen.git

Enable CMCI for Intel CPUs

Signed-off-by: Yunhong Jiang
Signed-off-by: Liping Ke
---

diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index 55a25bce39..7376c66e7c 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -99,8 +99,11 @@ void __init apic_intr_init(void)
     /* Performance Counters Interrupt */
     set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
-    /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+    /* CMCI Correctable Machine Check Interrupt */
+    set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt);
+
+    /* thermal monitor LVT interrupt, for P4 and later Intel CPUs */
+#ifdef CONFIG_X86_MCE_THERMAL
     set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
 }
@@ -172,12 +175,17 @@ void clear_local_APIC(void)
     }
 
     /* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5) {
         v = apic_read(APIC_LVTTHMR);
         apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
     }
 #endif
+
+    if (maxlvt >= 6) {
+        v = apic_read(APIC_CMCI);
+        apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED);
+    }
     /*
      * Clean APIC state for other OSs:
      */
@@ -189,10 +197,13 @@ void clear_local_APIC(void)
     if (maxlvt >= 4)
         apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
 
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5)
         apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
 #endif
+    if (maxlvt >= 6)
+        apic_write_around(APIC_CMCI, APIC_LVT_MASKED);
+
     v = GET_APIC_VERSION(apic_read(APIC_LVR));
     if (APIC_INTEGRATED(v)) {  /* !82489DX */
         if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP.
*/ @@ -597,6 +608,7 @@ static struct { unsigned int apic_spiv; unsigned int apic_lvtt; unsigned int apic_lvtpc; + unsigned int apic_lvtcmci; unsigned int apic_lvt0; unsigned int apic_lvt1; unsigned int apic_lvterr; @@ -608,7 +620,7 @@ static struct { int lapic_suspend(void) { unsigned long flags; - + int maxlvt = get_maxlvt(); if (!apic_pm_state.active) return 0; @@ -620,6 +632,11 @@ int lapic_suspend(void) apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + + if (maxlvt >= 6) { + apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI); + } + apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); @@ -637,6 +654,7 @@ int lapic_resume(void) { unsigned int l, h; unsigned long flags; + int maxlvt = get_maxlvt(); if (!apic_pm_state.active) return 0; @@ -669,6 +687,11 @@ int lapic_resume(void) apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); + + if (maxlvt >= 6) { + apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci); + } + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); diff --git a/xen/arch/x86/cpu/mcheck/Makefile b/xen/arch/x86/cpu/mcheck/Makefile index 3ecc791402..15fed6eb0b 100644 --- a/xen/arch/x86/cpu/mcheck/Makefile +++ b/xen/arch/x86/cpu/mcheck/Makefile @@ -3,8 +3,7 @@ obj-y += k7.o obj-y += amd_k8.o obj-y += amd_f10.o obj-y += mce.o +obj-y += mce_intel.o obj-y += non-fatal.o -obj-y += p4.o obj-$(x86_32) += p5.o -obj-$(x86_32) += p6.o obj-$(x86_32) += winchip.o diff --git a/xen/arch/x86/cpu/mcheck/k7.c b/xen/arch/x86/cpu/mcheck/k7.c index 045dd433a2..aedd0a0e1f 100644 --- a/xen/arch/x86/cpu/mcheck/k7.c +++ b/xen/arch/x86/cpu/mcheck/k7.c @@ -14,6 +14,7 @@ #include #include "mce.h" +#include "x86_mca.h" /* Machine Check Handler For AMD Athlon/Duron */ static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code) diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c index d4c6f05751..2331f5d670 100644 --- a/xen/arch/x86/cpu/mcheck/mce.c +++ b/xen/arch/x86/cpu/mcheck/mce.c @@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ * to physical cpus present in the machine. * The more physical cpus are available, the more entries you need. 
*/ -#define MAX_MCINFO 10 +#define MAX_MCINFO 20 struct mc_machine_notify { struct mc_info mc; @@ -110,6 +110,22 @@ static void amd_mcheck_init(struct cpuinfo_x86 *ci) } } +/*check the existence of Machine Check*/ +int mce_available(struct cpuinfo_x86 *c) +{ + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); +} + +/*Make sure there are no machine check on offlined or suspended CPUs*/ +void mce_disable_cpu(void) +{ + if (!mce_available(¤t_cpu_data) || mce_disabled == 1) + return; + printk(KERN_DEBUG "MCE: disable mce on CPU%d\n", smp_processor_id()); + clear_in_cr4(X86_CR4_MCE); +} + + /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) { @@ -135,11 +151,13 @@ void mcheck_init(struct cpuinfo_x86 *c) #ifndef CONFIG_X86_64 if (c->x86==5) intel_p5_mcheck_init(c); - if (c->x86==6) - intel_p6_mcheck_init(c); #endif - if (c->x86==15) - intel_p4_mcheck_init(c); + /*If it is P6 or P4 family, including CORE 2 DUO series*/ + if (c->x86 == 6 || c->x86==15) + { + printk(KERN_DEBUG "MCE: Intel newly family MC Init\n"); + intel_mcheck_init(c); + } break; #ifndef CONFIG_X86_64 @@ -413,7 +431,7 @@ void x86_mcinfo_dump(struct mc_info *mi) if (mic == NULL) return; if (mic->type != MC_TYPE_BANK) - continue; + goto next; mc_bank = (struct mcinfo_bank *)mic; @@ -426,6 +444,7 @@ void x86_mcinfo_dump(struct mc_info *mi) printk(" at %16"PRIx64, mc_bank->mc_addr); printk("\n"); +next: mic = x86_mcinfo_next(mic); /* next entry */ if ((mic == NULL) || (mic->size == 0)) break; diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h index 8a48ef5fd1..d588a6dc05 100644 --- a/xen/arch/x86/cpu/mcheck/mce.h +++ b/xen/arch/x86/cpu/mcheck/mce.h @@ -1,14 +1,22 @@ #include +#include #include +#include +#include + /* Init functions */ void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c); void amd_k7_mcheck_init(struct cpuinfo_x86 *c); void amd_k8_mcheck_init(struct cpuinfo_x86 *c); void amd_f10_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); + + +void intel_mcheck_timer(struct cpuinfo_x86 *c); void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +void intel_mcheck_init(struct cpuinfo_x86 *c); +void mce_intel_feature_init(struct cpuinfo_x86 *c); + void winchip_mcheck_init(struct cpuinfo_x86 *c); /* Function pointer used in the handlers to collect additional information @@ -19,6 +27,7 @@ extern int (*mc_callback_bank_extended)(struct mc_info *mi, uint16_t bank, uint64_t status); +int mce_available(struct cpuinfo_x86 *c); /* Helper functions used for collecting error telemetry */ struct mc_info *x86_mcinfo_getptr(void); void x86_mcinfo_clear(struct mc_info *mi); @@ -26,6 +35,3 @@ int x86_mcinfo_add(struct mc_info *mi, void *mcinfo); void x86_mcinfo_dump(struct mc_info *mi); void mc_panic(char *s); -/* Global variables */ -extern int mce_disabled; -extern unsigned int nr_mce_banks; diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c new file mode 100644 index 0000000000..c053f6b0ee --- /dev/null +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -0,0 +1,681 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mce.h" +#include "x86_mca.h" + +DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned); + +static int nr_intel_ext_msrs = 0; +static int cmci_support = 0; +extern int firstbank; + +#ifdef CONFIG_X86_MCE_THERMAL +static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) +{ + printk(KERN_ERR 
"Thermal: CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler */ +static void intel_thermal_interrupt(struct cpu_user_regs *regs) +{ + u32 l, h; + unsigned int cpu = smp_processor_id(); + static s_time_t next[NR_CPUS]; + + ack_APIC_irq(); + if (NOW() < next[cpu]) + return; + + next[cpu] = NOW() + MILLISECS(5000); + rdmsr(MSR_IA32_THERM_STATUS, l, h); + if (l & 0x1) { + printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); + printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", + cpu); + add_taint(TAINT_MACHINE_CHECK); + } else { + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + } +} + +/* Thermal interrupt handler for this CPU setup */ +static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) + = unexpected_thermal_interrupt; + +fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) +{ + irq_enter(); + vendor_thermal_interrupt(regs); + irq_exit(); +} + +/* P4/Xeon Thermal regulation detect and init */ +static void intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + int tm2 = 0; + unsigned int cpu = smp_processor_id(); + + /* Thermal monitoring */ + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; /* -ENODEV */ + + /* Clock modulation */ + if (!cpu_has(c, X86_FEATURE_ACC)) + return; /* -ENODEV */ + + /* first check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already -zwanem. + */ + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu); + return; /* -EBUSY */ + } + + if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + tm2 = 1; + + /* check whether a vector already exists, temporarily masked? */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; /* -EBUSY */ + } + + /* The temperature transition interrupt handler setup */ + h = THERMAL_APIC_VECTOR; /* our delivery vector */ + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ + apic_write_around(APIC_LVTTHMR, h); + + rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + + /* ok we're good to go... */ + vendor_thermal_interrupt = intel_thermal_interrupt; + + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read (APIC_LVTTHMR); + apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", + cpu, tm2 ? 
"TM2" : "TM1"); + return; +} +#endif /* CONFIG_X86_MCE_THERMAL */ + +static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext) +{ + if (nr_intel_ext_msrs == 0) + return; + + /*this function will called when CAP(9).MCG_EXT_P = 1*/ + memset(mc_ext, 0, sizeof(struct mcinfo_extended)); + mc_ext->common.type = MC_TYPE_EXTENDED; + mc_ext->common.size = sizeof(mc_ext); + mc_ext->mc_msrs = 10; + + mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX; + rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value); + mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX; + rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value); + mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX; + rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value); + + mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX; + rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value); + mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI; + rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value); + mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI; + rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value); + + mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP; + rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value); + mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP; + rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value); + mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS; + rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value); + mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP; + rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value); +} + +/* machine_check_poll might be called by following types: + * 1. called when do mcheck_init. + * 2. called in cmci interrupt handler + * 3. called in polling handler + * It will generate a new mc_info item if found CE/UC errors. DOM0 is the + * consumer. +*/ +static int machine_check_poll(struct mc_info *mi, int calltype) +{ + int exceptions = (read_cr4() & X86_CR4_MCE); + int i, nr_unit = 0, uc = 0, pcc = 0; + uint64_t status, addr; + struct mcinfo_global mcg; + struct mcinfo_extended mce; + unsigned int cpu; + struct domain *d; + + cpu = smp_processor_id(); + + if (!mi) { + printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n"); + return 0; + } + x86_mcinfo_clear(mi); + + memset(&mcg, 0, sizeof(mcg)); + mcg.common.type = MC_TYPE_GLOBAL; + mcg.common.size = sizeof(mcg); + /*If called from cpu-reset check, don't need to fill them. + *If called from cmci context, we'll try to fill domid by memory addr + */ + mcg.mc_domid = -1; + mcg.mc_vcpuid = -1; + if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET) + mcg.mc_flags = MC_FLAG_POLLED; + else if (calltype == MC_FLAG_CMCI) + mcg.mc_flags = MC_FLAG_CMCI; + mcg.mc_socketid = phys_proc_id[cpu]; + mcg.mc_coreid = cpu_core_id[cpu]; + mcg.mc_apicid = cpu_physical_id(cpu); + mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); + rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus); + + for ( i = 0; i < nr_mce_banks; i++ ) { + struct mcinfo_bank mcb; + /*For CMCI, only owners checks the owned MSRs*/ + if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) && + (calltype & MC_FLAG_CMCI) ) + continue; + rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status); + + if (! (status & MCi_STATUS_VAL) ) + continue; + /* + * Uncorrected events are handled by the exception + * handler when it is enabled. But when the exception + * is disabled such as when mcheck_init, log everything. 
+ */ + if ((status & MCi_STATUS_UC) && exceptions) + continue; + + if (status & MCi_STATUS_UC) + uc = 1; + if (status & MCi_STATUS_PCC) + pcc = 1; + + memset(&mcb, 0, sizeof(mcb)); + mcb.common.type = MC_TYPE_BANK; + mcb.common.size = sizeof(mcb); + mcb.mc_bank = i; + mcb.mc_status = status; + if (status & MCi_STATUS_MISCV) + rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc); + if (status & MCi_STATUS_ADDRV) { + rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr); + d = maddr_get_owner(addr); + if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) ) + mcb.mc_domid = d->domain_id; + } + if (cmci_support) + rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2); + if (calltype == MC_FLAG_CMCI) + rdtscll(mcb.mc_tsc); + x86_mcinfo_add(mi, &mcb); + nr_unit++; + add_taint(TAINT_MACHINE_CHECK); + /*Clear state for this bank */ + wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0); + printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%lx]\n", + i, cpu, status); + printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], " + "thread[%d]\n", cpu, mcg.mc_socketid, + mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid); + + } + /*if pcc = 1, uc must be 1*/ + if (pcc) + mcg.mc_flags |= MC_FLAG_UNCORRECTABLE; + else if (uc) + mcg.mc_flags |= MC_FLAG_RECOVERABLE; + else /*correctable*/ + mcg.mc_flags |= MC_FLAG_CORRECTABLE; + + if (nr_unit && nr_intel_ext_msrs && + (mcg.mc_gstatus & MCG_STATUS_EIPV)) { + intel_get_extended_msrs(&mce); + x86_mcinfo_add(mi, &mce); + } + if (nr_unit) + x86_mcinfo_add(mi, &mcg); + /*Clear global state*/ + return nr_unit; +} + +static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) +{ + /* MACHINE CHECK Error handler will be sent in another patch, + * simply copy old solutions here. This code will be replaced + * by upcoming machine check patches + */ + + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=0; ivcpu[0], VIRQ_MCA)) + send_guest_global_virq(dom0, VIRQ_MCA); + } + irq_exit(); +} + +void mce_intel_feature_init(struct cpuinfo_x86 *c) +{ + +#ifdef CONFIG_X86_MCE_THERMAL + intel_init_thermal(c); +#endif + intel_init_cmci(c); +} + +static void mce_cap_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + + rdmsr (MSR_IA32_MCG_CAP, l, h); + if ((l & MCG_CMCI_P) && cpu_has_apic) + cmci_support = 1; + + nr_mce_banks = l & 0xff; + if (nr_mce_banks > MAX_NR_BANKS) + printk(KERN_WARNING "MCE: exceed max mce banks\n"); + if (l & MCG_EXT_P) + { + nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff; + printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n", + smp_processor_id(), nr_intel_ext_msrs); + } + /* for most of p6 family, bank 0 is an alias bios MSR. + * But after model>1a, bank 0 is available*/ + if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL + && c->x86_model < 0x1A) + firstbank = 1; + else + firstbank = 0; +} + +static void mce_init(void) +{ + u32 l, h; + int i, nr_unit; + struct mc_info *mi = x86_mcinfo_getptr(); + clear_in_cr4(X86_CR4_MCE); + /* log the machine checks left over from the previous reset. 
+ * This also clears all registers*/ + + nr_unit = machine_check_poll(mi, MC_FLAG_RESET); + /*in the boot up stage, not expect inject to DOM0, but go print out + */ + if (nr_unit > 0) + x86_mcinfo_dump(mi); + + set_in_cr4(X86_CR4_MCE); + rdmsr (MSR_IA32_MCG_CAP, l, h); + if (l & MCG_CTL_P) /* Control register present ? */ + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + + for (i = firstbank; i < nr_mce_banks; i++) + { + /*Some banks are shared across cores, use MCi_CTRL to judge whether + * this bank has been initialized by other cores already.*/ + rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h); + if (!l & !h) + { + /*if ctl is 0, this bank is never initialized*/ + printk(KERN_DEBUG "mce_init: init bank%d\n", i); + wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff); + wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0); + } + } + if (firstbank) /*if cmci enabled, firstbank = 0*/ + wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); +} + +/*p4/p6 faimily has similar MCA initialization process*/ +void intel_mcheck_init(struct cpuinfo_x86 *c) +{ + + mce_cap_init(c); + printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); + /* machine check is available */ + machine_check_vector = intel_machine_check; + mce_init(); + mce_intel_feature_init(c); + mce_set_owner(); +} + +/* + * Periodic polling timer for "silent" machine check errors. If the + * poller finds an MCE, poll faster. When the poller finds no more + * errors, poll slower +*/ +static struct timer mce_timer; + +#define MCE_PERIOD 4000 +#define MCE_MIN 2000 +#define MCE_MAX 32000 + +static u64 period = MCE_PERIOD; +static int adjust = 0; + +static void mce_intel_checkregs(void *info) +{ + int nr_unit; + struct mc_info *mi = x86_mcinfo_getptr(); + + if( !mce_available(¤t_cpu_data)) + return; + nr_unit = machine_check_poll(mi, MC_FLAG_POLLED); + if (nr_unit) + { + x86_mcinfo_dump(mi); + adjust++; + if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) + send_guest_global_virq(dom0, VIRQ_MCA); + } +} + +static void mce_intel_work_fn(void *data) +{ + on_each_cpu(mce_intel_checkregs, data, 1, 1); + if (adjust) { + period = period / (adjust + 1); + printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval to %ld", + period); + } + else { + period *= 2; + } + if (period > MCE_MAX) + period = MCE_MAX; + if (period < MCE_MIN) + period = MCE_MIN; + set_timer(&mce_timer, NOW() + MILLISECS(period)); + adjust = 0; +} + +void intel_mcheck_timer(struct cpuinfo_x86 *c) +{ + printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n"); + init_timer(&mce_timer, mce_intel_work_fn, NULL, 0); + set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD)); +} + diff --git a/xen/arch/x86/cpu/mcheck/non-fatal.c b/xen/arch/x86/cpu/mcheck/non-fatal.c index 4984eed757..35982a461b 100644 --- a/xen/arch/x86/cpu/mcheck/non-fatal.c +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c @@ -19,8 +19,8 @@ #include #include "mce.h" - -static int firstbank; +#include "x86_mca.h" +int firstbank = 0; static struct timer mce_timer; #define MCE_PERIOD MILLISECS(15000) @@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_checker(void) struct cpuinfo_x86 *c = &boot_cpu_data; /* Check for MCE support */ - if (!cpu_has(c, X86_FEATURE_MCE)) - return -ENODEV; - - /* Check for PPro style MCA */ - if (!cpu_has(c, X86_FEATURE_MCA)) + if (!mce_available(c)) return -ENODEV; - /* * Check for non-fatal errors every MCE_RATE s */ @@ -85,12 +80,20 @@ static int __init init_nonfatal_mce_checker(void) break; case X86_VENDOR_INTEL: - init_timer(&mce_timer, mce_work_fn, NULL, 0); - 
set_timer(&mce_timer, NOW() + MCE_PERIOD); + /* p5 family is different. P4/P6 and latest CPUs shares the + * same polling methods + */ + if ( c->x86 != 5 ) + { + /* some CPUs or banks don't support cmci, we need to + * enable this feature anyway + */ + intel_mcheck_timer(c); + } break; } - printk(KERN_INFO "MCA: Machine check polling timer started.\n"); + printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n"); return 0; } __initcall(init_nonfatal_mce_checker); diff --git a/xen/arch/x86/cpu/mcheck/p4.c b/xen/arch/x86/cpu/mcheck/p4.c deleted file mode 100644 index bb758e8e37..0000000000 --- a/xen/arch/x86/cpu/mcheck/p4.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * P4 specific Machine Check Exception Reporting - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mce.h" - -/* as supported by the P4/Xeon family */ -struct intel_mce_extended_msrs { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - u32 edi; - u32 ebp; - u32 esp; - u32 eflags; - u32 eip; - /* u32 *reserved[]; */ -}; - -static int mce_num_extended_msrs = 0; - - -#ifdef CONFIG_X86_MCE_P4THERMAL -static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", - smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* P4/Xeon Thermal transition interrupt handler */ -static void intel_thermal_interrupt(struct cpu_user_regs *regs) -{ - u32 l, h; - unsigned int cpu = smp_processor_id(); - static s_time_t next[NR_CPUS]; - - ack_APIC_irq(); - - if (NOW() < next[cpu]) - return; - - next[cpu] = NOW() + MILLISECS(5000); - rdmsr(MSR_IA32_THERM_STATUS, l, h); - if (l & 0x1) { - printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); - printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", - cpu); - add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); - } -} - -/* Thermal interrupt handler for this CPU setup */ -static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) = unexpected_thermal_interrupt; - -fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) -{ - irq_enter(); - vendor_thermal_interrupt(regs); - irq_exit(); -} - -/* P4/Xeon Thermal regulation detect and init */ -static void intel_init_thermal(struct cpuinfo_x86 *c) -{ - u32 l, h; - unsigned int cpu = smp_processor_id(); - - /* Thermal monitoring */ - if (!cpu_has(c, X86_FEATURE_ACPI)) - return; /* -ENODEV */ - - /* Clock modulation */ - if (!cpu_has(c, X86_FEATURE_ACC)) - return; /* -ENODEV */ - - /* first check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already -zwanem. - */ - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & (1<<3)) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", - cpu); - return; /* -EBUSY */ - } - - /* check whether a vector already exists, temporarily masked? 
*/ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " - "installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; /* -EBUSY */ - } - - /* The temperature transition interrupt handler setup */ - h = THERMAL_APIC_VECTOR; /* our delivery vector */ - h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ - apic_write_around(APIC_LVTTHMR, h); - - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); - - /* ok we're good to go... */ - vendor_thermal_interrupt = intel_thermal_interrupt; - - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - - l = apic_read (APIC_LVTTHMR); - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); - return; -} -#endif /* CONFIG_X86_MCE_P4THERMAL */ - - -/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ -static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) -{ - u32 h; - - if (mce_num_extended_msrs == 0) - goto done; - - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); - - /* can we rely on kmalloc to do a dynamic - * allocation for the reserved registers? - */ -done: - return mce_num_extended_msrs; -} - -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - struct intel_mce_extended_msrs dbg; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - if (intel_get_extended_msrs(&dbg)) { - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", - dbg.esi, dbg.edi, dbg.ebp, dbg.esp); - } - - for (i=0; i> 16) & 0xff; - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" - " available\n", - smp_processor_id(), mce_num_extended_msrs); - -#ifdef CONFIG_X86_MCE_P4THERMAL - /* Check for P4/Xeon Thermal monitor */ - intel_init_thermal(c); -#endif - } -} diff --git a/xen/arch/x86/cpu/mcheck/p6.c b/xen/arch/x86/cpu/mcheck/p6.c deleted file mode 100644 index a2f631ed27..0000000000 --- a/xen/arch/x86/cpu/mcheck/p6.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * P6 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine Check Handler For PII/PIII */ -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? 
*/ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i=0; i>18)&0x3) #define SET_APIC_TIMER_BASE(x) (((x)<<18)) diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index aa9b234370..b490887a64 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -22,7 +22,7 @@ #define CONFIG_X86_IO_APIC 1 #define CONFIG_X86_PM_TIMER 1 #define CONFIG_HPET_TIMER 1 -#define CONFIG_X86_MCE_P4THERMAL 1 +#define CONFIG_X86_MCE_THERMAL 1 #define CONFIG_NUMA 1 #define CONFIG_DISCONTIGMEM 1 #define CONFIG_NUMA_EMU 1 diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h index 920ac7f85e..f0f4bfda43 100644 --- a/xen/include/asm-x86/irq.h +++ b/xen/include/asm-x86/irq.h @@ -33,6 +33,7 @@ fastcall void error_interrupt(void); fastcall void pmu_apic_interrupt(void); fastcall void spurious_interrupt(void); fastcall void thermal_interrupt(void); +fastcall void cmci_interrupt(void); void disable_8259A_irq(unsigned int irq); void enable_8259A_irq(unsigned int irq); diff --git a/xen/include/asm-x86/mach-default/irq_vectors.h b/xen/include/asm-x86/mach-default/irq_vectors.h index 057b2a35b8..30c3f89daf 100644 --- a/xen/include/asm-x86/mach-default/irq_vectors.h +++ b/xen/include/asm-x86/mach-default/irq_vectors.h @@ -10,13 +10,13 @@ #define THERMAL_APIC_VECTOR 0xfa #define LOCAL_TIMER_VECTOR 0xf9 #define PMU_APIC_VECTOR 0xf8 - +#define CMCI_APIC_VECTOR 0xf7 /* * High-priority dynamically-allocated vectors. For interrupts that * must be higher priority than any guest-bound interrupt. */ #define FIRST_HIPRIORITY_VECTOR 0xf0 -#define LAST_HIPRIORITY_VECTOR 0xf7 +#define LAST_HIPRIORITY_VECTOR 0xf6 /* Legacy PIC uses vectors 0xe0-0xef. */ #define FIRST_LEGACY_VECTOR 0xe0 diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h index d3fc446271..4f91ca152d 100644 --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -92,8 +92,10 @@ #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 #define MSR_IA32_MC0_MISC 0x00000403 +#define MSR_IA32_MC0_CTL2 0x00000280 +#define CMCI_EN (1UL<<30) +#define CMCI_THRESHOLD_MASK 0x7FFF -#define MSR_IA32_MC1_CTL 0x00000404 #define MSR_IA32_MC1_STATUS 0x00000405 #define MSR_IA32_MC1_ADDR 0x00000406 #define MSR_IA32_MC1_MISC 0x00000407 diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h index 2078d441ec..e48526ae95 100644 --- a/xen/include/asm-x86/smp.h +++ b/xen/include/asm-x86/smp.h @@ -101,7 +101,7 @@ static __inline int logical_smp_processor_id(void) #endif -extern int __cpu_disable(void); +extern int __cpu_disable(int down_cpu); extern void __cpu_die(unsigned int cpu); #endif /* !__ASSEMBLY__ */ diff --git a/xen/include/public/arch-x86/xen-mca.h b/xen/include/public/arch-x86/xen-mca.h index 103d41fd3d..60ae871947 100644 --- a/xen/include/public/arch-x86/xen-mca.h +++ b/xen/include/public/arch-x86/xen-mca.h @@ -106,7 +106,10 @@ struct mcinfo_common { #define MC_FLAG_CORRECTABLE (1 << 0) #define MC_FLAG_UNCORRECTABLE (1 << 1) - +#define MC_FLAG_RECOVERABLE (1 << 2) +#define MC_FLAG_POLLED (1 << 3) +#define MC_FLAG_RESET (1 << 4) +#define MC_FLAG_CMCI (1 << 5) /* contains global x86 mc information */ struct mcinfo_global { struct mcinfo_common common; @@ -115,6 +118,7 @@ struct mcinfo_global { uint16_t mc_domid; uint32_t mc_socketid; /* physical socket of the physical core */ uint16_t mc_coreid; /* physical impacted core */ + uint8_t mc_apicid; uint16_t mc_core_threadid; /* 
core thread of physical core */ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ uint64_t mc_gstatus; /* global status */ @@ -132,6 +136,8 @@ struct mcinfo_bank { uint64_t mc_addr; /* bank address, only valid * if addr bit is set in mc_status */ uint64_t mc_misc; + uint64_t mc_ctrl2; + uint64_t mc_tsc; }; @@ -150,7 +156,12 @@ struct mcinfo_extended { * multiple times. */ uint32_t mc_msrs; /* Number of msr with valid values. */ - struct mcinfo_msr mc_msr[5]; + /* + * Currently Intel extended MSR (32/64) including all gp registers + * and E(R)DI, E(R)BP, E(R)SP, E(R)FLAGS, E(R)IP, E(R)MISC, only 10 + * of them might be useful. So expend this array to 10. + */ + struct mcinfo_msr mc_msr[10]; }; #define MCINFO_HYPERCALLSIZE 1024 diff --git a/xen/include/xen/stop_machine.h b/xen/include/xen/stop_machine.h index 7d4506869f..750e19ad13 100644 --- a/xen/include/xen/stop_machine.h +++ b/xen/include/xen/stop_machine.h @@ -5,7 +5,7 @@ * stop_machine_run: freeze the machine on all CPUs and run this function * @fn: the function to run * @data: the data ptr for the @fn() - * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS). + * @cpus: cpus to run @fn() on. * * Description: This causes every other cpu to enter a safe point, with * each of which disables interrupts, and finally interrupts are disabled @@ -14,6 +14,6 @@ * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. */ -int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu); +int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpu); #endif /* __XEN_STOP_MACHINE_H__ */
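
The per-bank CMCI ownership machinery that mce_intel.c introduces (the per-CPU
mce_banks_owned bitmap, MSR_IA32_MC0_CTL2, CMCI_EN and CMCI_THRESHOLD_MASK) is
only partially visible above. The following is a minimal sketch of how a CPU
typically claims a bank under that scheme; it is not the patch's literal code,
and the helper name cmci_try_claim_bank is invented here for illustration.

static void cmci_try_claim_bank(int i)
{
    u64 val;

    rdmsrl(MSR_IA32_MC0_CTL2 + i, val);

    if (val & CMCI_EN) {
        /* A sibling core already owns this (possibly shared) bank. */
        clear_bit(i, __get_cpu_var(mce_banks_owned));
        return;
    }

    /* Request CMCI with a threshold of one corrected error. */
    val &= ~(u64)CMCI_THRESHOLD_MASK;
    val |= CMCI_EN | 1;
    wrmsrl(MSR_IA32_MC0_CTL2 + i, val);

    /* If CMCI_EN sticks, the bank supports CMCI and this CPU now owns it. */
    rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
    if (val & CMCI_EN)
        set_bit(i, __get_cpu_var(mce_banks_owned));
}

machine_check_poll() then honours this bitmap: in the MC_FLAG_CMCI case it
only reads the MSRs of banks set in mce_banks_owned, which keeps two cores on
one socket from logging the same shared-bank event twice.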
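
Besides the IDT gate installed by apic_intr_init(), each CPU also has to point
its local APIC CMCI LVT entry at CMCI_APIC_VECTOR. The patch's own routine for
that is not shown above; below is a minimal sketch, mirroring the structure of
intel_init_thermal(). The name cmci_enable_lvt and the decision to leave an
already-programmed entry alone are assumptions.

static void cmci_enable_lvt(void)
{
    u32 l = apic_read(APIC_CMCI);

    /* Respect a vector that firmware or an earlier pass already installed. */
    if (l & APIC_VECTOR_MASK)
        return;

    /* Fixed delivery of our vector, unmasked. */
    apic_write_around(APIC_CMCI, CMCI_APIC_VECTOR | APIC_DM_FIXED);
}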
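
Only the tail of the CMCI interrupt handler survives above (the
send_guest_global_virq(dom0, VIRQ_MCA) / irq_exit() fragment). A hedged
reconstruction of its overall shape follows; the name smp_cmci_interrupt is
assumed by analogy with smp_thermal_interrupt, and the exact ordering of the
APIC acknowledge is a guess.

fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
{
    int nr_unit;
    struct mc_info *mi = x86_mcinfo_getptr();

    ack_APIC_irq();
    irq_enter();

    /* Scan only the banks this CPU owns, tagging the record as CMCI. */
    nr_unit = machine_check_poll(mi, MC_FLAG_CMCI);
    if (nr_unit) {
        x86_mcinfo_dump(mi);
        /* Hand the telemetry to dom0 if it registered a VIRQ_MCA handler. */
        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
            send_guest_global_virq(dom0, VIRQ_MCA);
    }

    irq_exit();
}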
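
machine_check_poll() folds the per-bank MCi_STATUS bits into one of the
severity flags in xen-mca.h (including the new MC_FLAG_RECOVERABLE): any bank
with PCC set makes the whole event uncorrectable, UC without PCC is reported
as recoverable, and everything else is a corrected error. Restated as a
standalone helper for clarity; this is a sketch, the patch does it inline with
its uc/pcc locals.

static uint32_t mc_classify(int uc, int pcc)
{
    if (pcc)
        return MC_FLAG_UNCORRECTABLE;  /* processor context corrupt */
    if (uc)
        return MC_FLAG_RECOVERABLE;    /* uncorrected, but context intact */
    return MC_FLAG_CORRECTABLE;        /* corrected error (CMCI/poll case) */
}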
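
The polling path (intel_mcheck_timer() / mce_intel_work_fn()) adapts its
interval: each CPU that reported errors in a pass increments adjust, the next
period is divided by adjust + 1, a quiet pass doubles it, and the result is
clamped to [MCE_MIN, MCE_MAX] milliseconds. The same arithmetic as a pure
function, for illustration only (next_poll_period is not a name used by the
patch):

static u64 next_poll_period(u64 period, int adjust)
{
    if (adjust)
        period /= (adjust + 1);
    else
        period *= 2;

    if (period > MCE_MAX)
        period = MCE_MAX;
    if (period < MCE_MIN)
        period = MCE_MIN;

    return period;
}

Starting from MCE_PERIOD (4000 ms), a pass in which two CPUs find errors gives
4000 / 3 ≈ 1333 ms, which the clamp raises back to MCE_MIN (2000 ms); a quiet
pass stretches 4000 ms to 8000 ms, and repeated quiet passes saturate at
MCE_MAX (32000 ms).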
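
mce_init() walks the banks from firstbank upwards and skips any bank whose
IA32_MCi_CTL is already non-zero, on the grounds that a sibling core sharing
the bank has initialized it first. The intended guard, pulled out as a helper
for readability — a sketch, with the name bank_needs_init invented here and
the check written as an explicit both-halves-zero test:

static int bank_needs_init(int i)
{
    u32 l, h;

    rdmsr(MSR_IA32_MC0_CTL + 4 * i, l, h);

    /* An all-zero control register means nobody has set this bank up yet. */
    return !l && !h;
}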