x86: Enable Supervisor Mode Execution Protection (SMEP)
author Keir Fraser <keir@xen.org>
Fri, 3 Jun 2011 20:39:00 +0000 (21:39 +0100)
committer Keir Fraser <keir@xen.org>
Fri, 3 Jun 2011 20:39:00 +0000 (21:39 +0100)
New Intel CPUs support SMEP (Supervisor Mode Execution Protection).
SMEP prevents software operating with CPL < 3 (supervisor mode) from
fetching instructions from any linear address with a valid translation
for which the U/S flag (bit 2) is 1 in every paging-structure entry
controlling the translation for the linear address.

This patch enables SMEP in Xen to prevent the Xen hypervisor from
executing PV guest instructions, i.e. code at linear addresses for
which every paging-structure entry in the translation has the U/S
flag set.

Signed-off-by: Yang Wei <wei.y.yang@intel.com>
Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Li Xin <xin.li@intel.com>
Signed-off-by: Keir Fraser <keir@xen.org>
xen/arch/x86/setup.c
xen/arch/x86/traps.c
xen/include/asm-x86/cpufeature.h
xen/include/asm-x86/domain.h
xen/include/asm-x86/processor.h

index a872ae98e597aa417e7bf98b022f51cb3b1a9713..7a338d6d01de41f51e00a1686562fe2895f84ddf 100644 (file)
@@ -57,6 +57,10 @@ integer_param("maxcpus", max_cpus);
 static bool_t __initdata opt_watchdog;
 boolean_param("watchdog", opt_watchdog);
 
+/* smep: Enable/disable Supervisor Mode Execution Protection (default on). */
+static bool_t __initdata disable_smep;
+invbool_param("smep", disable_smep);
+
 /* **** Linux config option: propagated to domain0. */
 /* "acpi=off":    Disables both ACPI table parsing and interpreter. */
 /* "acpi=force":  Override the disable blacklist.                   */
@@ -1200,11 +1204,17 @@ void __init __start_xen(unsigned long mbi_p)
     arch_init_memory();
 
     identify_cpu(&boot_cpu_data);
+
     if ( cpu_has_fxsr )
         set_in_cr4(X86_CR4_OSFXSR);
     if ( cpu_has_xmm )
         set_in_cr4(X86_CR4_OSXMMEXCPT);
 
+    if ( disable_smep )
+        setup_clear_cpu_cap(X86_FEATURE_SMEP);
+    if ( cpu_has_smep )
+        set_in_cr4(X86_CR4_SMEP);
+
     local_irq_enable();
 
 #ifdef CONFIG_X86_64
index 5a5bac36a6654f99b9a641efec3618f6c3fa4fce..383fb5e72a25ca4f2f8ae828a6af56c14dfec90e 100644 (file)
@@ -1139,7 +1139,13 @@ static int handle_gdt_ldt_mapping_fault(
     (((va) >= HYPERVISOR_VIRT_START))
 #endif
 
-static int __spurious_page_fault(
+enum pf_type {
+    real_fault,
+    smep_fault,
+    spurious_fault
+};
+
+static enum pf_type __page_fault_type(
     unsigned long addr, unsigned int error_code)
 {
     unsigned long mfn, cr3 = read_cr3();
@@ -1151,7 +1157,7 @@ static int __spurious_page_fault(
 #endif
     l2_pgentry_t l2e, *l2t;
     l1_pgentry_t l1e, *l1t;
-    unsigned int required_flags, disallowed_flags;
+    unsigned int required_flags, disallowed_flags, page_user;
 
     /*
      * We do not take spurious page faults in IRQ handlers as we do not
@@ -1159,11 +1165,11 @@ static int __spurious_page_fault(
      * map_domain_page() is not IRQ-safe.
      */
     if ( in_irq() )
-        return 0;
+        return real_fault;
 
     /* Reserved bit violations are never spurious faults. */
     if ( error_code & PFEC_reserved_bit )
-        return 0;
+        return real_fault;
 
     required_flags  = _PAGE_PRESENT;
     if ( error_code & PFEC_write_access )
@@ -1175,6 +1181,8 @@ static int __spurious_page_fault(
     if ( error_code & PFEC_insn_fetch )
         disallowed_flags |= _PAGE_NX_BIT;
 
+    page_user = _PAGE_USER;
+
     mfn = cr3 >> PAGE_SHIFT;
 
 #if CONFIG_PAGING_LEVELS >= 4
@@ -1184,7 +1192,8 @@ static int __spurious_page_fault(
     unmap_domain_page(l4t);
     if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
          (l4e_get_flags(l4e) & disallowed_flags) )
-        return 0;
+        return real_fault;
+    page_user &= l4e_get_flags(l4e);
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 3
@@ -1197,13 +1206,14 @@ static int __spurious_page_fault(
     unmap_domain_page(l3t);
 #if CONFIG_PAGING_LEVELS == 3
     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
+        return real_fault;
 #else
     if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
          (l3e_get_flags(l3e) & disallowed_flags) )
-        return 0;
+        return real_fault;
+    page_user &= l3e_get_flags(l3e);
     if ( l3e_get_flags(l3e) & _PAGE_PSE )
-        return 1;
+        goto leaf;
 #endif
 #endif
 
@@ -1213,9 +1223,10 @@ static int __spurious_page_fault(
     unmap_domain_page(l2t);
     if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
          (l2e_get_flags(l2e) & disallowed_flags) )
-        return 0;
+        return real_fault;
+    page_user &= l2e_get_flags(l2e);
     if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
+        goto leaf;
 
     l1t = map_domain_page(mfn);
     l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
@@ -1223,26 +1234,36 @@ static int __spurious_page_fault(
     unmap_domain_page(l1t);
     if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
          (l1e_get_flags(l1e) & disallowed_flags) )
-        return 0;
+        return real_fault;
+    page_user &= l1e_get_flags(l1e);
 
-    return 1;
+leaf:
+    /*
+     * Supervisor Mode Execution Protection (SMEP):
+     * Disallow supervisor execution from user-accessible mappings
+     */
+    if ( (read_cr4() & X86_CR4_SMEP) && page_user &&
+         ((error_code & (PFEC_insn_fetch|PFEC_user_mode)) == PFEC_insn_fetch) )
+        return smep_fault;
+
+    return spurious_fault;
 }
 
-static int spurious_page_fault(
+static enum pf_type spurious_page_fault(
     unsigned long addr, unsigned int error_code)
 {
     unsigned long flags;
-    int           is_spurious;
+    enum pf_type pf_type;
 
     /*
      * Disabling interrupts prevents TLB flushing, and hence prevents
      * page tables from becoming invalid under our feet during the walk.
      */
     local_irq_save(flags);
-    is_spurious = __spurious_page_fault(addr, error_code);
+    pf_type = __page_fault_type(addr, error_code);
     local_irq_restore(flags);
 
-    return is_spurious;
+    return pf_type;
 }
 
 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
@@ -1317,6 +1338,7 @@ asmlinkage void do_page_fault(struct cpu_user_regs *regs)
 {
     unsigned long addr, fixup;
     unsigned int error_code;
+    enum pf_type pf_type;
 
     addr = read_cr2();
 
@@ -1332,7 +1354,9 @@ asmlinkage void do_page_fault(struct cpu_user_regs *regs)
 
     if ( unlikely(!guest_mode(regs)) )
     {
-        if ( spurious_page_fault(addr, error_code) )
+        pf_type = spurious_page_fault(addr, error_code);
+        BUG_ON(pf_type == smep_fault);
+        if ( pf_type != real_fault )
             return;
 
         if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
@@ -1354,9 +1378,17 @@ asmlinkage void do_page_fault(struct cpu_user_regs *regs)
               error_code, _p(addr));
     }
 
-    if ( unlikely(current->domain->arch.suppress_spurious_page_faults
-                  && spurious_page_fault(addr, error_code)) )
-        return;
+    if ( unlikely(current->domain->arch.suppress_spurious_page_faults) )
+    {
+        pf_type = spurious_page_fault(addr, error_code);
+        if ( pf_type == smep_fault )
+        {
+            gdprintk(XENLOG_ERR, "Fatal SMEP fault\n");
+            domain_crash(current->domain);
+        }
+        if ( pf_type != real_fault )
+            return;
+    }
 
     propagate_page_fault(addr, regs->error_code);
 }
index e274664be7766868d7b89bc09624c1b132065fe4..8ef7978e4d4404a7ce1c4c3a31b6bc7d9a0822e9 100644 (file)
 #define X86_FEATURE_TBM         (6*32+21) /* trailing bit manipulations */
 #define X86_FEATURE_TOPOEXT     (6*32+22) /* topology extensions CPUID leafs */
 
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
 #define X86_FEATURE_FSGSBASE   (7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_SMEP       (7*32+ 7) /* Supervisor Mode Execution Protection */
 
 #define cpu_has(c, bit)                test_bit(bit, (c)->x86_capability)
 #define boot_cpu_has(bit)      test_bit(bit, boot_cpu_data.x86_capability)
 #define cpu_has_fsgsbase       boot_cpu_has(X86_FEATURE_FSGSBASE)
 #endif
 
+#define cpu_has_smep            boot_cpu_has(X86_FEATURE_SMEP)
+
 #define cpu_has_ffxsr           ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) \
                                  && boot_cpu_has(X86_FEATURE_FFXSR))
 
index 0488e655bb2d66f6ba347b31c5c03cc53036b7b5..fe877d758895220f80b608fef457ac7c2756da79 100644 (file)
@@ -527,12 +527,14 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
 /* Convert between guest-visible and real CR4 values. */
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
-      | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))    \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)         \
-      | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0))              \
-      & ~X86_CR4_DE)
-#define real_cr4_to_pv_guest_cr4(c) \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
+      | (mmu_cr4_features                                   \
+         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP))      \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)          \
+      | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0))          \
+     & ~X86_CR4_DE)
+#define real_cr4_to_pv_guest_cr4(c)                         \
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
+             | X86_CR4_OSXSAVE | X86_CR4_SMEP))
 
 void domain_cpuid(struct domain *d,
                   unsigned int  input,
index ff807f725099d35194d1a7baba8d17e40604da7e..d64c1428f1405afd64a6e843e0405c07953567b0 100644 (file)
@@ -85,6 +85,7 @@
 #define X86_CR4_SMXE           0x4000  /* enable SMX */
 #define X86_CR4_FSGSBASE       0x10000 /* enable {rd,wr}{fs,gs}base */
 #define X86_CR4_OSXSAVE        0x40000 /* enable XSAVE/XRSTOR */
+#define X86_CR4_SMEP           0x100000/* enable SMEP */
 
 /*
  * Trap/fault mnemonics.