VMX: fix cr0.cd handling
authorLiu Jinsong <jinsong.liu@intel.com>
Wed, 6 Nov 2013 09:12:36 +0000 (10:12 +0100)
committerJan Beulich <jbeulich@suse.com>
Wed, 6 Nov 2013 09:12:36 +0000 (10:12 +0100)
This patch solves XSA-60 security hole:
1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
do nothing, since hardware snoop mechanism has ensured cache coherency.

2. For guest with VT-d but non-snooped, cache coherency can not be
guaranteed by h/w snoop, therefore it need emulate UC type to guest:
2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
guest memory type are all UC.
2.2). if it works w/ shadow, drop all shadows so that any new ones would
be created on demand w/ UC.

This patch also fix a bug of shadow cr0.cd setting. Current shadow has a
small window between cache flush and TLB invalidation, resulting in possilbe
cache pollution. This patch pause vcpus so that no vcpus context involved
into the window.

This is CVE-2013-2212 / XSA-60.

Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Tim Deegan <tim@xen.org>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Jun Nakajima <jun.nakajima@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/vmx/vmcs.c
xen/arch/x86/hvm/vmx/vmx.c
xen/common/domain.c
xen/include/asm-x86/hvm/hvm.h
xen/include/asm-x86/hvm/support.h
xen/include/xen/sched.h

index 090dfaddad2c1514c76f55e4bc56ea01bae3aad0..73d1cf0291a465ccdbdc09bbb82adaa7302359f5 100644 (file)
@@ -1701,6 +1701,40 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
     return X86EMUL_UNHANDLEABLE;
 }
 
+void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
+{
+    if ( value & X86_CR0_CD )
+    {
+        /* Entering no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+
+        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
+        {
+            domain_pause_nosync(v->domain);
+
+            /* Flush physical caches. */
+            on_each_cpu(local_flush_cache, NULL, 1);
+            hvm_set_uc_mode(v, 1);
+
+            domain_unpause(v->domain);
+        }
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+    else if ( !(value & X86_CR0_CD) &&
+              (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+    {
+        /* Exit from no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+
+        if ( domain_exit_uc_mode(v) )
+            hvm_set_uc_mode(v, 0);
+
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+}
+
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
@@ -1784,35 +1818,18 @@ int hvm_set_cr0(unsigned long value)
         }
     }
 
-    if ( has_arch_mmios(v->domain) )
-    {
-        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
-        {
-            /* Entering no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
-
-            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
-            {
-                /* Flush physical caches. */
-                on_each_cpu(local_flush_cache, NULL, 1);
-                hvm_set_uc_mode(v, 1);
-            }
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
-                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
-        {
-            /* Exit from no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
-
-            if ( domain_exit_uc_mode(v) )
-                hvm_set_uc_mode(v, 0);
-
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-    }
+    /*
+     * When cr0.cd setting
+     * 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need not
+     * do anything, since hardware snoop mechanism has ensured cache coherency;
+     * 2. For guest with VT-d but non-snooped, cache coherency cannot be
+     * guaranteed by h/w so need emulate UC memory type to guest.
+     */
+    if ( ((value ^ old_value) & X86_CR0_CD) &&
+           has_arch_pdevs(v->domain) &&
+           iommu_enabled && !iommu_snoop &&
+           hvm_funcs.handle_cd )
+        hvm_funcs.handle_cd(v, value);
 
     v->arch.hvm_vcpu.guest_cr[0] = value;
     hvm_update_guest_cr(v, 0);
index 6916c6d10c57e28179887aeea495fe40285ab2d5..7a01b1ffae018696344325dd683eaf3e4410e91c 100644 (file)
@@ -921,7 +921,7 @@ static int construct_vmcs(struct vcpu *v)
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W);
-        if ( paging_mode_hap(d) )
+        if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) )
             vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
     }
 
index 2a5b3acd73913d5a30e71863414e54b8c29d32eb..75be62eec079e76555df96c20c73da5d4d172253 100644 (file)
@@ -642,6 +642,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
             __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
     }
 
+    /* For guest cr0.cd setting, do not use potentially polluted cache */
+    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+        wbinvd();
+
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
 }
@@ -908,7 +912,8 @@ static unsigned long vmx_get_shadow_gs_base(struct vcpu *v)
 
 static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 {
-    if ( !paging_mode_hap(v->domain) )
+    if ( !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -919,7 +924,8 @@ static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 
 static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
 {
-    if ( !paging_mode_hap(v->domain) )
+    if ( !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -928,6 +934,53 @@ static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
     return 1;
 }
 
+static void vmx_handle_cd(struct vcpu *v, unsigned long value)
+{
+    if ( !paging_mode_hap(v->domain) )
+    {
+        /*
+         * For shadow, 'load IA32_PAT' VM-entry control is 0, so it cannot
+         * set guest memory type as UC via IA32_PAT. Xen drop all shadows
+         * so that any new ones would be created on demand.
+         */
+        hvm_shadow_handle_cd(v, value);
+    }
+    else
+    {
+        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
+
+        if ( value & X86_CR0_CD )
+        {
+            /*
+             * For EPT, set guest IA32_PAT fields as UC so that guest
+             * memory type are all UC.
+             */
+            u64 uc_pat =
+                ((uint64_t)PAT_TYPE_UNCACHABLE)       |       /* PAT0 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 8)  |       /* PAT1 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 16) |       /* PAT2 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |       /* PAT3 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 32) |       /* PAT4 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 40) |       /* PAT5 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 48) |       /* PAT6 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 56);        /* PAT7 */
+
+            vmx_get_guest_pat(v, pat);
+            vmx_set_guest_pat(v, uc_pat);
+
+            wbinvd();               /* flush possibly polluted cache */
+            hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
+            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+        }
+        else
+        {
+            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+            vmx_set_guest_pat(v, *pat);
+            hvm_asid_flush_vcpu(v); /* no need to flush cache */
+        }
+    }
+}
+
 static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
 {
     vmx_vmcs_enter(v);
@@ -1550,6 +1603,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .msr_read_intercept   = vmx_msr_read_intercept,
     .msr_write_intercept  = vmx_msr_write_intercept,
     .invlpg_intercept     = vmx_invlpg_intercept,
+    .handle_cd            = vmx_handle_cd,
     .set_info_guest       = vmx_set_info_guest,
     .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
     .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
index 59997793a6c6a530ee4b3c981d452699b1bd43bf..ce20323fe41b86f22d44994fe2593218d780cad2 100644 (file)
@@ -825,6 +825,16 @@ void domain_pause(struct domain *d)
         vcpu_sleep_sync(v);
 }
 
+void domain_pause_nosync(struct domain *d)
+{
+    struct vcpu *v;
+
+    atomic_inc(&d->pause_count);
+
+    for_each_vcpu( d, v )
+        vcpu_sleep_nosync(v);
+}
+
 void domain_unpause(struct domain *d)
 {
     struct vcpu *v;
index 8dd2b40d3a4e79813ce61872a65a53e8e0f084be..c9afb56cee37d0f3b7fbe48c499b7655ac0ff7a0 100644 (file)
@@ -156,6 +156,7 @@ struct hvm_function_table {
     int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
     int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
     void (*invlpg_intercept)(unsigned long vaddr);
+    void (*handle_cd)(struct vcpu *v, unsigned long value);
     void (*set_info_guest)(struct vcpu *v);
     void (*set_rdtsc_exiting)(struct vcpu *v, bool_t);
 
index 7ddc806637f1e3790ca5fd6145ce8c90414b5750..52aef1f1ce709af3570c3120a9f3e752902d5252 100644 (file)
@@ -130,6 +130,8 @@ void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
 
 int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
 
+void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value);
+
 /* These functions all return X86EMUL return codes. */
 int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value);
index 25bf63776a1da1dbcf65704cafa1d326adbcabb2..06287271604d2f9a5257ee2a205cba6e87baac75 100644 (file)
@@ -725,6 +725,7 @@ void vcpu_unblock(struct vcpu *v);
 void vcpu_pause(struct vcpu *v);
 void vcpu_pause_nosync(struct vcpu *v);
 void domain_pause(struct domain *d);
+void domain_pause_nosync(struct domain *d);
 void vcpu_unpause(struct vcpu *v);
 void domain_unpause(struct domain *d);
 void domain_pause_by_systemcontroller(struct domain *d);