{
__context_switch();
- if ( is_pv_domain(nextd) &&
- (is_idle_domain(prevd) ||
- is_hvm_domain(prevd) ||
- is_pv_32bit_domain(prevd) != is_pv_32bit_domain(nextd)) )
- {
- uint64_t efer = read_efer();
- if ( !(efer & EFER_SCE) )
- write_efer(efer | EFER_SCE);
- }
-
/* Re-enable interrupts before restoring state which may fault. */
local_irq_enable();
}
min = VM_EXIT_ACK_INTR_ON_EXIT;
- opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT |
- VM_EXIT_CLEAR_BNDCFGS;
+ opt = (VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT |
+ VM_EXIT_LOAD_HOST_EFER | VM_EXIT_CLEAR_BNDCFGS);
min |= VM_EXIT_IA32E_MODE;
_vmx_vmexit_control = adjust_vmx_controls(
"VMExit Control", min, opt, MSR_IA32_VMX_EXIT_CTLS, &mismatch);
_vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS;
min = 0;
- opt = VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_BNDCFGS;
+ opt = (VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER |
+ VM_ENTRY_LOAD_BNDCFGS);
_vmx_vmentry_control = adjust_vmx_controls(
"VMEntry Control", min, opt, MSR_IA32_VMX_ENTRY_CTLS, &mismatch);
v->arch.hvm_vmx.host_cr0 |= X86_CR0_TS;
__vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
__vmwrite(HOST_CR4, mmu_cr4_features);
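+ /* Record Xen's current EFER as the host value to restore on VMExit. */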
+ if ( cpu_has_vmx_efer )
+ __vmwrite(HOST_EFER, read_efer());
/* Host CS:RIP. */
__vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
vmentry_ctl = vmr32(VM_ENTRY_CONTROLS),
vmexit_ctl = vmr32(VM_EXIT_CONTROLS);
cr4 = vmr(GUEST_CR4);
- efer = vmr(GUEST_EFER);
+
+ /*
+ * The guest's EFER setting comes from the GUEST_EFER VMCS field whenever
+ * available, or from the guest load-only MSR list on Gen1 hardware; the
+ * list entry may be elided for performance reasons if it is identical to
+ * Xen's setting.
+ */
+ if ( cpu_has_vmx_efer )
+ efer = vmr(GUEST_EFER);
+ else if ( vmx_read_guest_loadonly_msr(v, MSR_EFER, &efer) )
+ efer = read_efer();
printk("*** Guest State ***\n");
printk("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
vmx_dump_sel(" TR", GUEST_TR_SELECTOR);
- if ( (vmexit_ctl & (VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_SAVE_GUEST_EFER)) ||
- (vmentry_ctl & (VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER)) )
- printk("EFER = 0x%016lx PAT = 0x%016lx\n", efer, vmr(GUEST_PAT));
+ printk("EFER(%s) = 0x%016lx PAT = 0x%016lx\n",
+ cpu_has_vmx_efer ? "VMCS" : "MSR LL", efer, vmr(GUEST_PAT));
printk("PreemptionTimer = 0x%08x SM Base = 0x%08x\n",
vmr32(GUEST_PREEMPTION_TIMER), vmr32(GUEST_SMBASE));
printk("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n",
wrmsrl(MSR_LSTAR, v->arch.hvm_vmx.lstar);
wrmsrl(MSR_SYSCALL_MASK, v->arch.hvm_vmx.sfmask);
- if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_SCE )
- {
- HVM_DBG_LOG(DBG_LEVEL_2,
- "restore guest's EFER with value %lx",
- v->arch.hvm_vcpu.guest_efer);
- write_efer((read_efer() & ~EFER_SCE) |
- (v->arch.hvm_vcpu.guest_efer & EFER_SCE));
- }
-
if ( cpu_has_rdtscp )
wrmsr_tsc_aux(hvm_msr_tsc_aux(v));
}
static void vmx_update_guest_efer(struct vcpu *v)
{
- unsigned long vm_entry_value;
+ unsigned long entry_ctls, guest_efer = v->arch.hvm_vcpu.guest_efer,
+ xen_efer = read_efer();
+
+ if ( paging_mode_shadow(v->domain) )
+ {
+ /*
+ * When using shadow pagetables, EFER.NX is a Xen-owned bit and is not
+ * under guest control.
+ */
+ guest_efer &= ~EFER_NX;
+ guest_efer |= xen_efer & EFER_NX;
+ }
+
+ if ( !vmx_unrestricted_guest(v) )
+ {
+ /*
+ * When Unrestricted Guest is not enabled in the VMCS, hardware does
+ * not tolerate the LME and LMA settings being different. As writes
+ * to CR0 are intercepted, it is safe to leave LME clear at this
+ * point, and fix up both LME and LMA when CR0.PG is set.
+ */
+ if ( !(guest_efer & EFER_LMA) )
+ guest_efer &= ~EFER_LME;
+ }
vmx_vmcs_enter(v);
- __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
- if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA )
- vm_entry_value |= VM_ENTRY_IA32E_MODE;
+ /*
+ * The intended guest running mode is derived from VM_ENTRY_IA32E_MODE,
+ * which (architecturally) is the guest's LMA setting.
+ */
+ __vmread(VM_ENTRY_CONTROLS, &entry_ctls);
+
+ entry_ctls &= ~VM_ENTRY_IA32E_MODE;
+ if ( guest_efer & EFER_LMA )
+ entry_ctls |= VM_ENTRY_IA32E_MODE;
+
+ __vmwrite(VM_ENTRY_CONTROLS, entry_ctls);
+
+ /* We expect to use EFER loading in the common case, but... */
+ if ( likely(cpu_has_vmx_efer) )
+ __vmwrite(GUEST_EFER, guest_efer);
+
+ /* ... on Gen1 VT-x hardware, we have to use MSR load/save lists instead. */
else
- vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
- __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+ {
+ /*
+ * When the guest's choice of EFER matches Xen's, remove the load/save
+ * list entries. They are unnecessary overhead, especially as this is
+ * expected to be the common case for 64bit guests.
+ */
+ if ( guest_efer == xen_efer )
+ {
+ vmx_del_msr(v, MSR_EFER, VMX_MSR_HOST);
+ vmx_del_msr(v, MSR_EFER, VMX_MSR_GUEST_LOADONLY);
+ }
+ else
+ {
+ vmx_add_msr(v, MSR_EFER, xen_efer, VMX_MSR_HOST);
+ vmx_add_msr(v, MSR_EFER, guest_efer, VMX_MSR_GUEST_LOADONLY);
+ }
+ }
vmx_vmcs_exit(v);
- if ( v == current )
- write_efer((read_efer() & ~EFER_SCE) |
- (v->arch.hvm_vcpu.guest_efer & EFER_SCE));
+ /*
+ * If the guest's virtualised view of MSR_EFER matches the value loaded
+ * into hardware, clear the read intercept to avoid unnecessary VMExits.
+ */
+ if ( guest_efer == v->arch.hvm_vcpu.guest_efer )
+ vmx_clear_msr_intercept(v, MSR_EFER, VMX_MSR_R);
+ else
+ vmx_set_msr_intercept(v, MSR_EFER, VMX_MSR_R);
}
void nvmx_enqueue_n2_exceptions(struct vcpu *v,
(hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_SMEP))
#define hvm_smap_enabled(v) \
(hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_SMAP))
-/* HVM guests on Intel hardware leak Xen's NX settings into guest context. */
#define hvm_nx_enabled(v) \
- ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && cpu_has_nx) || \
- ((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
+ ((v)->arch.hvm_vcpu.guest_efer & EFER_NX)
#define hvm_pku_enabled(v) \
(hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PKE))
(vmx_cpu_based_exec_control & CPU_BASED_MONITOR_TRAP_FLAG)
#define cpu_has_vmx_pat \
(vmx_vmentry_control & VM_ENTRY_LOAD_GUEST_PAT)
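+/* VMEntry can load the guest's EFER directly from the VMCS. */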
+#define cpu_has_vmx_efer \
+ (vmx_vmentry_control & VM_ENTRY_LOAD_GUEST_EFER)
#define cpu_has_vmx_unrestricted_guest \
(vmx_secondary_exec_control & SECONDARY_EXEC_UNRESTRICTED_GUEST)
#define vmx_unrestricted_guest(v) \
return 0;
}
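+/*
+ * Look up the value queued for loading into the guest for @msr via the
+ * guest load-only MSR list. Returns -ESRCH if no entry exists.
+ */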
+static inline int vmx_read_guest_loadonly_msr(
+ const struct vcpu *v, uint32_t msr, uint64_t *val)
+{
+ const struct vmx_msr_entry *ent =
+ vmx_find_msr(v, msr, VMX_MSR_GUEST_LOADONLY);
+
+ if ( !ent )
+ return -ESRCH;
+
+ *val = ent->data;
+
+ return 0;
+}
+
static inline int vmx_write_guest_msr(struct vcpu *v, uint32_t msr,
uint64_t val)
{