in 64-bit pv guests and 32on64.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
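
For illustration only, a minimal sketch of how a 64-bit PV guest kernel could register the new callback types introduced below. The entry points xen_sysenter_target, xen_syscall32_target and xen_sysret32_return, the include path, and the HYPERVISOR_callback_op wrapper are assumptions about the guest environment, not part of this patch; only CALLBACKOP_register, the CALLBACKTYPE_* values and CALLBACKF_mask_events come from the interface changes below.

    #include <xen/interface/callback.h>  /* guest copy of public/callback.h (path is illustrative) */

    /* Guest fast-system-call entry points (hypothetical names, assumed to exist). */
    extern void xen_sysenter_target(void);   /* Intel: SYSENTER from ring 3 */
    extern void xen_syscall32_target(void);  /* AMD: SYSCALL from 32-bit apps */
    extern void xen_sysret32_return(void);   /* address placed in the SYSENTER frame */

    static void register_fast_syscall_callbacks(void)
    {
        /* 64-bit guest ABI: the address field is a plain virtual address. */
        struct callback_register sysenter = {
            .type    = CALLBACKTYPE_sysenter,
            .address = (unsigned long)xen_sysenter_target,
            .flags   = CALLBACKF_mask_events,  /* enter with event delivery masked */
        };
        struct callback_register syscall32 = {
            .type    = CALLBACKTYPE_syscall32,
            .address = (unsigned long)xen_syscall32_target,
            .flags   = CALLBACKF_mask_events,
        };
        struct callback_register sysexit = {
            .type    = CALLBACKTYPE_sysexit,
            .address = (unsigned long)xen_sysret32_return,
            /* cs is forced to FLAT_USER_CS32 for 64-bit guest kernels. */
        };

        /* Callback types the hypervisor does not recognise return -ENOSYS
         * after this patch, so a guest can probe for support and fall back
         * to its existing system-call path. */
        HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter);
        HYPERVISOR_callback_op(CALLBACKOP_register, &syscall32);
        HYPERVISOR_callback_op(CALLBACKOP_register, &sysexit);
    }
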
if (c->x86 < 6)
clear_bit(X86_FEATURE_MCE, c->x86_capability);
+#ifdef __x86_64__
+ /* AMD CPUs do not support SYSENTER outside of legacy mode. */
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
+#endif
+
/* Prevent TSC drift in non single-processor, single-core platforms. */
if ((smp_processor_id() == 1) && c1_ramping_may_cause_clock_drift(c))
disable_c1_ramping();
v->arch.perdomain_ptes =
d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
+#ifdef __x86_64__
+ v->arch.sysexit_cs = 3;
+#endif
+
return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
}
is_pv_32on64_vcpu(prev) != is_pv_32on64_vcpu(next)) )
{
uint64_t efer = read_efer();
-
+ if ( !(efer & EFER_SCE) )
+ write_efer(efer | EFER_SCE);
flush_tlb_one_local(GDT_VIRT_START(next) +
FIRST_RESERVED_GDT_BYTE);
-
- if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
- write_efer(efer ^ EFER_SCE);
}
#endif
clear_bit(X86_FEATURE_DE, &d);
clear_bit(X86_FEATURE_PSE, &d);
clear_bit(X86_FEATURE_PGE, &d);
+ if ( !cpu_has_sep )
+ clear_bit(X86_FEATURE_SEP, &d);
+#ifdef __i386__
if ( !supervisor_mode_kernel )
clear_bit(X86_FEATURE_SEP, &d);
+#endif
if ( !IS_PRIV(current->domain) )
clear_bit(X86_FEATURE_MTRR, &d);
}
else if ( regs->eax == 0x80000001 )
{
/* Modify Feature Information. */
- if ( is_pv_32bit_vcpu(current) )
- clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#ifdef __i386__
+ clear_bit(X86_FEATURE_SYSCALL % 32, &d);
+#endif
clear_bit(X86_FEATURE_RDTSCP % 32, &d);
}
else
if ( !guest_mode(regs) )
{
+#ifdef __x86_64__
+ void sysenter_entry(void);
+ void sysenter_eflags_saved(void);
+ /* In SYSENTER entry path we cannot zap TF until EFLAGS is saved. */
+ if ( (regs->rip >= (unsigned long)sysenter_entry) &&
+ (regs->rip < (unsigned long)sysenter_eflags_saved) )
+ goto out;
+ WARN_ON(regs->rip != (unsigned long)sysenter_eflags_saved);
+#else
+ WARN_ON(1);
+#endif
/* Clear TF just for absolute sanity. */
regs->eflags &= ~EF_TF;
/*
break;
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
- case CALLBACKTYPE_sysenter:
- if ( ! cpu_has_sep )
+ case CALLBACKTYPE_sysenter_deprecated:
+ if ( !cpu_has_sep )
ret = -EINVAL;
else if ( on_each_cpu(do_update_sysenter, &reg->address, 1, 1) != 0 )
ret = -EIO;
break;
+
+ case CALLBACKTYPE_sysenter:
+ if ( !cpu_has_sep )
+ ret = -EINVAL;
+ else
+ do_update_sysenter(&reg->address);
+ break;
#endif
case CALLBACKTYPE_nmi:
case CALLBACKTYPE_event:
case CALLBACKTYPE_failsafe:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
+ case CALLBACKTYPE_sysenter_deprecated:
case CALLBACKTYPE_sysenter:
#endif
ret = -EINVAL;
arch.guest_context.failsafe_callback_cs);
OFFSET(VCPU_syscall_addr, struct vcpu,
arch.guest_context.syscall_callback_eip);
+ OFFSET(VCPU_syscall32_addr, struct vcpu, arch.syscall32_callback_eip);
+ OFFSET(VCPU_syscall32_sel, struct vcpu, arch.syscall32_callback_cs);
+ OFFSET(VCPU_syscall32_disables_events, struct vcpu,
+ arch.syscall32_disables_events);
+ OFFSET(VCPU_sysenter_addr, struct vcpu, arch.sysenter_callback_eip);
+ OFFSET(VCPU_sysenter_sel, struct vcpu, arch.sysenter_callback_cs);
+ OFFSET(VCPU_sysenter_disables_events, struct vcpu,
+ arch.sysenter_disables_events);
+ OFFSET(VCPU_sysexit_addr, struct vcpu, arch.sysexit_eip);
+ OFFSET(VCPU_sysexit_sel, struct vcpu, arch.sysexit_cs);
+ OFFSET(VCPU_gp_fault_addr, struct vcpu,
+ arch.guest_context.trap_ctxt[TRAP_gp_fault].address);
+ OFFSET(VCPU_gp_fault_sel, struct vcpu,
+ arch.guest_context.trap_ctxt[TRAP_gp_fault].cs);
OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp);
OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
movb $0,TRAPBOUNCE_flags(%rdx)
jmp compat_test_all_events
+ENTRY(compat_syscall)
+ cmpb $0,VCPU_syscall32_disables_events(%rbx)
+ movzwl VCPU_syscall32_sel(%rbx),%esi
+ movq VCPU_syscall32_addr(%rbx),%rax
+ setne %cl
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ testl $~3,%esi
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ jz 2f
+1: movq %rax,TRAPBOUNCE_eip(%rdx)
+ movw %si,TRAPBOUNCE_cs(%rdx)
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ call compat_create_bounce_frame
+ jmp compat_test_all_events
+2: movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ movq VCPU_gp_fault_addr(%rbx),%rax
+ movzwl VCPU_gp_fault_sel(%rbx),%esi
+ movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+ movl $0,TRAPBOUNCE_error_code(%rdx)
+ jmp 1b
+
+ENTRY(compat_sysenter)
+ cmpl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ movzwl VCPU_sysenter_sel(%rbx),%eax
+ movzwl VCPU_gp_fault_sel(%rbx),%ecx
+ cmovel %ecx,%eax
+ testl $~3,%eax
+ movl $FLAT_COMPAT_USER_SS,UREGS_ss(%rsp)
+ cmovzl %ecx,%eax
+ movw %ax,TRAPBOUNCE_cs(%rdx)
+ call compat_create_bounce_frame
+ jmp compat_test_all_events
+
ENTRY(compat_int80_direct_trap)
call compat_create_bounce_frame
jmp compat_test_all_events
setz %ch # %ch == !saved_upcall_mask
movl UREGS_eflags+8(%rsp),%eax
andl $~X86_EFLAGS_IF,%eax
- shlb $1,%ch # Bit 9 (EFLAGS.IF)
+ addb %ch,%ch # Bit 9 (EFLAGS.IF)
orb %ch,%ah # Fold EFLAGS.IF into %eax
.Lft6: movl %eax,%fs:2*4(%rsi) # EFLAGS
movl UREGS_rip+8(%rsp),%eax
&v->arch.guest_context.flags);
break;
+ case CALLBACKTYPE_syscall32:
+ v->arch.syscall32_callback_cs = reg->address.cs;
+ v->arch.syscall32_callback_eip = reg->address.eip;
+ v->arch.syscall32_disables_events =
+ (reg->flags & CALLBACKF_mask_events) != 0;
+ break;
+
+ case CALLBACKTYPE_sysenter:
+ v->arch.sysenter_callback_cs = reg->address.cs;
+ v->arch.sysenter_callback_eip = reg->address.eip;
+ v->arch.sysenter_disables_events =
+ (reg->flags & CALLBACKF_mask_events) != 0;
+ break;
+
+ case CALLBACKTYPE_sysexit:
+ v->arch.sysexit_cs = reg->address.cs | 3;
+ v->arch.sysexit_eip = reg->address.eip;
+ break;
+
case CALLBACKTYPE_nmi:
ret = register_guest_nmi_callback(reg->address.eip);
break;
default:
- ret = -EINVAL;
+ ret = -ENOSYS;
break;
}
switch ( unreg->type )
{
+ case CALLBACKTYPE_event:
+ case CALLBACKTYPE_failsafe:
+ case CALLBACKTYPE_syscall32:
+ case CALLBACKTYPE_sysenter:
+ case CALLBACKTYPE_sysexit:
+ ret = -EINVAL;
+ break;
+
case CALLBACKTYPE_nmi:
ret = unregister_guest_nmi_callback();
break;
default:
- ret = -EINVAL;
+ ret = -ENOSYS;
break;
}
/* %rbx: struct vcpu */
switch_to_kernel:
leaq VCPU_trap_bounce(%rbx),%rdx
- movq VCPU_syscall_addr(%rbx),%rax
+ /* TB_eip = (32-bit syscall && syscall32_addr) ?
+ * syscall32_addr : syscall_addr */
+ xor %eax,%eax
+ cmpw $FLAT_USER_CS32,UREGS_cs(%rsp)
+ cmoveq VCPU_syscall32_addr(%rbx),%rax
+ testq %rax,%rax
+ cmovzq VCPU_syscall_addr(%rbx),%rax
movq %rax,TRAPBOUNCE_eip(%rdx)
- movb $0,TRAPBOUNCE_flags(%rdx)
- bt $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
- jnc 1f
- movb $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
-1: call create_bounce_frame
+ /* TB_flags = VGCF_syscall_disables_events ? TBF_INTERRUPT : 0 */
+ btl $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx)
+ setc %cl
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ call create_bounce_frame
andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
jmp test_all_events
addq $8,%rsp
popq %rcx # RIP
popq %r11 # CS
- cmpw $FLAT_KERNEL_CS32,%r11
+ cmpw $FLAT_USER_CS32,%r11
popq %r11 # RFLAGS
popq %rsp # RSP
je 1f
movq 24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
SAVE_ALL
GET_CURRENT(%rbx)
+ movq VCPU_domain(%rbx),%rcx
+ testb $1,DOMAIN_is_32bit_pv(%rcx)
+ jnz compat_syscall
testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
jz switch_to_kernel
movq $-ENOSYS,UREGS_rax(%rsp)
jmp test_all_events
+ENTRY(sysenter_entry)
+ sti
+ pushq $FLAT_USER_SS
+ pushq $0
+ pushfq
+ .globl sysenter_eflags_saved
+sysenter_eflags_saved:
+ pushq $0
+ pushq $0
+ pushq $0
+ movl $TRAP_syscall,4(%rsp)
+ SAVE_ALL
+ GET_CURRENT(%rbx)
+ movq VCPU_sysexit_addr(%rbx),%rax
+ movzwl VCPU_sysexit_sel(%rbx),%edx
+ cmpb $0,VCPU_sysenter_disables_events(%rbx)
+ movq %rax,UREGS_rip(%rsp)
+ movl %edx,UREGS_cs(%rsp)
+ movq VCPU_sysenter_addr(%rbx),%rax
+ setne %cl
+ leaq VCPU_trap_bounce(%rbx),%rdx
+ testq %rax,%rax
+ leal (,%rcx,TBF_INTERRUPT),%ecx
+ jz 2f
+1: movq VCPU_domain(%rbx),%rdi
+ movq %rax,TRAPBOUNCE_eip(%rdx)
+ movb %cl,TRAPBOUNCE_flags(%rdx)
+ testb $1,DOMAIN_is_32bit_pv(%rdi)
+ jnz compat_sysenter
+ call create_bounce_frame
+ jmp test_all_events
+2: movl %eax,TRAPBOUNCE_error_code(%rdx)
+ movq VCPU_gp_fault_addr(%rbx),%rax
+ movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
+ movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ jmp 1b
+
ENTRY(int80_direct_trap)
pushq $0
SAVE_ALL
shrq $32,%rax
testb $0xFF,%al # Bits 0-7: saved_upcall_mask
setz %ch # %ch == !saved_upcall_mask
- movq UREGS_eflags+8(%rsp),%rax
- andq $~X86_EFLAGS_IF,%rax
- shlb $1,%ch # Bit 9 (EFLAGS.IF)
+ movl UREGS_eflags+8(%rsp),%eax
+ andl $~X86_EFLAGS_IF,%eax
+ addb %ch,%ch # Bit 9 (EFLAGS.IF)
orb %ch,%ah # Fold EFLAGS.IF into %eax
.Lft5: movq %rax,16(%rsi) # RFLAGS
movq UREGS_rip+8(%rsp),%rax
#include <public/callback.h>
asmlinkage void syscall_enter(void);
+asmlinkage void sysenter_entry(void);
asmlinkage void compat_hypercall(void);
asmlinkage void int80_direct_trap(void);
/* Trampoline for SYSCALL entry from long mode. */
stack = &stack[IST_MAX * PAGE_SIZE]; /* Skip the IST stacks. */
- wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+ wrmsrl(MSR_LSTAR, (unsigned long)stack);
stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS64);
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ {
+ /* SYSENTER entry. */
+ wrmsrl(MSR_IA32_SYSENTER_ESP, (unsigned long)stack_bottom);
+ wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
+ wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
+ }
+
/* Trampoline for SYSCALL entry from compatibility mode. */
- wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
- stack += write_stack_trampoline(stack, stack_bottom, FLAT_KERNEL_CS32);
+ stack = (char *)L1_CACHE_ALIGN((unsigned long)stack);
+ wrmsrl(MSR_CSTAR, (unsigned long)stack);
+ stack += write_stack_trampoline(stack, stack_bottom, FLAT_USER_CS32);
/* Common SYSCALL parameters. */
wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS);
long ret = 0;
struct vcpu *v = current;
+ if ( !is_canonical_address(reg->address) )
+ return -EINVAL;
+
switch ( reg->type )
{
case CALLBACKTYPE_event:
&v->arch.guest_context.flags);
break;
+ case CALLBACKTYPE_syscall32:
+ v->arch.syscall32_callback_eip = reg->address;
+ v->arch.syscall32_disables_events =
+ !!(reg->flags & CALLBACKF_mask_events);
+ break;
+
+ case CALLBACKTYPE_sysenter:
+ v->arch.sysenter_callback_eip = reg->address;
+ v->arch.sysenter_disables_events =
+ !!(reg->flags & CALLBACKF_mask_events);
+ break;
+
+ case CALLBACKTYPE_sysexit:
+ v->arch.sysexit_eip = reg->address;
+ v->arch.sysexit_cs = FLAT_USER_CS32;
+ break;
+
case CALLBACKTYPE_nmi:
ret = register_guest_nmi_callback(reg->address);
break;
case CALLBACKTYPE_event:
case CALLBACKTYPE_failsafe:
case CALLBACKTYPE_syscall:
+ case CALLBACKTYPE_syscall32:
+ case CALLBACKTYPE_sysenter:
+ case CALLBACKTYPE_sysexit:
ret = -EINVAL;
break;
#define cpu_has_pge 1
#define cpu_has_pat 1
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep 0
+#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
#define cpu_has_mtrr 1
#define cpu_has_mmx 1
#define cpu_has_fxsr 1
#endif
#ifdef CONFIG_X86_64
struct trap_bounce int80_bounce;
+ unsigned long syscall32_callback_eip;
+ unsigned long sysenter_callback_eip;
+ unsigned long sysexit_eip;
+ unsigned short syscall32_callback_cs;
+ unsigned short sysenter_callback_cs;
+ unsigned short sysexit_cs;
+ bool_t syscall32_disables_events;
+ bool_t sysenter_disables_events;
#endif
/* Virtual Machine Extensions */
* @extra_args == Operation-specific extra arguments (NULL if none).
*/
+/* ia64, x86: Callback for event delivery. */
#define CALLBACKTYPE_event 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
#define CALLBACKTYPE_failsafe 1
-#define CALLBACKTYPE_syscall 2 /* x86_64 only */
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall 2
+
/*
- * sysenter is only available on x86_32 with the
- * supervisor_mode_kernel option enabled.
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ * feature is enabled. Do not use this callback type in new code.
*/
-#define CALLBACKTYPE_sysenter 3
+#define CALLBACKTYPE_sysenter_deprecated 3
+
+/* x86: Callback for NMI delivery. */
#define CALLBACKTYPE_nmi 4
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ * [nb. also 64-bit guest applications on Intel CPUs
+ * ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter 5
+
+/*
+ * x86/64 hypervisor: used to fill a sysenter frame's return address, if the
+ * guest desires to have a non-NULL value there. If the guest kernel is
+ * 64-bit then the sysexit code selector is always set to FLAT_USER_CS32.
+ */
+#define CALLBACKTYPE_sysexit 6
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32 7
+
/*
* Disable event delivery during callback? This flag is ignored for event and
* NMI callbacks: event delivery is unconditionally disabled.
typedef struct callback_unregister callback_unregister_t;
DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+#if __XEN_INTERFACE_VERSION__ < 0x00030207
+#undef CALLBACKTYPE_sysenter
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#endif
+
#endif /* __XEN_PUBLIC_CALLBACK_H__ */
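
As the interface-version shim above shows, guests built against an older interface version keep the previous meaning of CALLBACKTYPE_sysenter. A minimal sketch, assuming a guest that pins the version explicitly (define and include path are illustrative):

    #define __XEN_INTERFACE_VERSION__ 0x00030206
    #include <xen/interface/callback.h>
    /* Here CALLBACKTYPE_sysenter still evaluates to 3, i.e. the deprecated
     * x86-32 supervisor_mode_kernel variant, so existing callers are
     * unaffected; code built against 0x00030207 or later gets the new
     * definition (value 5) introduced by this patch. */
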
/*
#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
#define __XEN_PUBLIC_XEN_COMPAT_H__
-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030207
#if defined(__XEN__) || defined(__XEN_TOOLS__)
/* Xen is built with matching headers and implements the latest interface. */