From: Ian.Campbell@xensource.com Date: Wed, 11 Jan 2006 15:52:33 +0000 (+0000) Subject: Pass NMIs to DOM0 via a dedicated callback, Xen x86_64 support. X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~16541^2~61^2~11 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=dfafd0de0cd52ed57f31a635b165ae4ca3ad498e;p=xen.git Pass NMIs to DOM0 via a dedicated callback, Xen x86_64 support. Handle NMI interrupts and dispatch to dom0 on x86_64. Renames the switch_to_user hypercall to iret. Extend the semantics to include returns to guest/kernel if CS indicates ring 1. Retain the old semantics of returning to guest/user if CS indicates ring3. Plumb in nmi_op hypercall to generic code. Signed-off-by: Ian Campbell --- diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 53e129178c..f492dac762 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -659,35 +659,6 @@ static void save_segments(struct vcpu *v) percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask; } -long do_switch_to_user(void) -{ - struct cpu_user_regs *regs = guest_cpu_user_regs(); - struct switch_to_user stu; - struct vcpu *v = current; - - if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) || - unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) ) - return -EFAULT; - - toggle_guest_mode(v); - - regs->rip = stu.rip; - regs->cs = stu.cs | 3; /* force guest privilege */ - regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE; - regs->rsp = stu.rsp; - regs->ss = stu.ss | 3; /* force guest privilege */ - - if ( !(stu.flags & VGCF_IN_SYSCALL) ) - { - regs->entry_vector = 0; - regs->r11 = stu.r11; - regs->rcx = stu.rcx; - } - - /* Saved %rax gets written back to regs->rax in entry.S. 
*/ - return stu.rax; -} - #define switch_kernel_stack(_n,_c) ((void)0) #elif defined(__i386__) diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c index c7a3e6025c..0aa20ccabb 100644 --- a/xen/arch/x86/x86_64/asm-offsets.c +++ b/xen/arch/x86/x86_64/asm-offsets.c @@ -65,6 +65,10 @@ void __dummy__(void) arch.guest_context.syscall_callback_eip); OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp); + OFFSET(VCPU_flags, struct vcpu, vcpu_flags); + OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr); + DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending); + DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked); BLANK(); OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending); diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index 3c5c344a1a..88fe273bab 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -171,7 +171,9 @@ test_all_events: leaq irq_stat(%rip),%rcx testl $~0,(%rcx,%rax,1) jnz process_softirqs -/*test_guest_events:*/ + btr $_VCPUF_nmi_pending,VCPU_flags(%rbx) + jc process_nmi +test_guest_events: movq VCPU_vcpu_info(%rbx),%rax testb $0xFF,VCPUINFO_upcall_mask(%rax) jnz restore_all_guest @@ -322,6 +324,23 @@ process_softirqs: call do_softirq jmp test_all_events + ALIGN +/* %rbx: struct vcpu */ +process_nmi: + movq VCPU_nmi_addr(%rbx),%rax + test %rax,%rax + jz test_all_events + bts $_VCPUF_nmi_masked,VCPU_flags(%rbx) + jc 1f + sti + leaq VCPU_trap_bounce(%rbx),%rdx + movq %rax,TRAPBOUNCE_eip(%rdx) + movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx) + call create_bounce_frame + jmp test_all_events +1: bts $_VCPUF_nmi_pending,VCPU_flags(%rbx) + jmp test_guest_events + /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK: */ /* { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS } */ /* %rdx: trap_bounce, %rbx: struct vcpu */ @@ -339,6 +358,9 @@ create_bounce_frame: 1: /* In kernel context already: push new frame at existing %rsp. 
*/ movq UREGS_rsp+8(%rsp),%rsi andb $0xfc,UREGS_cs+8(%rsp) # Indicate kernel context to guest. + testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx) + jz 2f + orb $0x01,UREGS_cs+8(%rsp) 2: andq $~0xf,%rsi # Stack frames are 16-byte aligned. movq $HYPERVISOR_VIRT_START,%rax cmpq %rax,%rsi @@ -569,7 +591,7 @@ ENTRY(nmi) SAVE_ALL movq %rsp,%rdi call do_nmi - jmp restore_all_xen + jmp ret_from_intr do_arch_sched_op: # Ensure we return success even if we return via schedule_tail() @@ -626,11 +648,12 @@ ENTRY(hypercall_table) .quad do_grant_table_op /* 20 */ .quad do_vm_assist .quad do_update_va_mapping_otherdomain - .quad do_switch_to_user + .quad do_iret .quad do_vcpu_op .quad do_set_segment_base /* 25 */ .quad do_mmuext_op .quad do_acm_op + .quad do_nmi_op .rept NR_hypercalls-((.-hypercall_table)/4) .quad do_ni_hypercall .endr @@ -659,11 +682,12 @@ ENTRY(hypercall_args_table) .byte 3 /* do_grant_table_op */ /* 20 */ .byte 2 /* do_vm_assist */ .byte 4 /* do_update_va_mapping_otherdomain */ - .byte 0 /* do_switch_to_user */ + .byte 0 /* do_iret */ .byte 3 /* do_vcpu_op */ .byte 2 /* do_set_segment_base */ /* 25 */ .byte 4 /* do_mmuext_op */ .byte 1 /* do_acm_op */ + .byte 2 /* do_nmi_op */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c index 4f7c822ef8..e14daa3e72 100644 --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include void show_registers(struct cpu_user_regs *regs) @@ -113,6 +114,42 @@ asmlinkage void do_double_fault(struct cpu_user_regs *regs) __asm__ __volatile__ ( "hlt" ); } +extern void toggle_guest_mode(struct vcpu *); + +long do_iret(void) +{ + struct cpu_user_regs *regs = guest_cpu_user_regs(); + struct iret_context iret_saved; + struct vcpu *v = current; + + if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp, sizeof(iret_saved))) || + 
unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) ) + return -EFAULT; + + /* returning to user mode */ + if ((iret_saved.cs & 0x03) == 3) + toggle_guest_mode(v); + + regs->rip = iret_saved.rip; + regs->cs = iret_saved.cs | 3; /* force guest privilege */ + regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE; + regs->rsp = iret_saved.rsp; + regs->ss = iret_saved.ss | 3; /* force guest privilege */ + + if ( !(iret_saved.flags & VGCF_IN_SYSCALL) ) + { + regs->entry_vector = 0; + regs->r11 = iret_saved.r11; + regs->rcx = iret_saved.rcx; + } + + /* No longer in NMI context */ + clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags); + + /* Saved %rax gets written back to regs->rax in entry.S. */ + return iret_saved.rax; +} + asmlinkage void syscall_enter(void); void __init percpu_traps_init(void) { diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index d3bcc69904..18748e3367 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -123,6 +123,7 @@ #define TBF_EXCEPTION_ERRCODE 2 #define TBF_INTERRUPT 8 #define TBF_FAILSAFE 16 +#define TBF_SLOW_IRET 32 /* 'arch_vcpu' flags values */ #define _TF_kernel_mode 0 diff --git a/xen/include/public/arch-x86_64.h b/xen/include/public/arch-x86_64.h index 03bc3ec08c..e9e69db069 100644 --- a/xen/include/public/arch-x86_64.h +++ b/xen/include/public/arch-x86_64.h @@ -88,11 +88,20 @@ #define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ /* - * int HYPERVISOR_switch_to_user(void) + * int HYPERVISOR_iret(void) * All arguments are on the kernel stack, in the following format. * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. 
+ * However RING0 indicates that the guest kernel should return to itself + * directly with + * orb $3,1*8(%rsp) + * iretq * If flags contains VGCF_IN_SYSCALL: - * Restore RAX, RIP, RFLAGS, RSP. + * Restore RAX, RIP, RFLAGS, RSP. * Discard R11, RCX, CS, SS. * Otherwise: * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. @@ -100,10 +109,17 @@ */ /* Guest exited in SYSCALL context? Return to guest with SYSRET? */ #define VGCF_IN_SYSCALL (1<<8) +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; +/* For compatibility with HYPERVISOR_switch_to_user which is the old + * name for HYPERVISOR_iret */ struct switch_to_user { /* Top of stack (%rsp at point of hypercall). */ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; - /* Bottom of switch_to_user stack frame. */ + /* Bottom of iret stack frame. */ }; /* diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index c141dbdc66..ca1d4d1d03 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -53,9 +53,9 @@ #define __HYPERVISOR_grant_table_op 20 #define __HYPERVISOR_vm_assist 21 #define __HYPERVISOR_update_va_mapping_otherdomain 22 -#define __HYPERVISOR_iret 23 /* x86/32 only */ +#define __HYPERVISOR_iret 23 /* x86 only */ #define __HYPERVISOR_switch_vm86 23 /* x86/32 only (obsolete name) */ -#define __HYPERVISOR_switch_to_user 23 /* x86/64 only */ +#define __HYPERVISOR_switch_to_user 23 /* x86/64 only (obsolete name) */ #define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26