From: kaf24@firebug.cl.cam.ac.uk Date: Fri, 11 Mar 2005 16:35:53 +0000 (+0000) Subject: bitkeeper revision 1.1236.25.5 (4231c8e9V-iDqxIZGUl47adwrecjjw) X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~17857^2~26^2~16 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=ac2f9562e738d9ff8b9809d10054a595e210b36b;p=xen.git bitkeeper revision 1.1236.25.5 (4231c8e9V-iDqxIZGUl47adwrecjjw) Virtualize I/O privilege level (IOPL) to disallow direct execution of IN/OUT/CLI/STI instructions by guests. Signed-off-by: Keir Fraser --- diff --git a/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c b/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c index 905b51c113..d8ef0b95d8 100644 --- a/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c @@ -214,7 +214,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, struct task_struct * p, struct pt_regs * regs) { struct pt_regs * childregs; - unsigned long eflags; childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; struct_cpy(childregs, regs); @@ -232,9 +231,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, unlazy_fpu(current); struct_cpy(&p->thread.i387, &current->thread.i387); - - __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : ); - p->thread.io_pl = (eflags >> 12) & 3; + p->thread.io_pl = current->thread.io_pl; return 0; } diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c index 017b167c38..8b58a273bd 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c @@ -273,7 +273,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, struct pt_regs * childregs; struct task_struct *tsk; int err; - unsigned long eflags; childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; *childregs = *regs; @@ -323,9 +322,7 @@ int 
copy_thread(int nr, unsigned long clone_flags, unsigned long esp, desc->b = LDT_entry_b(&info); } - - __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : ); - p->thread.io_pl = (eflags >> 12) & 3; + p->thread.io_pl = current->thread.io_pl; err = 0; out: diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c index 289085849c..6a09d2d75c 100644 --- a/xen/arch/x86/dom0_ops.c +++ b/xen/arch/x86/dom0_ops.c @@ -138,8 +138,12 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op) case DOM0_IOPL: { - extern long do_iopl(domid_t, unsigned int); - ret = do_iopl(op->u.iopl.domain, op->u.iopl.iopl); + ret = -EINVAL; + if ( op->u.iopl.domain == DOMID_SELF ) + { + current->arch.iopl = op->u.iopl.iopl & 3; + ret = 0; + } } break; @@ -358,6 +362,9 @@ void arch_getdomaininfo_ctxt( memcpy(&c->cpu_ctxt, &ed->arch.user_ctxt, sizeof(ed->arch.user_ctxt)); + /* IOPL privileges are virtualised -- merge back into returned eflags. */ + BUG_ON((c->cpu_ctxt.eflags & EF_IOPL) != 0); + c->cpu_ctxt.eflags |= ed->arch.iopl << 12; #ifdef __i386__ #ifdef CONFIG_VMX diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 68d079e800..71cc7e1581 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -441,6 +441,9 @@ int arch_final_setup_guest( memcpy(&ed->arch.user_ctxt, &c->cpu_ctxt, sizeof(ed->arch.user_ctxt)); + /* IOPL privileges are virtualised. */ + ed->arch.iopl = (ed->arch.user_ctxt.eflags >> 12) & 3; + ed->arch.user_ctxt.eflags &= ~EF_IOPL; /* Clear IOPL for unprivileged domains. */ if (!IS_PRIV(d)) @@ -820,15 +823,6 @@ void context_switch(struct exec_domain *prev_p, struct exec_domain *next_p) BUG(); } - -/* XXX Currently the 'domain' field is ignored! XXX */ -long do_iopl(domid_t domain, unsigned int new_io_pl) -{ - execution_context_t *ec = get_execution_context(); - ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12); - return 0; -} - unsigned long __hypercall_create_continuation( unsigned int op, unsigned int nr_args, ...) 
{ diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 9491787fcb..29dfc51881 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -368,22 +368,222 @@ long do_fpu_taskswitch(int set) return 0; } +static inline int user_io_okay( + unsigned int port, unsigned int bytes, + struct exec_domain *ed, struct xen_regs *regs) +{ + if ( ed->arch.iopl < (KERNEL_MODE(ed, regs) ? 1 : 3) ) + return 0; + return 1; +} + +#define insn_fetch(_type, _size, _ptr) \ +({ unsigned long _x; \ + if ( get_user(_x, (_type *)eip) ) \ + goto read_fault; \ + eip += _size; (_type)_x; }) + static int emulate_privileged_op(struct xen_regs *regs) { struct exec_domain *ed = current; unsigned long *reg, eip = regs->eip; - u8 opcode; + u8 opcode, modrm_reg = 0, rep_prefix = 0; + unsigned int port, i, op_bytes = 4, data; - if ( get_user(opcode, (u8 *)eip) ) - goto page_fault; - eip += 1; - if ( (opcode & 0xff) != 0x0f ) + /* Legacy prefixes. */ + for ( i = 0; i < 8; i++ ) + { + switch ( opcode = insn_fetch(u8, 1, eip) ) + { + case 0x66: /* operand-size override */ + op_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x67: /* address-size override */ + case 0x2e: /* CS override */ + case 0x3e: /* DS override */ + case 0x26: /* ES override */ + case 0x64: /* FS override */ + case 0x65: /* GS override */ + case 0x36: /* SS override */ + case 0xf0: /* LOCK */ + case 0xf2: /* REPNE/REPNZ */ + break; + case 0xf3: /* REP/REPE/REPZ */ + rep_prefix = 1; + break; + default: + goto done_prefixes; + } + } + done_prefixes: + +#ifdef __x86_64__ + /* REX prefix. */ + if ( (opcode & 0xf0) == 0x40 ) + { + modrm_reg = (opcode & 4) << 1; /* REX.R */ + /* REX.W, REX.B and REX.X do not need to be decoded. */ + opcode = insn_fetch(u8, 1, eip); + } +#endif + + /* Input/Output String instructions. 
*/ + if ( (opcode >= 0x6c) && (opcode <= 0x6f) ) + { + if ( rep_prefix && (regs->ecx == 0) ) + goto done; + + continue_io_string: + switch ( opcode ) + { + case 0x6c: /* INSB */ + op_bytes = 1; + case 0x6d: /* INSW/INSL */ + if ( !user_io_okay((u16)regs->edx, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + data = (u8)inb((u16)regs->edx); + if ( put_user((u8)data, (u8 *)regs->edi) ) + goto write_fault; + break; + case 2: + data = (u16)inw((u16)regs->edx); + if ( put_user((u16)data, (u16 *)regs->edi) ) + goto write_fault; + break; + case 4: + data = (u32)inl((u16)regs->edx); + if ( put_user((u32)data, (u32 *)regs->edi) ) + goto write_fault; + break; + } + regs->edi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + break; + + case 0x6e: /* OUTSB */ + op_bytes = 1; + case 0x6f: /* OUTSW/OUTSL */ + if ( !user_io_okay((u16)regs->edx, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + if ( get_user(data, (u8 *)regs->esi) ) + goto read_fault; + outb((u8)data, (u16)regs->edx); + break; + case 2: + if ( get_user(data, (u16 *)regs->esi) ) + goto read_fault; + outw((u16)data, (u16)regs->edx); + break; + case 4: + if ( get_user(data, (u32 *)regs->esi) ) + goto read_fault; + outl((u32)data, (u16)regs->edx); + break; + } + regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes; + break; + } + + if ( rep_prefix && (--regs->ecx != 0) ) + { + if ( !hypercall_preempt_check() ) + goto continue_io_string; + eip = regs->eip; + } + + goto done; + } + + /* I/O Port and Interrupt Flag instructions. 
*/ + switch ( opcode ) + { + case 0xe4: /* IN imm8,%al */ + op_bytes = 1; + case 0xe5: /* IN imm8,%eax */ + port = insn_fetch(u8, 1, eip); + exec_in: + if ( !user_io_okay(port, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + regs->eax &= ~0xffUL; + regs->eax |= (u8)inb(port); + break; + case 2: + regs->eax &= ~0xffffUL; + regs->eax |= (u16)inw(port); + break; + case 4: + regs->eax = (u32)inl(port); + break; + } + goto done; + + case 0xec: /* IN %dx,%al */ + op_bytes = 1; + case 0xed: /* IN %dx,%eax */ + port = (u16)regs->edx; + goto exec_in; + + case 0xe6: /* OUT %al,imm8 */ + op_bytes = 1; + case 0xe7: /* OUT %eax,imm8 */ + port = insn_fetch(u8, 1, eip); + exec_out: + if ( !user_io_okay(port, op_bytes, ed, regs) ) + goto fail; + switch ( op_bytes ) + { + case 1: + outb((u8)regs->eax, port); + break; + case 2: + outw((u16)regs->eax, port); + break; + case 4: + outl((u32)regs->eax, port); + break; + } + goto done; + + case 0xee: /* OUT %al,%dx */ + op_bytes = 1; + case 0xef: /* OUT %eax,%dx */ + port = (u16)regs->edx; + goto exec_out; + + case 0xfa: /* CLI */ + case 0xfb: /* STI */ + if ( ed->arch.iopl < (KERNEL_MODE(ed, regs) ? 1 : 3) ) + goto fail; + /* + * This is just too dangerous to allow, in my opinion. Consider if the + * caller then tries to reenable interrupts using POPF: we can't trap + * that and we'll end up with hard-to-debug lockups. Fast & loose will + * do for us. :-) + */ + /*ed->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/ + goto done; + + case 0x0f: /* Two-byte opcode */ + break; + + default: goto fail; + } - if ( get_user(opcode, (u8 *)eip) ) - goto page_fault; - eip += 1; + /* Remaining instructions only emulated from guest kernel. */ + if ( !KERNEL_MODE(ed, regs) ) + goto fail; + /* Privileged (ring 0) instructions. 
*/ + opcode = insn_fetch(u8, 1, eip); switch ( opcode ) { case 0x06: /* CLTS */ @@ -399,12 +599,11 @@ static int emulate_privileged_op(struct xen_regs *regs) break; case 0x20: /* MOV CR?,<reg> */ - if ( get_user(opcode, (u8 *)eip) ) - goto page_fault; - eip += 1; + opcode = insn_fetch(u8, 1, eip); if ( (opcode & 0xc0) != 0xc0 ) goto fail; - reg = decode_register(opcode & 7, regs, 0); + modrm_reg |= opcode & 7; + reg = decode_register(modrm_reg, regs, 0); switch ( (opcode >> 3) & 7 ) { case 0: /* Read CR0 */ @@ -427,12 +626,11 @@ static int emulate_privileged_op(struct xen_regs *regs) break; case 0x22: /* MOV <reg>,CR? */ - if ( get_user(opcode, (u8 *)eip) ) - goto page_fault; - eip += 1; + opcode = insn_fetch(u8, 1, eip); if ( (opcode & 0xc0) != 0xc0 ) goto fail; - reg = decode_register(opcode & 7, regs, 0); + modrm_reg |= opcode & 7; + reg = decode_register(modrm_reg, regs, 0); switch ( (opcode >> 3) & 7 ) { case 0: /* Write CR0 */ @@ -476,14 +674,19 @@ static int emulate_privileged_op(struct xen_regs *regs) goto fail; } + done: regs->eip = eip; return EXCRET_fault_fixed; fail: return 0; - page_fault: - propagate_page_fault(eip, 0); + read_fault: + propagate_page_fault(eip, 4); /* user mode, read fault */ + return EXCRET_fault_fixed; + + write_fault: + propagate_page_fault(eip, 6); /* user mode, write fault */ return EXCRET_fault_fixed; } @@ -534,9 +737,8 @@ asmlinkage int do_general_protection(struct xen_regs *regs) } } - /* Emulate some simple privileged instructions when exec'ed in ring 1. */ + /* Emulate some simple privileged and I/O instructions. 
*/ if ( (regs->error_code == 0) && - KERNEL_MODE(ed, regs) && emulate_privileged_op(regs) ) return 0; diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S index c31b3582d5..901ac5a2b3 100644 --- a/xen/arch/x86/x86_32/entry.S +++ b/xen/arch/x86/x86_32/entry.S @@ -463,7 +463,7 @@ error_code: SET_XEN_SEGMENTS(a) testb $X86_EFLAGS_IF>>8,XREGS_eflags+1(%esp) jz exception_with_ints_disabled -1: sti # re-enable interrupts + sti # re-enable interrupts xorl %eax,%eax movw XREGS_entry_vector(%esp),%ax movl %esp,%edx @@ -482,8 +482,7 @@ exception_with_ints_disabled: movl XREGS_eflags(%esp),%eax movb XREGS_cs(%esp),%al testl $(3|X86_EFLAGS_VM),%eax # interrupts disabled outside Xen? - jnz 1b # it really does happen! - # (e.g., DOM0 X server) + jnz FATAL_exception_with_ints_disabled pushl %esp call search_pre_exception_table addl $4,%esp @@ -701,8 +700,6 @@ do_switch_vm86: # Discard the return address addl $4,%esp - movl XREGS_eflags(%esp),%edx - # GS:ESI == Ring-1 stack activation movl XREGS_esp(%esp),%esi VFLT1: movl XREGS_ss(%esp),%gs @@ -722,13 +719,9 @@ VFLT3: movl %gs:(%esi),%eax addl $4,%esi loop VFLT3 - # Fix up EFLAGS + # Fix up EFLAGS: IOPL=0, IF=1, VM=1 andl $~X86_EFLAGS_IOPL,XREGS_eflags(%esp) - andl $X86_EFLAGS_IOPL,%edx # Ignore attempts to change EFLAGS.IOPL - jnz 1f - orl $X86_EFLAGS_IF,%edx # EFLAGS.IOPL=0 => no messing with EFLAGS.IF -1: orl $X86_EFLAGS_VM,%edx # Force EFLAGS.VM - orl %edx,XREGS_eflags(%esp) + orl $X86_EFLAGS_IF|X86_EFLAGS_VM,XREGS_eflags(%esp) jmp test_all_events diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index 93bd397386..2b11a46027 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -294,7 +294,7 @@ error_code: SAVE_ALL testb $X86_EFLAGS_IF>>8,XREGS_eflags+1(%rsp) jz exception_with_ints_disabled -1: sti + sti movq %rsp,%rdi movl XREGS_entry_vector(%rsp),%eax leaq SYMBOL_NAME(exception_table)(%rip),%rdx @@ -308,8 +308,7 @@ error_code: /* No special register assumptions. 
*/ exception_with_ints_disabled: testb $3,XREGS_cs(%rsp) # interrupts disabled outside Xen? - jnz 1b # it really does happen! - # (e.g., DOM0 X server) + jnz FATAL_exception_with_ints_disabled movq %rsp,%rdi call search_pre_exception_table testq %rax,%rax # no fixup code for faulting EIP? diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c index a36c31f4d8..023f7543ba 100644 --- a/xen/arch/x86/x86_emulate.c +++ b/xen/arch/x86/x86_emulate.c @@ -421,39 +421,44 @@ x86_emulate_memop( u8 b, d, sib, twobyte = 0, rex_prefix = 0; u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode; - unsigned int lock_prefix = 0, rep_prefix = 0; + unsigned int lock_prefix = 0, rep_prefix = 0, i; struct operand src, dst; /* Shadow copy of register state. Committed on successful emulation. */ struct xen_regs _regs = *regs; /* Legacy prefixes. */ - next_prefix: - switch ( b = insn_fetch(u8, 1, _regs.eip) ) + for ( i = 0; i < 8; i++ ) { - case 0x66: /* operand-size override */ - op_bytes ^= 6; /* switch between 2/4 bytes */ - goto next_prefix; - case 0x67: /* address-size override */ - ad_bytes ^= (mode == 8) ? 12 : 6; /* switch between 2/4 or 4/8 bytes */ - goto next_prefix; - case 0x2e: /* CS override */ - case 0x3e: /* DS override */ - case 0x26: /* ES override */ - case 0x64: /* FS override */ - case 0x65: /* GS override */ - case 0x36: /* SS override */ - DPRINTF("Warning: ignoring a segment override. Probably okay. :-)\n"); - goto next_prefix; - case 0xf0: /* LOCK */ - lock_prefix = 1; - goto next_prefix; - case 0xf3: /* REP/REPE/REPZ */ - rep_prefix = 1; - goto next_prefix; - case 0xf2: /* REPNE/REPNZ */ - goto next_prefix; + switch ( b = insn_fetch(u8, 1, _regs.eip) ) + { + case 0x66: /* operand-size override */ + op_bytes ^= 6; /* switch between 2/4 bytes */ + break; + case 0x67: /* address-size override */ + ad_bytes ^= (mode == 8) ? 
12 : 6; /* switch between 2/4/8 bytes */ + break; + case 0x2e: /* CS override */ + case 0x3e: /* DS override */ + case 0x26: /* ES override */ + case 0x64: /* FS override */ + case 0x65: /* GS override */ + case 0x36: /* SS override */ + DPRINTF("Warning: ignoring a segment override.\n"); + break; + case 0xf0: /* LOCK */ + lock_prefix = 1; + break; + case 0xf3: /* REP/REPE/REPZ */ + rep_prefix = 1; + break; + case 0xf2: /* REPNE/REPNZ */ + break; + default: + goto done_prefixes; + } } + done_prefixes: if ( ad_bytes == 2 ) { diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index 90246d63ed..ca4c192c13 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -57,6 +57,7 @@ struct arch_exec_domain /* general user-visible register state */ execution_context_t user_ctxt; + unsigned int iopl; void (*schedule_tail) (struct exec_domain *);