[XEN] Fix page-fault handler to not trust bit 0 of error code.
author    kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Sun, 18 Jun 2006 18:24:00 +0000 (19:24 +0100)
committer kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Sun, 18 Jun 2006 18:24:00 +0000 (19:24 +0100)
That bit can be cleared by the writable-pagetable logic, so the
handler must not rely on it. Various other cleanups too. The
spurious-fault detection logic is simplified.
Signed-off-by: Keir Fraser <keir@xensource.com>
xen/arch/x86/mm.c
xen/arch/x86/traps.c
xen/arch/x86/x86_32/seg_fixup.c
xen/arch/x86/x86_emulate.c
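
For reference, the x86 hardware packs the page-fault error code as follows;
the PGERR_* names are the ones used in this patch, and the bit positions are
architecturally defined (a sketch, not this tree's literal header):

    #define PGERR_page_present  (1u << 0) /* 0: not-present fault; 1: protection fault */
    #define PGERR_write_access  (1u << 1) /* fault was caused by a write */
    #define PGERR_user_mode     (1u << 2) /* fault occurred in user mode */
    #define PGERR_reserved_bit  (1u << 3) /* reserved bit set in a paging entry */

Bit 0 is precisely the bit the writable-pagetable logic can falsify: while a
page directory is unhooked, a perfectly valid guest mapping faults as "not
present".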

index 18a290ab13b96e982b73dbb3cfed289fc8a2a014..31e6a12ff16a5ff00a4cda713aa533b4a891afc6 100644 (file)
@@ -3351,7 +3351,7 @@ static int ptwr_emulated_update(
         addr &= ~(sizeof(paddr_t)-1);
         if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
         {
-            propagate_page_fault(addr, 4); /* user mode, read fault */
+            propagate_page_fault(addr, 0); /* read fault */
             return X86EMUL_PROPAGATE_FAULT;
         }
         /* Mask out bits provided by caller. */
@@ -3483,12 +3483,12 @@ int ptwr_do_page_fault(struct domain *d, unsigned long addr,
     unsigned long    l2_idx;
     struct x86_emulate_ctxt emul_ctxt;
 
-    if ( unlikely(shadow_mode_enabled(d)) )
-        return 0;
+    ASSERT(!shadow_mode_enabled(d));
 
     /*
      * Attempt to read the PTE that maps the VA being accessed. By checking for
      * PDE validity in the L2 we avoid many expensive fixups in __get_user().
+     * NB. The L2 entry cannot be detached, as the caller checked for that.
      */
     if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
            _PAGE_PRESENT) ||
@@ -3579,7 +3579,7 @@ int ptwr_do_page_fault(struct domain *d, unsigned long addr,
     }
 
     /*
-     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
+     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
      * time. If there is already one, we must flush it out.
      */
     if ( d->arch.ptwr[which].l1va )
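
Taken together, the mm.c hunks establish a convention for
propagate_page_fault(): callers pass only the error-code bits they can vouch
for, i.e. 0 for a read and PGERR_write_access for a write. The old hard-coded
4 (PGERR_user_mode) wrongly described faults taken while emulating on the
guest kernel's behalf as user-mode faults. A minimal sketch of the new
convention, with a hypothetical caller:

    /* Report only what Xen knows for certain about the faulting access. */
    if ( copy_from_user(&val, (void *)addr, bytes) )
        propagate_page_fault(addr, 0);                  /* read fault  */
    if ( copy_to_user((void *)addr, &val, bytes) )
        propagate_page_fault(addr, PGERR_write_access); /* write fault */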
index e80f71326cbb1617bd98373103610e4b04a61407..cd143360f97518f1f548096f31b0de7efe3e7e9a 100644 (file)
@@ -547,6 +547,7 @@ static int handle_gdt_ldt_mapping_fault(
     {
         /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
         LOCK_BIGLOCK(d);
+        cleanup_writable_pagetable(d);
         ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
         UNLOCK_BIGLOCK(d);
 
@@ -654,41 +655,14 @@ static int __spurious_page_fault(
 static int spurious_page_fault(
     unsigned long addr, struct cpu_user_regs *regs)
 {
-    struct vcpu   *v = current;
-    struct domain *d = v->domain;
+    struct domain *d = current->domain;
     int            is_spurious;
 
     LOCK_BIGLOCK(d);
-
+    cleanup_writable_pagetable(d);
     is_spurious = __spurious_page_fault(addr, regs);
-    if ( is_spurious )
-        goto out;
-
-    /*
-     * The only possible reason for a spurious page fault not to be picked
-     * up already is that a page directory was unhooked by writable page table
-     * logic and then reattached before the faulting VCPU could detect it.
-     */
-    if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
-         IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
-         shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
-         (regs->error_code & PGERR_page_present) ) /* not-present fault?    */
-        goto out;
-
-    /*
-     * The page directory could have been detached again while we weren't
-     * holding the per-domain lock. Detect that and fix up if it's the case.
-     */
-    if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
-         unlikely(l2_linear_offset(addr) ==
-                  d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
-    {
-        ptwr_flush(d, PTWR_PT_ACTIVE);
-        is_spurious = 1;
-    }
-
- out:
     UNLOCK_BIGLOCK(d);
+
     return is_spurious;
 }
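
Reconstructed from the context and added lines above, the simplified function
now reads as follows: cleanup_writable_pagetable() reattaches any unhooked
page directory up front, so the racy re-check that previously followed
__spurious_page_fault() is no longer needed.

    static int spurious_page_fault(
        unsigned long addr, struct cpu_user_regs *regs)
    {
        struct domain *d = current->domain;
        int            is_spurious;

        LOCK_BIGLOCK(d);
        cleanup_writable_pagetable(d); /* reattach unhooked page directories */
        is_spurious = __spurious_page_fault(addr, regs);
        UNLOCK_BIGLOCK(d);

        return is_spurious;
    }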
 
@@ -696,7 +670,6 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
 {
     struct vcpu   *v = current;
     struct domain *d = v->domain;
-    int            rc;
 
     if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
     {
@@ -709,10 +682,7 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
          * Do not propagate spurious faults in the hypervisor area to the
          * guest. It cannot fix them up.
          */
-        LOCK_BIGLOCK(d);
-        rc = __spurious_page_fault(addr, regs);
-        UNLOCK_BIGLOCK(d);
-        return rc;
+        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
     }
 
     if ( unlikely(shadow_mode_enabled(d)) )
@@ -730,12 +700,14 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
             return EXCRET_fault_fixed;
         }
 
+        /*
+         * Note it is *not* safe to check PGERR_page_present here. It can be
+         * clear, due to an unhooked page table, when we would otherwise
+         * expect it to be set. Xen therefore does not trust that flag, and
+         * guests ought to be leery of it too.
+         */
         if ( guest_kernel_mode(v, regs) &&
-             /* Protection violation on write? No reserved-bit violation? */
-             ((regs->error_code & (PGERR_page_present |
-                                   PGERR_write_access |
-                                   PGERR_reserved_bit)) ==
-              (PGERR_page_present | PGERR_write_access)) &&
+             (regs->error_code & PGERR_write_access) &&
              ptwr_do_page_fault(d, addr, regs) )
         {
             UNLOCK_BIGLOCK(d);
@@ -870,8 +842,6 @@ static inline int admin_io_okay(
     (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
 
 /* Propagate a fault back to the guest kernel. */
-#define USER_READ_FAULT  (PGERR_user_mode)
-#define USER_WRITE_FAULT (PGERR_user_mode | PGERR_write_access)
 #define PAGE_FAULT(_faultaddr, _errcode)        \
 ({  propagate_page_fault(_faultaddr, _errcode); \
     return EXCRET_fault_fixed;                  \
@@ -881,7 +851,7 @@ static inline int admin_io_okay(
 #define insn_fetch(_type, _size, _ptr)          \
 ({  unsigned long _x;                           \
     if ( get_user(_x, (_type *)eip) )           \
-        PAGE_FAULT(eip, USER_READ_FAULT);       \
+        PAGE_FAULT(eip, 0); /* read fault */    \
     eip += _size; (_type)_x; })
 
 static int emulate_privileged_op(struct cpu_user_regs *regs)
@@ -950,17 +920,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
             case 1:
                 data = (u8)inb_user((u16)regs->edx, v, regs);
                 if ( put_user((u8)data, (u8 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 2:
                 data = (u16)inw_user((u16)regs->edx, v, regs);
                 if ( put_user((u16)data, (u16 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             case 4:
                 data = (u32)inl_user((u16)regs->edx, v, regs);
                 if ( put_user((u32)data, (u32 *)regs->edi) )
-                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
+                    PAGE_FAULT(regs->edi, PGERR_write_access);
                 break;
             }
             regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
@@ -975,17 +945,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
             {
             case 1:
                 if ( get_user(data, (u8 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outb_user((u8)data, (u16)regs->edx, v, regs);
                 break;
             case 2:
                 if ( get_user(data, (u16 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outw_user((u16)data, (u16)regs->edx, v, regs);
                 break;
             case 4:
                 if ( get_user(data, (u32 *)regs->esi) )
-                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
+                    PAGE_FAULT(regs->esi, 0); /* read fault */
                 outl_user((u32)data, (u16)regs->edx, v, regs);
                 break;
             }
@@ -1168,7 +1138,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
             v->arch.guest_context.ctrlreg[2] = *reg;
             v->vcpu_info->arch.cr2           = *reg;
             break;
-            
+
         case 3: /* Write CR3 */
             LOCK_BIGLOCK(v->domain);
             cleanup_writable_pagetable(v->domain);
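
The behavioural core of the traps.c change is the relaxed trigger test for
ptwr_do_page_fault(), extracted here from the hunk above:

    /* Old test: trusted the hardware-reported 'present' bit. */
    (regs->error_code & (PGERR_page_present | PGERR_write_access |
                         PGERR_reserved_bit)) ==
        (PGERR_page_present | PGERR_write_access)

    /* New test: bit 0 may have been cleared by an unhooked page table,
     * so only the write bit is consulted. */
    (regs->error_code & PGERR_write_access)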
index 352a3e52a71a02a54fa93aa32728e6884ff15c29..fd91ff14bdfb128d02cbbd44a86dec9fd12ae193 100644 (file)
@@ -464,7 +464,7 @@ int gpf_emulate_4gb(struct cpu_user_regs *regs)
     return 0;
 
  page_fault:
-    propagate_page_fault((unsigned long)pb, 4);
+    propagate_page_fault((unsigned long)pb, 0); /* read fault */
     return EXCRET_fault_fixed;
 }
 
index e48c911058e4f06146da2b2d86f755351c615924..99e707d4f078f343be9f5d8711f2c490b37c6b6a 100644 (file)
@@ -1146,7 +1146,7 @@ x86_emulate_read_std(
     *val = 0;
     if ( copy_from_user((void *)val, (void *)addr, bytes) )
     {
-        propagate_page_fault(addr, 4); /* user mode, read fault */
+        propagate_page_fault(addr, 0); /* read fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;
@@ -1161,7 +1161,7 @@ x86_emulate_write_std(
 {
     if ( copy_to_user((void *)addr, (void *)&val, bytes) )
     {
-        propagate_page_fault(addr, 6); /* user mode, write fault */
+        propagate_page_fault(addr, PGERR_write_access); /* write fault */
         return X86EMUL_PROPAGATE_FAULT;
     }
     return X86EMUL_CONTINUE;
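
As the new comment in traps.c advises, paravirtualised guests should apply
the same caution to bit 0. A hypothetical guest-side check (pte_of() and
pte_present() are illustrative helpers, not Xen or Linux APIs):

    /* Do not conclude from a clear 'present' bit that the mapping is
     * absent; re-walk the page tables before acting on the fault. */
    if ( !(error_code & PGERR_page_present) && pte_present(pte_of(addr)) )
        return; /* spurious not-present fault: just retry the access */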