From: kfraser@dhcp93.uk.xensource.com
Date: Fri, 16 Jun 2006 17:19:40 +0000 (+0100)
Subject: [LINUX] Add spurious page-fault detection, intended primarily
X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~15921^2~54
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=d6a811acc8b99176437c699a729d5c078e78432c;p=xen.git

[LINUX] Add spurious page-fault detection, intended primarily for
spurious write faults on mappings that have been changed from
read-only to writable. If a CPU has a stale read-only entry in its
TLB, it is allowed to fault on the next write access without
re-walking the page table.

Signed-off-by: Keir Fraser
---

diff --git a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
index 308c0bbe8a..16a0155ecb 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
@@ -273,6 +273,49 @@ static void dump_fault_path(unsigned long address)
 }
 #endif
 
+static int spurious_fault(struct pt_regs *regs,
+                          unsigned long address,
+                          unsigned long error_code)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        pte_t *pte;
+
+#ifdef CONFIG_XEN
+        /* Faults in hypervisor area are never spurious. */
+        if (address >= HYPERVISOR_VIRT_START)
+                return 0;
+#endif
+
+        /* Reserved-bit violation or user access to kernel space? */
+        if (error_code & 0x0c)
+                return 0;
+
+        pgd = init_mm.pgd + pgd_index(address);
+        if (!pgd_present(*pgd))
+                return 0;
+
+        pud = pud_offset(pgd, address);
+        if (!pud_present(*pud))
+                return 0;
+
+        pmd = pmd_offset(pud, address);
+        if (!pmd_present(*pmd))
+                return 0;
+
+        pte = pte_offset_kernel(pmd, address);
+        if (!pte_present(*pte))
+                return 0;
+        if ((error_code & 0x02) && !pte_write(*pte))
+                return 0;
+#ifdef CONFIG_X86_PAE
+        if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX))
+                return 0;
+#endif
+
+        return 1;
+}
 /*
  * This routine handles page faults.  It determines the address,
@@ -327,8 +370,16 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
          * protection error (error_code & 1) == 0.
          */
         if (unlikely(address >= TASK_SIZE)) {
+#ifdef CONFIG_XEN
+                /* Faults in hypervisor area can never be patched up. */
+                if (address >= HYPERVISOR_VIRT_START)
+                        goto bad_area_nosemaphore;
+#endif
                 if (!(error_code & 5))
                         goto vmalloc_fault;
+                /* Can take a spurious fault if mapping changes R/O -> R/W. */
+                if (spurious_fault(regs, address, error_code))
+                        return;
                 /*
                  * Don't take the mm semaphore here. If we fixup a prefetch
                  * fault we could otherwise deadlock.
diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
index dfd2f2df0f..963446013f 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c
@@ -307,6 +307,49 @@ int exception_trace = 1;
 #define MEM_LOG(_f, _a...) ((void)0)
 #endif
 
+static int spurious_fault(struct pt_regs *regs,
+                          unsigned long address,
+                          unsigned long error_code)
+{
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+        pte_t *pte;
+
+#ifdef CONFIG_XEN
+        /* Faults in hypervisor area are never spurious. */
+        if ((address >= HYPERVISOR_VIRT_START) &&
+            (address < HYPERVISOR_VIRT_END))
+                return 0;
+#endif
+
+        /* Reserved-bit violation or user access to kernel space? */
+        if (error_code & (PF_RSVD|PF_USER))
+                return 0;
+
+        pgd = init_mm.pgd + pgd_index(address);
+        if (!pgd_present(*pgd))
+                return 0;
+
+        pud = pud_offset(pgd, address);
+        if (!pud_present(*pud))
+                return 0;
+
+        pmd = pmd_offset(pud, address);
+        if (!pmd_present(*pmd))
+                return 0;
+
+        pte = pte_offset_kernel(pmd, address);
+        if (!pte_present(*pte))
+                return 0;
+        if ((error_code & PF_WRITE) && !pte_write(*pte))
+                return 0;
+        if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
+                return 0;
+
+        return 1;
+}
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -361,16 +404,19 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
          */
         if (unlikely(address >= TASK_SIZE64)) {
                 /*
-                 * Must check for the entire kernel range here: with writable
-                 * page tables the hypervisor may temporarily clear PMD
-                 * entries.
+                 * Don't check for the module range here: its PML4
+                 * is always initialized because it's shared with the main
+                 * kernel text. Only vmalloc may need PML4 syncups.
                  */
                 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-                    address >= PAGE_OFFSET) {
+                    ((address >= VMALLOC_START && address < VMALLOC_END))) {
                         if (vmalloc_fault(address) < 0)
                                 goto bad_area_nosemaphore;
                         return;
                 }
+                /* Can take a spurious fault if mapping changes R/O -> R/W. */
+                if (spurious_fault(regs, address, error_code))
+                        return;
                 /*
                  * Don't take the mm semaphore here. If we fixup a prefetch
                  * fault we could otherwise deadlock.
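
A note on the check itself: the decision both new spurious_fault()
implementations encode (a fault is spurious when the current page-table
entry already permits the access, so the CPU must have acted on a stale
TLB entry) can be modelled outside the kernel in a few lines of C. The
sketch below is illustrative only, assuming the x86 PF_* error-code bit
values used in the x86_64 hunk above; "struct sw_pte" and "is_spurious"
are hypothetical names, not kernel interfaces.

#include <stdio.h>

#define PF_PROT  0x01 /* protection violation (page was present) */
#define PF_WRITE 0x02 /* fault was a write access */
#define PF_USER  0x04 /* fault originated in user mode */
#define PF_RSVD  0x08 /* reserved bit set in a page-table entry */
#define PF_INSTR 0x10 /* fault was an instruction fetch */

struct sw_pte {                 /* software stand-in for a real PTE */
        int present;            /* mapping exists */
        int writable;           /* writes permitted */
        int nx;                 /* execution forbidden */
};

/*
 * Spurious if the current entry already permits the access: the CPU
 * faulted on a stale TLB entry, and retrying the access will succeed
 * once the hardware re-walks the page table.
 */
static int is_spurious(unsigned long error_code, const struct sw_pte *pte)
{
        if (error_code & (PF_RSVD | PF_USER)) /* never spurious */
                return 0;
        if (!pte->present)
                return 0;
        if ((error_code & PF_WRITE) && !pte->writable)
                return 0;
        if ((error_code & PF_INSTR) && pte->nx)
                return 0;
        return 1;
}

int main(void)
{
        struct sw_pte pte = { 1, 1, 0 }; /* present, now writable */

        /* Kernel write fault on a page the PTE says is writable:
         * the stale read-only TLB entry case. Prints 1. */
        printf("%d\n", is_spurious(PF_WRITE | PF_PROT, &pte));

        /* The same access from user mode is never treated as
         * spurious. Prints 0. */
        printf("%d\n", is_spurious(PF_WRITE | PF_USER, &pte));
        return 0;
}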