x86/mm: fix race conditions in map_pages_to_xen()
authorMin He <min.he@intel.com>
Tue, 14 Nov 2017 16:10:56 +0000 (17:10 +0100)
committerJan Beulich <jbeulich@suse.com>
Tue, 14 Nov 2017 16:10:56 +0000 (17:10 +0100)
In map_pages_to_xen(), a L2 page table entry may be reset to point to
a superpage, and its corresponding L1 page table need be freed in such
scenario, when these L1 page table entries are mapping to consecutive
page frames and having the same mapping flags.

However, variable `pl1e` is not protected by the lock before L1 page table
is enumerated. A race condition may happen if this code path is invoked
simultaneously on different CPUs.

For example, `pl1e` value on CPU0 may hold an obsolete value, pointing
to a page which has just been freed on CPU1. Besides, before this page
is reused, it will still be holding the old PTEs, referencing consecutive
page frames. Consequently the `free_xen_pagetable(l2e_to_l1e(ol2e))` will
be triggered on CPU0, resulting the unexpected free of a normal page.

This patch fixes the above problem by protecting the `pl1e` with the lock.

Also, there're other potential race conditions. For instance, the L2/L3
entry may be modified concurrently on different CPUs, by routines such as
map_pages_to_xen(), modify_xen_mappings() etc. To fix this, this patch will
check the _PAGE_PRESENT and _PAGE_PSE flags, after the spinlock is obtained,
for the corresponding L2/L3 entry.

Signed-off-by: Min He <min.he@intel.com>
Signed-off-by: Yi Zhang <yi.z.zhang@intel.com>
Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Release-acked-by: Julien Grall <julien.grall@linaro.org>
xen/arch/x86/mm.c

index a20fdcaea4fb4f81f260755da3ee25dae830628b..1697be9ef05f0af54cacfb3ad220c952b7cbd742 100644 (file)
@@ -4844,9 +4844,29 @@ int map_pages_to_xen(
             {
                 unsigned long base_mfn;
 
-                pl1e = l2e_to_l1e(*pl2e);
                 if ( locking )
                     spin_lock(&map_pgdir_lock);
+
+                ol2e = *pl2e;
+                /*
+                 * L2E may be already cleared, or set to a superpage, by
+                 * concurrent paging structure modifications on other CPUs.
+                 */
+                if ( !(l2e_get_flags(ol2e) & _PAGE_PRESENT) )
+                {
+                    if ( locking )
+                        spin_unlock(&map_pgdir_lock);
+                    continue;
+                }
+
+                if ( l2e_get_flags(ol2e) & _PAGE_PSE )
+                {
+                    if ( locking )
+                        spin_unlock(&map_pgdir_lock);
+                    goto check_l3;
+                }
+
+                pl1e = l2e_to_l1e(ol2e);
                 base_mfn = l1e_get_pfn(*pl1e) & ~(L1_PAGETABLE_ENTRIES - 1);
                 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++, pl1e++ )
                     if ( (l1e_get_pfn(*pl1e) != (base_mfn + i)) ||
@@ -4854,7 +4874,6 @@ int map_pages_to_xen(
                         break;
                 if ( i == L1_PAGETABLE_ENTRIES )
                 {
-                    ol2e = *pl2e;
                     l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn,
                                                         l1f_to_lNf(flags)));
                     if ( locking )
@@ -4880,7 +4899,20 @@ int map_pages_to_xen(
 
             if ( locking )
                 spin_lock(&map_pgdir_lock);
+
             ol3e = *pl3e;
+            /*
+             * L3E may be already cleared, or set to a superpage, by
+             * concurrent paging structure modifications on other CPUs.
+             */
+            if ( !(l3e_get_flags(ol3e) & _PAGE_PRESENT) ||
+                (l3e_get_flags(ol3e) & _PAGE_PSE) )
+            {
+                if ( locking )
+                    spin_unlock(&map_pgdir_lock);
+                continue;
+            }
+
             pl2e = l3e_to_l2e(ol3e);
             base_mfn = l2e_get_pfn(*pl2e) & ~(L2_PAGETABLE_ENTRIES *
                                               L1_PAGETABLE_ENTRIES - 1);