Walking the page lists needs the page_alloc lock
author: Keir Fraser <keir.fraser@citrix.com>
Wed, 28 Jul 2010 06:54:12 +0000 (07:54 +0100)
committer: Keir Fraser <keir.fraser@citrix.com>
Wed, 28 Jul 2010 06:54:12 +0000 (07:54 +0100)
There are a few places in Xen where we walk a domain's page lists
without holding the page_alloc lock.  Those walks race with updates to
the page lists; such updates are normally rare but can be quite common
under PoD (populate-on-demand) when the domain is close to its memory
limit and the PoD reclaimer is busy.  This patch protects those places
by taking the page_alloc lock.

I think this is OK for the two debug-key printouts - they don't run
from irq context and look deadlock-free.  The tboot change seems safe
too unless tboot shutdown functions are called from irq context or
with the page_alloc lock held.  The p2m one is the scariest but there
are already code paths in PoD that take the page_alloc lock with the
p2m lock held so it's no worse than existing code.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
xen/arch/x86/domain.c
xen/arch/x86/mm/p2m.c
xen/arch/x86/numa.c
xen/arch/x86/tboot.c

index d6b453b7d4308c9749d919804473add9afda6925..233ce1331cb4b5eff87c0bff1f8ed07047d277b1 100644 (file)
@@ -139,12 +139,14 @@ void dump_pageframe_info(struct domain *d)
     }
     else
     {
+        spin_lock(&d->page_alloc_lock);
         page_list_for_each ( page, &d->page_list )
         {
             printk("    DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                    _p(page_to_mfn(page)),
                    page->count_info, page->u.inuse.type_info);
         }
+        spin_unlock(&d->page_alloc_lock);
     }
 
     if ( is_hvm_domain(d) )
@@ -152,12 +154,14 @@ void dump_pageframe_info(struct domain *d)
         p2m_pod_dump_data(d);
     }
 
+    spin_lock(&d->page_alloc_lock);
     page_list_for_each ( page, &d->xenpage_list )
     {
         printk("    XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
                _p(page_to_mfn(page)),
                page->count_info, page->u.inuse.type_info);
     }
+    spin_unlock(&d->page_alloc_lock);
 }
 
 struct domain *alloc_domain_struct(void)
index 36728c6192c4b40e799c8e0d8ec8e3799d128e99..1bcd71616a630c7ed0432b9adb20aa02fa4142b3 100644 (file)
@@ -1833,6 +1833,7 @@ int p2m_alloc_table(struct domain *d,
         goto error;
 
     /* Copy all existing mappings from the page list and m2p */
+    spin_lock(&d->page_alloc_lock);
     page_list_for_each(page, &d->page_list)
     {
         mfn = page_to_mfn(page);
@@ -1848,13 +1849,16 @@ int p2m_alloc_table(struct domain *d,
 #endif
              && gfn != INVALID_M2P_ENTRY
             && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
-            goto error;
+            goto error_unlock;
     }
+    spin_unlock(&d->page_alloc_lock);
 
     P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
     p2m_unlock(p2m);
     return 0;
 
+error_unlock:
+    spin_unlock(&d->page_alloc_lock);
  error:
     P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
                PRI_mfn "\n", gfn, mfn_x(mfn));
index 466ff76db6c5b9d434ecb403d76023240d30d97a..92c2f5b631feef2bbbce375ee5ebe870c9093c73 100644 (file)
@@ -385,11 +385,13 @@ static void dump_numa(unsigned char key)
                for_each_online_node(i)
                        page_num_node[i] = 0;
 
+               spin_lock(&d->page_alloc_lock);
                page_list_for_each(page, &d->page_list)
                {
                        i = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT);
                        page_num_node[i]++;
                }
+               spin_unlock(&d->page_alloc_lock);
 
                for_each_online_node(i)
                        printk("    Node %u: %u\n", i, page_num_node[i]);
index aac0f82947e346ed418e255fda2dfb46f4f895fb..91f97ce43810defa3607e174fa202877e0b9129e 100644 (file)
@@ -211,12 +211,14 @@ static void tboot_gen_domain_integrity(const uint8_t key[TB_KEY_SIZE],
             continue;
         printk("MACing Domain %u\n", d->domain_id);
 
+        spin_lock(&d->page_alloc_lock);
         page_list_for_each(page, &d->page_list)
         {
             void *pg = __map_domain_page(page);
             vmac_update(pg, PAGE_SIZE, &ctx);
             unmap_domain_page(pg);
         }
+        spin_unlock(&d->page_alloc_lock);
 
         if ( !is_idle_domain(d) )
         {