x86/mem_sharing: resolve mm-lock order violations when forking VMs with nested p2m

author Tamas K Lengyel <tamas.lengyel@intel.com>

Fri, 8 Jan 2021 10:51:36 +0000 (11:51 +0100)

committer Jan Beulich <jbeulich@suse.com>

Fri, 8 Jan 2021 10:51:36 +0000 (11:51 +0100)
author Tamas K Lengyel <tamas.lengyel@intel.com>
Fri, 8 Jan 2021 10:51:36 +0000 (11:51 +0100)
committer Jan Beulich <jbeulich@suse.com>
Fri, 8 Jan 2021 10:51:36 +0000 (11:51 +0100)
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c

index ad9d495110ff314aa8c3c01f3859e5e90d256a39..a98a1709c2d1f4ee18605c9a3836a0a81b013cb5 100644 (file)
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -39,6 +39,7 @@
  #include <asm/event.h>
  #include <asm/hap.h>
  #include <asm/hvm/hvm.h>
+#include <asm/hvm/nestedhvm.h>
  #include <xsm/xsm.h>
  
  #include <public/hvm/params.h>
@@ -893,13 +894,11 @@ static int nominate_page(struct domain *d, gfn_t gfn,
          goto out;
  
      /*
-     * Now that the page is validated, we can lock it. There is no
-     * race because we're holding the p2m entry, so no one else
-     * could be nominating this gfn.
+     * Now that the page is validated, we can make it shared. There is no race
+     * because we're holding the p2m entry, so no one else could be nominating
+     * this gfn and it is evidently not yet shared with any other VM, thus we
+     * don't need to take the mem_sharing_page_lock here.
       */
-    ret = -ENOENT;
-    if ( !mem_sharing_page_lock(page) )
-        goto out;
  
      /* Initialize the shared state */
      ret = -ENOMEM;
@@ -935,7 +934,6 @@ static int nominate_page(struct domain *d, gfn_t gfn,
  
      *phandle = page->sharing->handle;
      audit_add_list(page);
-    mem_sharing_page_unlock(page);
      ret = 0;
  
  out:
@@ -1214,7 +1212,8 @@ int __mem_sharing_unshare_page(struct domain *d,
      p2m_type_t p2mt;
      mfn_t mfn;
      struct page_info *page, *old_page;
-    int last_gfn;
+    bool last_gfn;
+    int rc = 0;
      gfn_info_t *gfn_info = NULL;
  
      mfn = get_gfn(d, gfn, &p2mt);
@@ -1226,6 +1225,15 @@ int __mem_sharing_unshare_page(struct domain *d,
          return 0;
      }
  
+    /* lock nested p2ms to avoid lock-order violation with sharing lock */
+    if ( unlikely(nestedhvm_enabled(d)) )
+    {
+        unsigned int i;
+
+        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+            p2m_lock(d->arch.nested_p2m[i]);
+    }
+
      page = __grab_shared_page(mfn);
      if ( page == NULL )
      {
@@ -1276,9 +1284,7 @@ int __mem_sharing_unshare_page(struct domain *d,
              put_page_alloc_ref(page);
  
          put_page_and_type(page);
-        put_gfn(d, gfn);
-
-        return 0;
+        goto out;
      }
  
      if ( last_gfn )
@@ -1295,12 +1301,12 @@ int __mem_sharing_unshare_page(struct domain *d,
          /* Undo dec of nr_saved_mfns, as the retry will decrease again. */
          atomic_inc(&nr_saved_mfns);
          mem_sharing_page_unlock(old_page);
-        put_gfn(d, gfn);
          /*
           * Caller is responsible for placing an event
           * in the ring.
           */
-        return -ENOMEM;
+        rc = -ENOMEM;
+        goto out;
      }
  
      copy_domain_page(page_to_mfn(page), page_to_mfn(old_page));
@@ -1327,8 +1333,18 @@ int __mem_sharing_unshare_page(struct domain *d,
       */
      paging_mark_dirty(d, page_to_mfn(page));
      /* We do not need to unlock a private page */
+
+ out:
+    if ( unlikely(nestedhvm_enabled(d)) )
+    {
+        unsigned int i;
+
+        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+            p2m_unlock(d->arch.nested_p2m[i]);
+    }
+
      put_gfn(d, gfn);
-    return 0;
+    return rc;
  }
  
  int relinquish_shared_pages(struct domain *d)
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c

index ad4bb94514990c76ca28f5d08f8da668207d5721..a32301c343341bdec78d301e3ea05d0767e7bb8c 100644 (file)
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1597,9 +1597,17 @@ p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
  void
  p2m_flush_nestedp2m(struct domain *d)
  {
-    int i;
+    unsigned int i;
+
      for ( i = 0; i < MAX_NESTEDP2M; i++ )
-        p2m_flush_table(d->arch.nested_p2m[i]);
+    {
+        struct p2m_domain *p2m = d->arch.nested_p2m[i];
+
+        if ( p2m_locked_by_me(p2m) )
+            p2m_flush_table_locked(p2m);
+        else
+            p2m_flush_table(p2m);
+    }
  }
  
  void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
author	Tamas K Lengyel <tamas.lengyel@intel.com>
	Fri, 8 Jan 2021 10:51:36 +0000 (11:51 +0100)
committer	Jan Beulich <jbeulich@suse.com>
	Fri, 8 Jan 2021 10:51:36 +0000 (11:51 +0100)
xen/arch/x86/mm/mem_sharing.c		patch | blob | history
xen/arch/x86/mm/p2m.c		patch | blob | history