libxc: allocate memory with vNUMA information for PV guest
author Wei Liu <wei.liu2@citrix.com>
Mon, 16 Mar 2015 09:52:23 +0000 (09:52 +0000)
committer Ian Campbell <ian.campbell@citrix.com>
Wed, 18 Mar 2015 12:04:54 +0000 (12:04 +0000)
From libxc's point of view, it only needs to know vnode to pnode mapping
and size of each vnode to allocate memory accordingly. Add these fields
to xc_dom structure.

The caller might not pass in vNUMA information. In that case, a dummy
layout is generated for the convenience of libxc's allocation code. The
upper layer (libxl etc) still sees the domain has no vNUMA
configuration.

Note that from this patch onwards, a PV x86 guest can have multiple
regions of RAM allocated.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Dario Faggioli <dario.faggioli@citrix.com>
Cc: Elena Ufimtseva <ufimtseva@gmail.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
tools/libxc/include/xc_dom.h
tools/libxc/include/xenguest.h
tools/libxc/xc_dom_x86.c

index 6b8ddf41a11d3e530ebf0a5689e8e0c31e4d6dbd..a7d059a3ee5565086c04b0336f9297eda1e1e202 100644 (file)
@@ -119,8 +119,10 @@ struct xc_dom_image {
 
     /* physical memory
      *
-     * An x86 PV guest has a single contiguous block of physical RAM,
-     * consisting of total_pages starting at rambase_pfn.
+     * An x86 PV guest has one or more blocks of physical RAM,
+     * consisting of total_pages starting at rambase_pfn. The start
+     * address and size of each block is controlled by vNUMA
+     * structures.
      *
      * An ARM guest has GUEST_RAM_BANKS regions of RAM, with
      * rambank_size[i] pages in each. The lowest RAM address
@@ -168,6 +170,12 @@ struct xc_dom_image {
     struct xc_dom_loader *kernel_loader;
     void *private_loader;
 
+    /* vNUMA information */
+    xen_vmemrange_t *vmemranges;
+    unsigned int nr_vmemranges;
+    unsigned int *vnode_to_pnode;
+    unsigned int nr_vnodes;
+
     /* kernel loader */
     struct xc_dom_arch *arch_hooks;
     /* allocate up to virt_alloc_end */
index 40bbac86a86925241cf181f579201516f78e2a85..b7a924ff33ddbf695fecb4c148b828fac198b80e 100644 (file)
@@ -23,6 +23,8 @@
 #ifndef XENGUEST_H
 #define XENGUEST_H
 
+#define XC_NUMA_NO_NODE   (~0U)
+
 #define XCFLAGS_LIVE      (1 << 0)
 #define XCFLAGS_DEBUG     (1 << 1)
 #define XCFLAGS_HVM       (1 << 2)
index bea54f2bd2dfc33d68f35c71a933af0822c9cfd7..af0c9f46e924ce1baada569d3f3fa659128a0e2d 100644 (file)
@@ -760,7 +760,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid)
 int arch_setup_meminit(struct xc_dom_image *dom)
 {
     int rc;
-    xen_pfn_t pfn, allocsz, i, j, mfn;
+    xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
+    int i, j;
 
     rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
     if ( rc )
@@ -811,26 +812,98 @@ int arch_setup_meminit(struct xc_dom_image *dom)
             if ( rc )
                 return rc;
         }
-        /* setup initial p2m */
-        dom->p2m_size = dom->total_pages;
+
+        /* Setup dummy vNUMA information if it's not provided. Note
+         * that this is a valid state if libxl doesn't provide any
+         * vNUMA information.
+         *
+         * The dummy values make libxc allocate all pages from
+         * arbitrary physical nodes. This is the expected behaviour if
+         * no vNUMA configuration is provided to libxc.
+         *
+         * Note that the following hunk is just for the convenience of
+         * allocation code. No defaulting happens in libxc.
+         */
+        if ( dom->nr_vmemranges == 0 )
+        {
+            dom->nr_vmemranges = 1;
+            dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges));
+            dom->vmemranges[0].start = 0;
+            dom->vmemranges[0].end   = dom->total_pages << PAGE_SHIFT;
+            dom->vmemranges[0].flags = 0;
+            dom->vmemranges[0].nid   = 0;
+
+            dom->nr_vnodes = 1;
+            dom->vnode_to_pnode = xc_dom_malloc(dom,
+                                      sizeof(*dom->vnode_to_pnode));
+            dom->vnode_to_pnode[0] = XC_NUMA_NO_NODE;
+        }
+
+        total = dom->p2m_size = 0;
+        for ( i = 0; i < dom->nr_vmemranges; i++ )
+        {
+            total += ((dom->vmemranges[i].end - dom->vmemranges[i].start)
+                      >> PAGE_SHIFT);
+            dom->p2m_size =
+                dom->p2m_size > (dom->vmemranges[i].end >> PAGE_SHIFT) ?
+                dom->p2m_size : (dom->vmemranges[i].end >> PAGE_SHIFT);
+        }
+        if ( total != dom->total_pages )
+        {
+            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                         "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")\n",
+                         __func__, total, dom->total_pages);
+            return -EINVAL;
+        }
+
         dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) *
                                       dom->p2m_size);
         if ( dom->p2m_host == NULL )
             return -EINVAL;
-        for ( pfn = 0; pfn < dom->total_pages; pfn++ )
-            dom->p2m_host[pfn] = pfn;
+        for ( pfn = 0; pfn < dom->p2m_size; pfn++ )
+            dom->p2m_host[pfn] = INVALID_P2M_ENTRY;
 
         /* allocate guest memory */
-        for ( i = rc = allocsz = 0;
-              (i < dom->total_pages) && !rc;
-              i += allocsz )
+        for ( i = 0; i < dom->nr_vmemranges; i++ )
         {
-            allocsz = dom->total_pages - i;
-            if ( allocsz > 1024*1024 )
-                allocsz = 1024*1024;
-            rc = xc_domain_populate_physmap_exact(
-                dom->xch, dom->guest_domid, allocsz,
-                0, 0, &dom->p2m_host[i]);
+            unsigned int memflags;
+            uint64_t pages;
+            unsigned int pnode = dom->vnode_to_pnode[dom->vmemranges[i].nid];
+
+            memflags = 0;
+            if ( pnode != XC_NUMA_NO_NODE )
+                memflags |= XENMEMF_exact_node(pnode);
+
+            pages = (dom->vmemranges[i].end - dom->vmemranges[i].start)
+                >> PAGE_SHIFT;
+            pfn_base = dom->vmemranges[i].start >> PAGE_SHIFT;
+
+            for ( pfn = pfn_base; pfn < pfn_base+pages; pfn++ )
+                dom->p2m_host[pfn] = pfn;
+
+            for ( j = 0; j < pages; j += allocsz )
+            {
+                allocsz = pages - j;
+                if ( allocsz > 1024*1024 )
+                    allocsz = 1024*1024;
+
+                rc = xc_domain_populate_physmap_exact(dom->xch,
+                         dom->guest_domid, allocsz, 0, memflags,
+                         &dom->p2m_host[pfn_base+j]);
+
+                if ( rc )
+                {
+                    if ( pnode != XC_NUMA_NO_NODE )
+                        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                                     "%s: failed to allocate 0x%"PRIx64" pages (v=%d, p=%d)\n",
+                                     __func__, pages, i, pnode);
+                    else
+                        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                                     "%s: failed to allocate 0x%"PRIx64" pages\n",
+                                     __func__, pages);
+                    return rc;
+                }
+            }
         }
 
         /* Ensure no unclaimed pages are left unused.