From ba59e2ce935de7bd6a0cbe36488ca99b3489cf17 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Mon, 16 Mar 2015 09:52:23 +0000 Subject: [PATCH] libxc: allocate memory with vNUMA information for PV guest From libxc's point of view, it only needs to know vnode to pnode mapping and size of each vnode to allocate memory accordingly. Add these fields to xc_dom structure. The caller might not pass in vNUMA information. In that case, a dummy layout is generated for the convenience of libxc's allocation code. The upper layer (libxl etc) still sees the domain has no vNUMA configuration. Note that with this patch a PV x86 guest can have multiple regions of RAM allocated. Signed-off-by: Wei Liu Cc: Ian Campbell Cc: Ian Jackson Cc: Dario Faggioli Cc: Elena Ufimtseva Acked-by: Ian Campbell --- tools/libxc/include/xc_dom.h | 12 +++- tools/libxc/include/xenguest.h | 2 + tools/libxc/xc_dom_x86.c | 101 ++++++++++++++++++++++++++++----- 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h index 6b8ddf41a1..a7d059a3ee 100644 --- a/tools/libxc/include/xc_dom.h +++ b/tools/libxc/include/xc_dom.h @@ -119,8 +119,10 @@ struct xc_dom_image { /* physical memory * - * An x86 PV guest has a single contiguous block of physical RAM, - * consisting of total_pages starting at rambase_pfn. + * An x86 PV guest has one or more blocks of physical RAM, + * consisting of total_pages starting at rambase_pfn. The start + * address and size of each block is controlled by vNUMA + * structures. * * An ARM guest has GUEST_RAM_BANKS regions of RAM, with * rambank_size[i] pages in each. 
The lowest RAM address @@ -168,6 +170,12 @@ struct xc_dom_image { struct xc_dom_loader *kernel_loader; void *private_loader; + /* vNUMA information */ + xen_vmemrange_t *vmemranges; + unsigned int nr_vmemranges; + unsigned int *vnode_to_pnode; + unsigned int nr_vnodes; + /* kernel loader */ struct xc_dom_arch *arch_hooks; /* allocate up to virt_alloc_end */ diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h index 40bbac86a8..b7a924ff33 100644 --- a/tools/libxc/include/xenguest.h +++ b/tools/libxc/include/xenguest.h @@ -23,6 +23,8 @@ #ifndef XENGUEST_H #define XENGUEST_H +#define XC_NUMA_NO_NODE (~0U) + #define XCFLAGS_LIVE (1 << 0) #define XCFLAGS_DEBUG (1 << 1) #define XCFLAGS_HVM (1 << 2) diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c index bea54f2bd2..af0c9f46e9 100644 --- a/tools/libxc/xc_dom_x86.c +++ b/tools/libxc/xc_dom_x86.c @@ -760,7 +760,8 @@ static int x86_shadow(xc_interface *xch, domid_t domid) int arch_setup_meminit(struct xc_dom_image *dom) { int rc; - xen_pfn_t pfn, allocsz, i, j, mfn; + xen_pfn_t pfn, allocsz, mfn, total, pfn_base; + int i, j; rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type); if ( rc ) @@ -811,26 +812,98 @@ int arch_setup_meminit(struct xc_dom_image *dom) if ( rc ) return rc; } - /* setup initial p2m */ - dom->p2m_size = dom->total_pages; + + /* Setup dummy vNUMA information if it's not provided. Note + * that this is a valid state if libxl doesn't provide any + * vNUMA information. + * + * The dummy values make libxc allocate all pages from + * arbitrary physical nodes. This is the expected behaviour if + * no vNUMA configuration is provided to libxc. + * + * Note that the following hunk is just for the convenience of + * allocation code. No defaulting happens in libxc. 
+ */ + if ( dom->nr_vmemranges == 0 ) + { + dom->nr_vmemranges = 1; + dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges)); + dom->vmemranges[0].start = 0; + dom->vmemranges[0].end = dom->total_pages << PAGE_SHIFT; + dom->vmemranges[0].flags = 0; + dom->vmemranges[0].nid = 0; + + dom->nr_vnodes = 1; + dom->vnode_to_pnode = xc_dom_malloc(dom, + sizeof(*dom->vnode_to_pnode)); + dom->vnode_to_pnode[0] = XC_NUMA_NO_NODE; + } + + total = dom->p2m_size = 0; + for ( i = 0; i < dom->nr_vmemranges; i++ ) + { + total += ((dom->vmemranges[i].end - dom->vmemranges[i].start) + >> PAGE_SHIFT); + dom->p2m_size = + dom->p2m_size > (dom->vmemranges[i].end >> PAGE_SHIFT) ? + dom->p2m_size : (dom->vmemranges[i].end >> PAGE_SHIFT); + } + if ( total != dom->total_pages ) + { + xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, + "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")\n", + __func__, total, dom->total_pages); + return -EINVAL; + } + dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->p2m_size); if ( dom->p2m_host == NULL ) return -EINVAL; - for ( pfn = 0; pfn < dom->total_pages; pfn++ ) - dom->p2m_host[pfn] = pfn; + for ( pfn = 0; pfn < dom->p2m_size; pfn++ ) + dom->p2m_host[pfn] = INVALID_P2M_ENTRY; /* allocate guest memory */ - for ( i = rc = allocsz = 0; - (i < dom->total_pages) && !rc; - i += allocsz ) + for ( i = 0; i < dom->nr_vmemranges; i++ ) { - allocsz = dom->total_pages - i; - if ( allocsz > 1024*1024 ) - allocsz = 1024*1024; - rc = xc_domain_populate_physmap_exact( - dom->xch, dom->guest_domid, allocsz, - 0, 0, &dom->p2m_host[i]); + unsigned int memflags; + uint64_t pages; + unsigned int pnode = dom->vnode_to_pnode[dom->vmemranges[i].nid]; + + memflags = 0; + if ( pnode != XC_NUMA_NO_NODE ) + memflags |= XENMEMF_exact_node(pnode); + + pages = (dom->vmemranges[i].end - dom->vmemranges[i].start) + >> PAGE_SHIFT; + pfn_base = dom->vmemranges[i].start >> PAGE_SHIFT; + + for ( pfn = pfn_base; pfn < pfn_base+pages; pfn++ ) + dom->p2m_host[pfn] = pfn; 
+ + for ( j = 0; j < pages; j += allocsz ) + { + allocsz = pages - j; + if ( allocsz > 1024*1024 ) + allocsz = 1024*1024; + + rc = xc_domain_populate_physmap_exact(dom->xch, + dom->guest_domid, allocsz, 0, memflags, + &dom->p2m_host[pfn_base+j]); + + if ( rc ) + { + if ( pnode != XC_NUMA_NO_NODE ) + xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, + "%s: failed to allocate 0x%"PRIx64" pages (v=%d, p=%d)\n", + __func__, pages, i, pnode); + else + xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, + "%s: failed to allocate 0x%"PRIx64" pages\n", + __func__, pages); + return rc; + } + } } /* Ensure no unclaimed pages are left unused. -- 2.30.2