xen/pvshim: memory hotplug
author: Roger Pau Monne <roger.pau@citrix.com>
Thu, 11 Jan 2018 11:41:20 +0000 (11:41 +0000)
committer: Wei Liu <wei.liu2@citrix.com>
Tue, 16 Jan 2018 18:34:05 +0000 (18:34 +0000)
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
xen/arch/x86/pv/shim.c
xen/common/memory.c
xen/include/asm-x86/pv/shim.h

index 68ec7bed8ed866da6567da6d3f434006f3b62fbe..4120cc550ed73af7f4249c8474cec7bcb50cf48d 100644 (file)
@@ -48,6 +48,9 @@ static unsigned int nr_grant_list;
 static unsigned long *grant_frames;
 static DEFINE_SPINLOCK(grant_lock);
 
+/*
+ * Pages currently ballooned out: pv_shim_offline_memory() appends the chunks
+ * for which XENMEM_decrease_reservation succeeded, and
+ * pv_shim_online_memory() takes chunks back off this list before trying to
+ * repopulate them.
+ */
+static PAGE_LIST_HEAD(balloon);
+/* Held while the balloon list is walked or modified. */
+static DEFINE_SPINLOCK(balloon_lock);
+
 static long pv_shim_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg);
 static long pv_shim_grant_table_op(unsigned int cmd,
                                    XEN_GUEST_HANDLE_PARAM(void) uop,
@@ -814,6 +817,115 @@ long pv_shim_cpu_down(void *data)
     return 0;
 }
 
+/*
+ * Issue the memory_op sub-command @cmd for every entry of @list, using
+ * extents of size @order.
+ *
+ * The frame numbers of the list entries are gathered into an on-stack buffer
+ * and handed to xen_hypercall_memory_op() in batches of at most
+ * ARRAY_SIZE(frames) extents. A positive return value of the hypercall is
+ * treated as the number of extents it processed.
+ *
+ * Processing stops at the first batch that is not handled completely.
+ *
+ * Returns the accumulated number of extents reported as processed.
+ */
+static unsigned long batch_memory_op(unsigned int cmd, unsigned int order,
+                                     const struct page_list_head *list)
+{
+    unsigned long frames[64];
+    unsigned long total = 0;
+    const struct page_info *cur;
+    struct xen_memory_reservation res = {
+        .domid = DOMID_SELF,
+        .extent_order = order,
+    };
+
+    set_xen_guest_handle(res.extent_start, frames);
+
+    page_list_for_each ( cur, list )
+    {
+        long rc;
+
+        frames[res.nr_extents++] = page_to_mfn(cur);
+
+        /* Keep collecting until the buffer is full or the list ends. */
+        if ( res.nr_extents != ARRAY_SIZE(frames) &&
+             page_list_next(cur, list) )
+            continue;
+
+        rc = xen_hypercall_memory_op(cmd, &res);
+
+        /* Errors (rc <= 0) contribute nothing to the total. */
+        if ( rc > 0 )
+            total += rc;
+
+        /* A short or failed batch ends the whole operation. */
+        if ( rc != res.nr_extents )
+            break;
+
+        /* Start the next batch with an empty buffer. */
+        res.nr_extents = 0;
+    }
+
+    return total;
+}
+
+/*
+ * Bring previously offlined memory back into use.
+ *
+ * Up to @nr chunks of exactly @order are taken off the balloon list and
+ * XENMEM_populate_physmap is issued for them. Chunks the hypercall reports
+ * as populated are released with free_domheap_pages(); any remaining chunks
+ * are put back on the balloon list.
+ *
+ * Shortfalls are only logged; nothing is reported back to the caller.
+ */
+void pv_shim_online_memory(unsigned int nr, unsigned int order)
+{
+    struct page_info *page, *tmp;
+    PAGE_LIST_HEAD(list);
+
+    /*
+     * Nothing requested. Without this check the pre-decrement in the loop
+     * below would wrap nr around and claim every matching balloon entry.
+     */
+    if ( !nr )
+        return;
+
+    /* Move up to nr suitable chunks from the balloon to a private list. */
+    spin_lock(&balloon_lock);
+    page_list_for_each_safe ( page, tmp, &balloon )
+    {
+        /* TODO: add support for splitting high order memory chunks. */
+        if ( page->v.free.order != order )
+            continue;
+
+        page_list_del(page, &balloon);
+        page_list_add_tail(page, &list);
+        if ( !--nr )
+            break;
+    }
+    spin_unlock(&balloon_lock);
+
+    /* nr now holds the number of chunks that could not be found. */
+    if ( nr )
+        gprintk(XENLOG_WARNING,
+                "failed to allocate %u extents of order %u for onlining\n",
+                nr, order);
+
+    /* Repopulate what was found; nr becomes the number of successes. */
+    nr = batch_memory_op(XENMEM_populate_physmap, order, &list);
+    while ( nr-- )
+    {
+        BUG_ON((page = page_list_remove_head(&list)) == NULL);
+        free_domheap_pages(page, order);
+    }
+
+    /* Whatever is left was not populated: return it to the balloon. */
+    if ( !page_list_empty(&list) )
+    {
+        gprintk(XENLOG_WARNING,
+                "failed to online some of the memory regions\n");
+        spin_lock(&balloon_lock);
+        page_list_splice(&list, &balloon);
+        spin_unlock(&balloon_lock);
+    }
+}
+
+/*
+ * Take memory out of use.
+ *
+ * Tries to allocate @nr chunks of @order with alloc_domheap_pages() and
+ * issues XENMEM_decrease_reservation for them. Chunks the hypercall reports
+ * as processed are appended to the balloon list; the remaining ones are
+ * released again with free_domheap_pages().
+ *
+ * Shortfalls are only logged; nothing is reported back to the caller.
+ */
+void pv_shim_offline_memory(unsigned int nr, unsigned int order)
+{
+    PAGE_LIST_HEAD(list);
+    struct page_info *pg;
+    unsigned int reserved, released;
+
+    /* Collect as many chunks as the allocator is able to provide. */
+    for ( reserved = 0; reserved < nr; reserved++ )
+    {
+        pg = alloc_domheap_pages(NULL, order, 0);
+        if ( !pg )
+            break;
+
+        page_list_add_tail(pg, &list);
+        /* Record the chunk size; pv_shim_online_memory() matches on it. */
+        pg->v.free.order = order;
+    }
+
+    if ( reserved != nr )
+        gprintk(XENLOG_WARNING,
+                "failed to reserve %u extents of order %u for offlining\n",
+                nr - reserved, order);
+
+    released = batch_memory_op(XENMEM_decrease_reservation, order, &list);
+
+    /* The first 'released' entries were processed: balloon them. */
+    spin_lock(&balloon_lock);
+    for ( ; released; released-- )
+    {
+        pg = page_list_remove_head(&list);
+        BUG_ON(pg == NULL);
+        page_list_add_tail(pg, &balloon);
+    }
+    spin_unlock(&balloon_lock);
+
+    if ( page_list_empty(&list) )
+        return;
+
+    /* The rest was not processed by the hypercall: give it back. */
+    gprintk(XENLOG_WARNING,
+            "failed to offline some of the memory regions\n");
+    while ( (pg = page_list_remove_head(&list)) != NULL )
+        free_domheap_pages(pg, order);
+}
+
 domid_t get_initial_domain_id(void)
 {
     uint32_t eax, ebx, ecx, edx;
index 5a1508a292be1c6e7ae90c12db59225cc8a9751c..71e19aa629fd430097621cdbb47aa5e7c8e9bb73 100644 (file)
 #include <public/memory.h>
 #include <xsm/xsm.h>
 
+#ifdef CONFIG_X86
+#include <asm/guest.h>
+#endif
+
 struct memop_args {
     /* INPUT */
     struct domain *domain;     /* Domain to be affected. */
@@ -993,6 +997,12 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
             return start_extent;
         }
 
+#ifdef CONFIG_X86
+        if ( pv_shim && op != XENMEM_decrease_reservation && !args.preempted )
+            /* Avoid calling pv_shim_online_memory when preempted. */
+            pv_shim_online_memory(args.nr_extents, args.extent_order);
+#endif
+
         switch ( op )
         {
         case XENMEM_increase_reservation:
@@ -1015,6 +1025,17 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
                 __HYPERVISOR_memory_op, "lh",
                 op | (rc << MEMOP_EXTENT_SHIFT), arg);
 
+#ifdef CONFIG_X86
+        if ( pv_shim && op == XENMEM_decrease_reservation )
+            /*
+             * Only call pv_shim_offline_memory when the hypercall has
+             * finished. Note that nr_done is used to cope in case the
+             * hypercall has failed and only part of the extents were
+             * processed.
+             *
+             * pv_shim_offline_memory() takes (nr, order): pass the number
+             * of extents actually done and the extent order.
+             */
+            pv_shim_offline_memory(args.nr_done, args.extent_order);
+#endif
+
         break;
 
     case XENMEM_exchange:
index eb59ddd38a1464e265f08680280952072e8ae96e..fb739772df158ab070381ed26ffb01cbe7dfb445 100644 (file)
@@ -39,6 +39,8 @@ int pv_shim_shutdown(uint8_t reason);
 void pv_shim_inject_evtchn(unsigned int port);
 long pv_shim_cpu_up(void *data);
 long pv_shim_cpu_down(void *data);
+void pv_shim_online_memory(unsigned int nr, unsigned int order);
+void pv_shim_offline_memory(unsigned int nr, unsigned int order);
 domid_t get_initial_domain_id(void);
 uint64_t pv_shim_mem(uint64_t avail);
 
@@ -72,6 +74,14 @@ static inline long pv_shim_cpu_down(void *data)
     ASSERT_UNREACHABLE();
     return 0;
 }
+/*
+ * Inline stand-in for pv_shim_online_memory(); it is never expected to be
+ * reached and asserts if it is.
+ * NOTE(review): presumably the variant used when shim support is compiled
+ * out - confirm against the enclosing preprocessor conditional.
+ */
+static inline void pv_shim_online_memory(unsigned int nr, unsigned int order)
+{
+    ASSERT_UNREACHABLE();
+}
+/*
+ * Inline stand-in for pv_shim_offline_memory(); it is never expected to be
+ * reached and asserts if it is.
+ * NOTE(review): presumably the variant used when shim support is compiled
+ * out - confirm against the enclosing preprocessor conditional.
+ */
+static inline void pv_shim_offline_memory(unsigned int nr, unsigned int order)
+{
+    ASSERT_UNREACHABLE();
+}
 static inline domid_t get_initial_domain_id(void)
 {
     return 0;