[IA64] vti domain save/restore: libxc: implement vti domain save/restore
authorAlex Williamson <alex.williamson@hp.com>
Sun, 21 Oct 2007 20:57:13 +0000 (14:57 -0600)
committerAlex Williamson <alex.williamson@hp.com>
Sun, 21 Oct 2007 20:57:13 +0000 (14:57 -0600)
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
tools/libxc/ia64/xc_ia64_linux_restore.c
tools/libxc/ia64/xc_ia64_linux_save.c
tools/libxc/ia64/xc_ia64_save_restore.h

index 258cfdee8058f273360d9efe67bb7f3b77096d85..5336b5bd513a802fe7993c4e5a5d3ca2aa8f8c86 100644 (file)
@@ -8,6 +8,7 @@
  *
  * Copyright (c) 2007 Isaku Yamahata <yamahata@valinux.co.jp>
  *   Use foreign p2m exposure.
+ *   VTi domain support
  */
 
 #include <stdlib.h>
@@ -17,6 +18,7 @@
 #include "xc_ia64_save_restore.h"
 #include "xc_ia64.h"
 #include "xc_efi.h"
+#include "xen/hvm/params.h"
 
 #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
 
@@ -75,6 +77,354 @@ read_page(int xc_handle, int io_fd, uint32_t dom, unsigned long pfn)
     return 0;
 }
 
+/*
+ * Get the list of PFNs that are not in the pseudo-phys map.
+ * Although we allocate pages on demand, the balloon driver may
+ * have decreased them simultaneously. So we have to free the freed
+ * pages here.
+ */
+static int
+xc_ia64_recv_unallocated_list(int xc_handle, int io_fd, uint32_t dom,
+                              struct xen_ia64_p2m_table *p2m_table)
+{
+    int rc = -1;
+    unsigned int i;
+    unsigned int count;
+    unsigned long *pfntab = NULL;
+    unsigned int nr_frees;
+
+    if (!read_exact(io_fd, &count, sizeof(count))) {
+        ERROR("Error when reading pfn count");
+        goto out;
+    }
+
+    pfntab = malloc(sizeof(unsigned long) * count);
+    if (pfntab == NULL) {
+        ERROR("Out of memory");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
+        ERROR("Error when reading pfntab");
+        goto out;
+    }
+
+    nr_frees = 0;
+    for (i = 0; i < count; i++) {
+        if (xc_ia64_p2m_allocated(p2m_table, pfntab[i])) {
+            pfntab[nr_frees] = pfntab[i];
+            nr_frees++;
+        }
+    }
+    if (nr_frees > 0) {
+        if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees,
+                                                  0, pfntab) < 0) {
+            PERROR("Could not decrease reservation");
+            goto out;
+        } else
+            DPRINTF("Decreased reservation by %d / %d pages\n",
+                    nr_frees, count);
+    }
+
+    rc = 0;
+    
+ out:
+    if (pfntab != NULL)
+        free(pfntab);
+    return rc;
+}
+
+static int
+xc_ia64_recv_vcpu_context(int xc_handle, int io_fd, uint32_t dom,
+                          uint32_t vcpu, vcpu_guest_context_t *ctxt)
+{
+    if (!read_exact(io_fd, ctxt, sizeof(*ctxt))) {
+        ERROR("Error when reading ctxt");
+        return -1;
+    }
+
+    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]);
+
+    /* Initialize and set registers.  */
+    ctxt->flags = VGCF_EXTRA_REGS;
+    if (xc_vcpu_setcontext(xc_handle, dom, vcpu, ctxt) != 0) {
+        ERROR("Couldn't set vcpu context");
+        return -1;
+    }
+
+    /* Just a check.  */
+    ctxt->flags = 0;
+    if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) {
+        ERROR("Could not get vcpu context");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Read shared info.  */
+static int
+xc_ia64_recv_shared_info(int xc_handle, int io_fd, uint32_t dom,
+                         unsigned long shared_info_frame,
+                         unsigned long *start_info_pfn)
+{
+    unsigned int i;
+
+    /* The new domain's shared-info frame. */
+    shared_info_t *shared_info;
+    
+    /* Read shared info.  */
+    shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE,
+                                       shared_info_frame);
+    if (shared_info == NULL) {
+        ERROR("cannot map page");
+        return -1;
+    }
+
+    if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
+        ERROR("Error when reading shared_info page");
+        munmap(shared_info, PAGE_SIZE);
+        return -1;
+    }
+
+    /* clear any pending events and the selector */
+    memset(&(shared_info->evtchn_pending[0]), 0,
+           sizeof (shared_info->evtchn_pending));
+    for (i = 0; i < MAX_VIRT_CPUS; i++)
+        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
+
+    if (start_info_pfn != NULL)
+        *start_info_pfn = shared_info->arch.start_info_pfn;
+
+    munmap (shared_info, PAGE_SIZE);
+
+    return 0;
+}
+
+static int
+xc_ia64_pv_recv_context(int xc_handle, int io_fd, uint32_t dom,
+                        unsigned long shared_info_frame,
+                        struct xen_ia64_p2m_table *p2m_table,
+                        unsigned int store_evtchn, unsigned long *store_mfn,
+                        unsigned int console_evtchn,
+                        unsigned long *console_mfn)
+{
+    int rc = -1;
+    unsigned long gmfn;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* A temporary mapping of the guest's start_info page. */
+    start_info_t *start_info;
+
+    if (lock_pages(&ctxt, sizeof(ctxt))) {
+        /* needed for build domctl, but might as well do early */
+        ERROR("Unable to lock_pages ctxt");
+        return -1;
+    }
+
+    if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt))
+        goto out;
+
+    /* Then get privreg page.  */
+    if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) {
+        ERROR("Could not read vcpu privregs");
+        goto out;
+    }
+
+    /* Read shared info.  */
+    if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom,
+                                 shared_info_frame, &gmfn))
+        goto out;
+
+    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
+    if (populate_page_if_necessary(xc_handle, dom, gmfn, p2m_table)) {
+        ERROR("cannot populate page 0x%lx", gmfn);
+        goto out;
+    }
+    start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ | PROT_WRITE, gmfn);
+    if (start_info == NULL) {
+        ERROR("cannot map start_info page");
+        goto out;
+    }
+    start_info->nr_pages = p2m_size;
+    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
+    start_info->flags = 0;
+    *store_mfn = start_info->store_mfn;
+    start_info->store_evtchn = store_evtchn;
+    *console_mfn = start_info->console.domU.mfn;
+    start_info->console.domU.evtchn = console_evtchn;
+    munmap(start_info, PAGE_SIZE);
+
+    rc = 0;
+
+ out:
+    unlock_pages(&ctxt, sizeof(ctxt));
+    return rc;
+}
+
+static int
+xc_ia64_hvm_recv_context(int xc_handle, int io_fd, uint32_t dom,
+                         unsigned long shared_info_frame,
+                         struct xen_ia64_p2m_table *p2m_table,
+                         unsigned int store_evtchn, unsigned long *store_mfn,
+                         unsigned int console_evtchn,
+                         unsigned long *console_mfn)
+{
+    int rc = -1;
+    xc_dominfo_t info;
+    unsigned int i;
+    
+    /* cpu */
+    uint64_t max_virt_cpus;
+    unsigned long vcpumap_size;
+    uint64_t *vcpumap = NULL;
+
+    /* HVM: magic frames for ioreqs and xenstore comms */
+    const int hvm_params[] = {
+        HVM_PARAM_IOREQ_PFN,
+        HVM_PARAM_BUFIOREQ_PFN,
+        HVM_PARAM_STORE_PFN,
+    };
+    const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]);
+    /* ioreq_pfn, bufioreq_pfn, store_pfn */
+    uint64_t magic_pfns[NR_PARAMS];
+
+    /* HVM: a buffer for holding HVM context */
+    uint64_t rec_size = 0;
+    uint8_t *hvm_buf = NULL;
+
+    /* Read shared info.  */
+    if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom, shared_info_frame,
+                                 NULL))
+        goto out;
+
+    /* vcpu map */
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        goto out;
+    }
+    if (!read_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+        ERROR("error reading max_virt_cpus");
+        goto out;
+    }
+    if (max_virt_cpus < info.max_vcpu_id) {
+        ERROR("too large max_virt_cpus %i < %i\n",
+              max_virt_cpus, info.max_vcpu_id);
+        goto out;
+    }
+    vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+        sizeof(vcpumap[0]);
+    vcpumap = malloc(vcpumap_size);
+    if (vcpumap == NULL) {
+        ERROR("memory alloc for vcpumap");
+        goto out;
+    }
+    memset(vcpumap, 0, vcpumap_size);
+    if (!read_exact(io_fd, vcpumap, vcpumap_size)) {
+        ERROR("read vcpumap");
+        goto out;
+    }
+    
+    /* vcpu context */
+    for (i = 0; i <= info.max_vcpu_id; i++) {
+        /* A copy of the CPU context of the guest. */
+        vcpu_guest_context_t ctxt;
+
+        if (!__test_bit(i, vcpumap))
+            continue;
+
+        if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, i, &ctxt))
+            goto out;
+
+        // system context of vcpu is received as hvm context.
+    }    
+
+    /* Set HVM-specific parameters */
+    if (!read_exact(io_fd, magic_pfns, sizeof(magic_pfns))) {
+        ERROR("error reading magic page addresses");
+        goto out;
+    }
+
+    /* These comms pages need to be zeroed at the start of day */
+    for (i = 0; i < NR_PARAMS; i++) {
+        rc = xc_clear_domain_page(xc_handle, dom, magic_pfns[i]);
+        if (rc != 0) {
+            ERROR("error zeroing magic pages: %i", rc);
+            goto out;
+        }
+        rc = xc_set_hvm_param(xc_handle, dom, hvm_params[i], magic_pfns[i]);
+        if (rc != 0) {
+            ERROR("error setting HVM params: %i", rc);
+            goto out;
+        }
+    }
+    rc = xc_set_hvm_param(xc_handle, dom,
+                          HVM_PARAM_STORE_EVTCHN, store_evtchn);
+    if (rc != 0) {
+        ERROR("error setting HVM params: %i", rc);
+        goto out;
+    }
+    *store_mfn = magic_pfns[2];
+
+    /* Read HVM context */
+    if (!read_exact(io_fd, &rec_size, sizeof(rec_size))) {
+        ERROR("error read hvm context size!\n");
+        goto out;
+    }
+
+    hvm_buf = malloc(rec_size);
+    if (hvm_buf == NULL) {
+        ERROR("memory alloc for hvm context buffer failed");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    if (!read_exact(io_fd, hvm_buf, rec_size)) {
+        ERROR("error loading the HVM context");
+        goto out;
+    }
+
+    rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_size);
+    if (rc != 0) {
+        ERROR("error setting the HVM context");
+        goto out;
+    }
+       
+    rc = 0;
+
+out:
+    if (vcpumap != NULL)
+        free(vcpumap);
+    if (hvm_buf != NULL)
+        free(hvm_buf);
+    return rc;
+}
+
+/*
+ * hvm domain requires IO pages allocated when XEN_DOMCTL_arch_setup
+ */
+static int
+xc_ia64_hvm_domain_setup(int xc_handle, uint32_t dom)
+{
+    int rc;
+    xen_pfn_t pfn_list[] = {
+        IO_PAGE_START >> PAGE_SHIFT,
+        BUFFER_IO_PAGE_START >> PAGE_SHIFT,
+        BUFFER_PIO_PAGE_START >> PAGE_SHIFT,
+    };
+    unsigned long nr_pages = sizeof(pfn_list) / sizeof(pfn_list[0]);
+
+    rc = xc_domain_memory_populate_physmap(xc_handle, dom, nr_pages,
+                                           0, 0, &pfn_list[0]);
+    if (rc != 0)
+        PERROR("Could not allocate IO page or buffer io page.\n");
+    return rc;
+}
+
 int
 xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                  unsigned int store_evtchn, unsigned long *store_mfn,
@@ -83,29 +433,14 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
 {
     DECLARE_DOMCTL;
     int rc = 1;
-    unsigned int i;
-    unsigned long gmfn;
     unsigned long ver;
 
     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
-    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
-    shared_info_t *shared_info = (shared_info_t *)shared_info_page;
-
-    /* A copy of the CPU context of the guest. */
-    vcpu_guest_context_t ctxt;
-
-    /* A temporary mapping of the guest's start_info page. */
-    start_info_t *start_info;
 
     struct xen_ia64_p2m_table p2m_table;
     xc_ia64_p2m_init(&p2m_table);
 
-    if (hvm) {
-        ERROR("HVM Restore is unsupported");
-        goto out;
-    }
-
     /* For info only */
     nr_pfns = 0;
 
@@ -125,17 +460,14 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
         goto out;
     }
 
-    if (lock_pages(&ctxt, sizeof(ctxt))) {
-        /* needed for build domctl, but might as well do early */
-        ERROR("Unable to lock_pages ctxt");
-        return 1;
-    }
-
     if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) {
         ERROR("read: domain setup");
         goto out;
     }
 
+    if (hvm && xc_ia64_hvm_domain_setup(xc_handle, dom) != 0)
+        goto out;
+    
     /* Build firmware (will be overwritten).  */
     domctl.domain = (domid_t)dom;
     domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
@@ -212,6 +544,7 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
     DPRINTF("Reloading memory pages:   0%%\n");
 
     while (1) {
+        unsigned long gmfn;
         if (!read_exact(io_fd, &gmfn, sizeof(unsigned long))) {
             ERROR("Error when reading batch size");
             goto out;
@@ -229,127 +562,19 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
 
     DPRINTF("Received all pages\n");
 
-    /*
-     * Get the list of PFNs that are not in the psuedo-phys map.
-     * Although we allocate pages on demand, balloon driver may 
-     * decreased simaltenously. So we have to free the freed
-     * pages here.
-     */
-    {
-        unsigned int count;
-        unsigned long *pfntab;
-        unsigned int nr_frees;
-
-        if (!read_exact(io_fd, &count, sizeof(count))) {
-            ERROR("Error when reading pfn count");
-            goto out;
-        }
-
-        pfntab = malloc(sizeof(unsigned long) * count);
-        if (!pfntab) {
-            ERROR("Out of memory");
-            goto out;
-        }
-
-        if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
-            ERROR("Error when reading pfntab");
-            free(pfntab);
-            goto out;
-        }
-
-        nr_frees = 0;
-        for (i = 0; i < count; i++) {
-            if (xc_ia64_p2m_allocated(&p2m_table, pfntab[i])) {
-                pfntab[nr_frees] = pfntab[i];
-                nr_frees++;
-            }
-        }
-        if (nr_frees > 0) {
-            if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees,
-                                                      0, pfntab) < 0) {
-                ERROR("Could not decrease reservation : %d", rc);
-                free(pfntab);
-                goto out;
-            }
-            else
-                DPRINTF("Decreased reservation by %d / %d pages\n",
-                        nr_frees, count);
-        }
-        free(pfntab);
-    }
-
-    if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
-        ERROR("Error when reading ctxt");
-        goto out;
-    }
-
-    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt.regs.ip, ctxt.regs.b[0]);
-
-    /* Initialize and set registers.  */
-    ctxt.flags = VGCF_EXTRA_REGS;
-    domctl.cmd = XEN_DOMCTL_setvcpucontext;
-    domctl.domain = (domid_t)dom;
-    domctl.u.vcpucontext.vcpu   = 0;
-    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
-    if (xc_domctl(xc_handle, &domctl) != 0) {
-        ERROR("Couldn't set vcpu context");
-        goto out;
-    }
-
-    /* Just a check.  */
-    if (xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) {
-        ERROR("Could not get vcpu context");
+    if (xc_ia64_recv_unallocated_list(xc_handle, io_fd, dom, &p2m_table))
         goto out;
-    }
 
-    /* Then get privreg page.  */
-    if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) {
-        ERROR("Could not read vcpu privregs");
+    if (!hvm)
+        rc = xc_ia64_pv_recv_context(xc_handle, io_fd, dom, shared_info_frame,
+                                     &p2m_table, store_evtchn, store_mfn,
+                                     console_evtchn, console_mfn);
+    else
+        rc = xc_ia64_hvm_recv_context(xc_handle, io_fd, dom, shared_info_frame,
+                                      &p2m_table, store_evtchn, store_mfn,
+                                      console_evtchn, console_mfn);
+    if (rc)
         goto out;
-    }
-
-    /* Read shared info.  */
-    shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE, shared_info_frame);
-    if (shared_info == NULL) {
-            ERROR("cannot map page");
-            goto out;
-    }
-    if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
-            ERROR("Error when reading shared_info page");
-            munmap(shared_info, PAGE_SIZE);
-            goto out;
-    }
-
-    /* clear any pending events and the selector */
-    memset(&(shared_info->evtchn_pending[0]), 0,
-           sizeof (shared_info->evtchn_pending));
-    for (i = 0; i < MAX_VIRT_CPUS; i++)
-        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
-
-    gmfn = shared_info->arch.start_info_pfn;
-
-    munmap (shared_info, PAGE_SIZE);
-
-    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
-    if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table)) {
-        ERROR("cannot populate page 0x%lx", gmfn);
-        goto out;
-    }
-    start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                      PROT_READ | PROT_WRITE, gmfn);
-    if (start_info == NULL) {
-        ERROR("cannot map start_info page");
-        goto out;
-    }
-    start_info->nr_pages = p2m_size;
-    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
-    start_info->flags = 0;
-    *store_mfn = start_info->store_mfn;
-    start_info->store_evtchn = store_evtchn;
-    *console_mfn = start_info->console.domU.mfn;
-    start_info->console.domU.evtchn = console_evtchn;
-    munmap(start_info, PAGE_SIZE);
 
     /*
      * Safety checking of saved context:
@@ -368,12 +593,10 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
     rc = 0;
 
  out:
-    if ((rc != 0) && (dom != 0))
-        xc_domain_destroy(xc_handle, dom);
-
     xc_ia64_p2m_unmap(&p2m_table);
 
-    unlock_pages(&ctxt, sizeof(ctxt));
+    if ((rc != 0) && (dom != 0))
+        xc_domain_destroy(xc_handle, dom);
 
     DPRINTF("Restore exit with rc=%d\n", rc);
 
index aca71b7b22582c4cdfed5c498bba1c7edc517842..046f7c4cdde5918c251c1654543dc8a6420d1b78 100644 (file)
@@ -8,6 +8,7 @@
  *
  * Copyright (c) 2007 Isaku Yamahata <yamahata@valinux.co.jp>
  *   Use foreign p2m exposure.
+ *   VTi domain support.
  */
 
 #include <inttypes.h>
@@ -20,6 +21,7 @@
 #include "xc_ia64.h"
 #include "xc_ia64_save_restore.h"
 #include "xc_efi.h"
+#include "xen/hvm/params.h"
 
 /*
 ** Default values for important tuning parameters. Can override by passing
 ** During (live) save/migrate, we maintain a number of bitmaps to track
 ** which pages we have to send, and to skip.
 */
-
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-
-#define BITMAP_ENTRY(_nr,_bmap) \
-   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
-
-#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
-
 static inline int test_bit(int nr, volatile void * addr)
 {
     return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
@@ -136,6 +130,271 @@ retry:
     return -1;
 }
 
+static inline int
+md_is_not_ram(const efi_memory_desc_t *md)
+{
+    return ((md->type != EFI_CONVENTIONAL_MEMORY) ||
+            (md->attribute != EFI_MEMORY_WB) ||
+            (md->num_pages == 0));
+}
+
+/*
+ * Send through a list of all the PFNs that were not in map at the close.
+ * We send pages which were allocated. However, the balloon driver may
+ * have decreased them after sending. So we have to check for freed
+ * pages after pausing the domain.
+ */
+static int
+xc_ia64_send_unallocated_list(int xc_handle, int io_fd, 
+                              struct xen_ia64_p2m_table *p2m_table,
+                              xen_ia64_memmap_info_t *memmap_info, 
+                              void *memmap_desc_start, void *memmap_desc_end)
+{
+    void *p;
+    efi_memory_desc_t *md;
+
+    unsigned long N;
+    unsigned long pfntab[1024];
+    unsigned int j;
+
+    j = 0;
+    for (p = memmap_desc_start;
+         p < memmap_desc_end;
+         p += memmap_info->efi_memdesc_size) {
+        md = p;
+
+        if (md_is_not_ram(md))
+            continue;
+
+        for (N = md->phys_addr >> PAGE_SHIFT;
+             N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
+                 PAGE_SHIFT;
+             N++) {
+            if (!xc_ia64_p2m_allocated(p2m_table, N))
+                j++;
+        }
+    }
+    if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
+        ERROR("Error when writing to state file (6a)");
+        return -1;
+    }
+        
+    j = 0;
+    for (p = memmap_desc_start;
+         p < memmap_desc_end;
+         p += memmap_info->efi_memdesc_size) {
+        md = p;
+
+        if (md_is_not_ram(md))
+            continue;
+
+        for (N = md->phys_addr >> PAGE_SHIFT;
+             N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
+                 PAGE_SHIFT;
+             N++) {
+            if (!xc_ia64_p2m_allocated(p2m_table, N))
+                pfntab[j++] = N;
+            if (j == sizeof(pfntab)/sizeof(pfntab[0])) {
+                if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
+                    ERROR("Error when writing to state file (6b)");
+                    return -1;
+                }
+                j = 0;
+            }
+        }
+    }
+    if (j > 0) {
+        if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
+            ERROR("Error when writing to state file (6c)");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int
+xc_ia64_send_vcpu_context(int xc_handle, int io_fd, uint32_t dom,
+                          uint32_t vcpu, vcpu_guest_context_t *ctxt)
+{
+    if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) {
+        ERROR("Could not get vcpu context");
+        return -1;
+    }
+
+    if (!write_exact(io_fd, ctxt, sizeof(*ctxt))) {
+        ERROR("Error when writing to state file (1)");
+        return -1;
+    }
+
+    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]);
+    return 0;
+}
+
+static int
+xc_ia64_send_shared_info(int xc_handle, int io_fd, shared_info_t *live_shinfo)
+{
+    if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
+        ERROR("Error when writing to state file (1)");
+        return -1;
+    }
+    return 0;
+}
+
+static int
+xc_ia64_pv_send_context(int xc_handle, int io_fd, uint32_t dom,
+                        shared_info_t *live_shinfo)
+{
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+    char *mem;
+
+    if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt))
+        return -1;
+
+    mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                               PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
+    if (mem == NULL) {
+        ERROR("cannot map privreg page");
+        return -1;
+    }
+    if (!write_exact(io_fd, mem, PAGE_SIZE)) {
+        ERROR("Error when writing privreg to state file (5)");
+        munmap(mem, PAGE_SIZE);
+        return -1;
+    }
+    munmap(mem, PAGE_SIZE);
+
+    if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+        return -1;
+
+    return 0;
+}
+
+static int
+xc_ia64_hvm_send_context(int xc_handle, int io_fd, uint32_t dom,
+                         const xc_dominfo_t *info, shared_info_t *live_shinfo)
+{
+    int rc = -1;
+    unsigned int i;
+
+    /* vcpu map */
+    uint64_t max_virt_cpus;
+    unsigned long vcpumap_size;
+    uint64_t *vcpumap = NULL;
+
+    /* HVM: magic frames for ioreqs and xenstore comms */
+    const int hvm_params[] = {
+        HVM_PARAM_IOREQ_PFN,
+        HVM_PARAM_BUFIOREQ_PFN,
+        HVM_PARAM_STORE_PFN,
+    };
+    const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]);
+    /* ioreq_pfn, bufioreq_pfn, store_pfn */
+    uint64_t magic_pfns[NR_PARAMS];
+
+    /* HVM: a buffer for holding HVM context */
+    uint64_t rec_size;
+    uint64_t hvm_buf_size = 0;
+    uint8_t *hvm_buf = NULL;
+
+    if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+        return -1;
+
+    /* vcpu map */
+    max_virt_cpus = MAX_VIRT_CPUS;
+    vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+        sizeof(vcpumap[0]);
+    vcpumap = malloc(vcpumap_size);
+    if (vcpumap == NULL) {
+        ERROR("memory alloc for vcpumap");
+        goto out;
+    }
+    memset(vcpumap, 0, vcpumap_size);
+
+    for (i = 0; i <= info->max_vcpu_id; i++) {
+        xc_vcpuinfo_t vinfo;
+        if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && vinfo.online)
+            __set_bit(i, vcpumap);
+    }
+
+    if (!write_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+        ERROR("write max_virt_cpus");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, vcpumap, vcpumap_size)) {
+        ERROR("write vcpumap");
+        goto out;
+    }
+
+    /* vcpu context */
+    for (i = 0; i <= info->max_vcpu_id; i++) {
+        /* A copy of the CPU context of the guest. */
+        vcpu_guest_context_t ctxt;
+
+        if (!__test_bit(i, vcpumap))
+            continue;
+
+        if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, i, &ctxt))
+            goto out;
+
+        // system context of vcpu is sent as hvm context.
+    }    
+
+    /* Save magic-page locations. */
+    memset(magic_pfns, 0, sizeof(magic_pfns));
+    for (i = 0; i < NR_PARAMS; i++) {
+        if (xc_get_hvm_param(xc_handle, dom, hvm_params[i], &magic_pfns[i])) {
+            PERROR("Error when xc_get_hvm_param");
+            goto out;
+        }
+    }
+
+    if (!write_exact(io_fd, magic_pfns, sizeof(magic_pfns))) {
+        ERROR("Error when writing to state file (7)");
+        goto out;
+    }
+
+    /* Need another buffer for HVM context */
+    hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
+    if (hvm_buf_size == -1) {
+        ERROR("Couldn't get HVM context size from Xen");
+        goto out;
+    }
+
+    hvm_buf = malloc(hvm_buf_size);
+    if (!hvm_buf) {
+        ERROR("Couldn't allocate memory");
+        goto out;
+    }
+
+    /* Get HVM context from Xen and save it too */
+    rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, hvm_buf_size);
+    if (rec_size == -1) {
+        ERROR("HVM:Could not get hvm buffer");
+        goto out;
+    }
+        
+    if (!write_exact(io_fd, &rec_size, sizeof(rec_size))) {
+        ERROR("error write hvm buffer size");
+        goto out;
+    }
+        
+    if (!write_exact(io_fd, hvm_buf, rec_size)) {
+        ERROR("write HVM info failed!\n");
+        goto out;
+    }
+
+    rc = 0;
+out:
+    if (hvm_buf != NULL)
+        free(hvm_buf);
+    if (vcpumap != NULL)
+        free(vcpumap);
+    return rc;
+}
+
 int
 xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
                uint32_t max_factor, uint32_t flags, int (*suspend)(int),
@@ -147,16 +406,12 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
 
     int rc = 1;
 
-    //int live  = (flags & XCFLAGS_LIVE);
     int debug = (flags & XCFLAGS_DEBUG);
     int live  = (flags & XCFLAGS_LIVE);
 
     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
 
-    /* A copy of the CPU context of the guest. */
-    vcpu_guest_context_t ctxt;
-
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;
 
@@ -185,6 +440,12 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
 
     char *mem;
 
+    /* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
+    unsigned long *qemu_bitmaps[2];
+    int qemu_active = 0;
+    int qemu_non_active = 1;
+
+    /* for foreign p2m exposure */
     unsigned int memmap_info_num_pages;
     unsigned long memmap_size = 0;
     xen_ia64_memmap_info_t *memmap_info_live = NULL;
@@ -299,6 +560,14 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
             goto out;
         }
 
+        if (hvm) {
+            /* Get qemu-dm logging dirty pages too */
+            void *seg = init_qemu_maps(dom, bitmap_size);
+            qemu_bitmaps[0] = seg;
+            qemu_bitmaps[1] = seg + bitmap_size;
+            qemu_active = 0;
+            qemu_non_active = 1;
+        }
     } else {
 
         /* This is a non-live suspend. Issue the call back to get the
@@ -374,9 +643,7 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
              p < memmap_desc_end;
              p += memmap_info->efi_memdesc_size) {
             md = p;
-            if (md->type != EFI_CONVENTIONAL_MEMORY ||
-                md->attribute != EFI_MEMORY_WB ||
-                md->num_pages == 0)
+            if (md_is_not_ram(md))
                 continue;
             
             for (N = md->phys_addr >> PAGE_SHIFT;
@@ -455,11 +722,27 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
                 goto out;
             }
 
+            if (hvm) {
+                unsigned int j;
+                /* Pull in the dirty bits from qemu-dm too */
+                if (!last_iter) {
+                    qemu_active = qemu_non_active;
+                    qemu_non_active = qemu_active ? 0 : 1;
+                    qemu_flip_buffer(dom, qemu_active);
+                    for (j = 0; j < bitmap_size / sizeof(unsigned long); j++) {
+                        to_send[j] |= qemu_bitmaps[qemu_non_active][j];
+                        qemu_bitmaps[qemu_non_active][j] = 0;
+                    }
+                } else {
+                    for (j = 0; j < bitmap_size / sizeof(unsigned long); j++)
+                        to_send[j] |= qemu_bitmaps[qemu_active][j];
+                }
+            }
+
             sent_last_iter = sent_this_iter;
 
             //print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
         }
-
     }
 
     fprintf(stderr, "All memory is saved\n");
@@ -473,100 +756,18 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
         }
     }
 
-    /*
-     * Send through a list of all the PFNs that were not in map at the close.
-     * We send pages which was allocated. However balloon driver may 
-     * decreased after sending page. So we have to check the freed
-     * page after pausing the domain.
-     */
-    {
-        unsigned long N;
-        unsigned long pfntab[1024];
-        unsigned int j;
-
-        j = 0;
-        for (p = memmap_desc_start;
-             p < memmap_desc_end;
-             p += memmap_info->efi_memdesc_size) {
-            md = p;
-            if (md->type != EFI_CONVENTIONAL_MEMORY ||
-                md->attribute != EFI_MEMORY_WB ||
-                md->num_pages == 0)
-                continue;
-            for (N = md->phys_addr >> PAGE_SHIFT;
-                 N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
-                     PAGE_SHIFT;
-                 N++) {
-                if (!xc_ia64_p2m_allocated(&p2m_table, N))
-                    j++;
-            }
-        }
-        if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
-            ERROR("Error when writing to state file (6a)");
-            goto out;
-        }
-        
-        j = 0;
-        for (p = memmap_desc_start;
-             p < memmap_desc_end;
-             p += memmap_info->efi_memdesc_size) {
-            md = p;
-            if (md->type != EFI_CONVENTIONAL_MEMORY ||
-                md->attribute != EFI_MEMORY_WB ||
-                md->num_pages == 0)
-                continue;
-            for (N = md->phys_addr >> PAGE_SHIFT;
-                 N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
-                     PAGE_SHIFT;
-                 N++) {
-                if (!xc_ia64_p2m_allocated(&p2m_table, N))
-                    pfntab[j++] = N;
-                if (j == sizeof(pfntab)/sizeof(pfntab[0])) {
-                    if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
-                        ERROR("Error when writing to state file (6b)");
-                        goto out;
-                    }
-                    j = 0;
-                }
-            }
-        }
-        if (j > 0) {
-            if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
-                ERROR("Error when writing to state file (6b)");
-                goto out;
-            }
-        }
-    }
-
-    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
-        ERROR("Could not get vcpu context");
-        goto out;
-    }
-
-    if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
-        ERROR("Error when writing to state file (1)");
+    if (xc_ia64_send_unallocated_list(xc_handle, io_fd, &p2m_table,
+                                      memmap_info,
+                                      memmap_desc_start, memmap_desc_end))
         goto out;
-    }
-
-    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt.regs.ip, ctxt.regs.b[0]);
 
-    mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                               PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
-    if (mem == NULL) {
-        ERROR("cannot map privreg page");
+    if (!hvm)
+        rc = xc_ia64_pv_send_context(xc_handle, io_fd, dom, live_shinfo);
+    else
+        rc = xc_ia64_hvm_send_context(xc_handle, io_fd,
+                                      dom, &info, live_shinfo);
+    if (rc)
         goto out;
-    }
-    if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
-        ERROR("Error when writing privreg to state file (5)");
-        munmap(mem, PAGE_SIZE);
-        goto out;
-    }
-    munmap(mem, PAGE_SIZE);
-
-    if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
-        ERROR("Error when writing to state file (1)");
-        goto out;
-    }
 
     /* Success! */
     rc = 0;
index 4225932a3f1bca23292595c205dffa9b32b7bf31..c713cec12ef66cbbaab83c0ffec83511da3a838c 100644 (file)
 
 #define XC_IA64_SR_FORMAT_VER_CURRENT   XC_IA64_SR_FORMAT_VER_TWO
 
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+   ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int __test_bit(int nr, void * addr)
+{
+    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void __set_bit(int nr, void * addr)
+{
+    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
 #endif /* XC_IA64_SAVE_RESTORE_H */
 
 /*