From: Alex Williamson
Date: Sun, 21 Oct 2007 20:57:13 +0000 (-0600)
Subject: [IA64] vti domain save/restore: libxc: implement vti domain save/restore
X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~14855
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=44d78108bffa1ed42836398a177f42576bb0ef0f;p=xen.git

[IA64] vti domain save/restore: libxc: implement vti domain save/restore

Signed-off-by: Isaku Yamahata
---

diff --git a/tools/libxc/ia64/xc_ia64_linux_restore.c b/tools/libxc/ia64/xc_ia64_linux_restore.c
index 258cfdee80..5336b5bd51 100644
--- a/tools/libxc/ia64/xc_ia64_linux_restore.c
+++ b/tools/libxc/ia64/xc_ia64_linux_restore.c
@@ -8,6 +8,7 @@
  *
  * Copyright (c) 2007 Isaku Yamahata
  * Use foreign p2m exposure.
+ * VTi domain support
  */

 #include

@@ -17,6 +18,7 @@
 #include "xc_ia64_save_restore.h"
 #include "xc_ia64.h"
 #include "xc_efi.h"
+#include "xen/hvm/params.h"

 #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))

@@ -75,6 +77,354 @@ read_page(int xc_handle, int io_fd, uint32_t dom, unsigned long pfn)
     return 0;
 }

+/*
+ * Get the list of PFNs that are not in the pseudo-phys map.
+ * Although we allocate pages on demand, the balloon driver may
+ * have decreased the reservation simultaneously, so we also have
+ * to free the freed pages here.
+ */
+static int
+xc_ia64_recv_unallocated_list(int xc_handle, int io_fd, uint32_t dom,
+                              struct xen_ia64_p2m_table *p2m_table)
+{
+    int rc = -1;
+    unsigned int i;
+    unsigned int count;
+    unsigned long *pfntab = NULL;
+    unsigned int nr_frees;
+
+    if (!read_exact(io_fd, &count, sizeof(count))) {
+        ERROR("Error when reading pfn count");
+        goto out;
+    }
+
+    pfntab = malloc(sizeof(unsigned long) * count);
+    if (pfntab == NULL) {
+        ERROR("Out of memory");
+        goto out;
+    }
+
+    if (!read_exact(io_fd, pfntab, sizeof(unsigned long) * count)) {
+        ERROR("Error when reading pfntab");
+        goto out;
+    }
+
+    nr_frees = 0;
+    for (i = 0; i < count; i++) {
+        if (xc_ia64_p2m_allocated(p2m_table, pfntab[i])) {
+            pfntab[nr_frees] = pfntab[i];
+            nr_frees++;
+        }
+    }
+    if (nr_frees > 0) {
+        if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees,
+                                                  0, pfntab) < 0) {
+            PERROR("Could not decrease reservation");
+            goto out;
+        } else
+            DPRINTF("Decreased reservation by %d / %d pages\n",
+                    nr_frees, count);
+    }
+
+    rc = 0;
+
+ out:
+    if (pfntab != NULL)
+        free(pfntab);
+    return rc;
+}
+
+static int
+xc_ia64_recv_vcpu_context(int xc_handle, int io_fd, uint32_t dom,
+                          uint32_t vcpu, vcpu_guest_context_t *ctxt)
+{
+    if (!read_exact(io_fd, ctxt, sizeof(*ctxt))) {
+        ERROR("Error when reading ctxt");
+        return -1;
+    }
+
+    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]);
+
+    /* Initialize and set registers. */
+    ctxt->flags = VGCF_EXTRA_REGS;
+    if (xc_vcpu_setcontext(xc_handle, dom, vcpu, ctxt) != 0) {
+        ERROR("Couldn't set vcpu context");
+        return -1;
+    }
+
+    /* Just a check. */
+    ctxt->flags = 0;
+    if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) {
+        ERROR("Could not get vcpu context");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Read shared info. */
+static int
+xc_ia64_recv_shared_info(int xc_handle, int io_fd, uint32_t dom,
+                         unsigned long shared_info_frame,
+                         unsigned long *start_info_pfn)
+{
+    unsigned int i;
+
+    /* The new domain's shared-info frame. */
+    shared_info_t *shared_info;
+
+    /* Read shared info. */
+    shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE,
+                                       shared_info_frame);
+    if (shared_info == NULL) {
+        ERROR("cannot map page");
+        return -1;
+    }
+
+    if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
+        ERROR("Error when reading shared_info page");
+        munmap(shared_info, PAGE_SIZE);
+        return -1;
+    }
+
+    /* clear any pending events and the selector */
+    memset(&(shared_info->evtchn_pending[0]), 0,
+           sizeof(shared_info->evtchn_pending));
+    for (i = 0; i < MAX_VIRT_CPUS; i++)
+        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
+
+    if (start_info_pfn != NULL)
+        *start_info_pfn = shared_info->arch.start_info_pfn;
+
+    munmap(shared_info, PAGE_SIZE);
+
+    return 0;
+}
+
+static int
+xc_ia64_pv_recv_context(int xc_handle, int io_fd, uint32_t dom,
+                        unsigned long shared_info_frame,
+                        struct xen_ia64_p2m_table *p2m_table,
+                        unsigned int store_evtchn, unsigned long *store_mfn,
+                        unsigned int console_evtchn,
+                        unsigned long *console_mfn)
+{
+    int rc = -1;
+    unsigned long gmfn;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    /* A temporary mapping of the guest's start_info page. */
+    start_info_t *start_info;
+
+    if (lock_pages(&ctxt, sizeof(ctxt))) {
+        /* needed for build domctl, but might as well do early */
+        ERROR("Unable to lock_pages ctxt");
+        return -1;
+    }
+
+    if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt))
+        goto out;
+
+    /* Then get the privreg page. */
+    if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) {
+        ERROR("Could not read vcpu privregs");
+        goto out;
+    }
+
+    /* Read shared info. */
+    if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom,
+                                 shared_info_frame, &gmfn))
+        goto out;
+
+    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
+    if (populate_page_if_necessary(xc_handle, dom, gmfn, p2m_table)) {
+        ERROR("cannot populate page 0x%lx", gmfn);
+        goto out;
+    }
+    start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ | PROT_WRITE, gmfn);
+    if (start_info == NULL) {
+        ERROR("cannot map start_info page");
+        goto out;
+    }
+    start_info->nr_pages = p2m_size;
+    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
+    start_info->flags = 0;
+    *store_mfn = start_info->store_mfn;
+    start_info->store_evtchn = store_evtchn;
+    *console_mfn = start_info->console.domU.mfn;
+    start_info->console.domU.evtchn = console_evtchn;
+    munmap(start_info, PAGE_SIZE);
+
+    rc = 0;
+
+ out:
+    unlock_pages(&ctxt, sizeof(ctxt));
+    return rc;
+}
+
+static int
+xc_ia64_hvm_recv_context(int xc_handle, int io_fd, uint32_t dom,
+                         unsigned long shared_info_frame,
+                         struct xen_ia64_p2m_table *p2m_table,
+                         unsigned int store_evtchn, unsigned long *store_mfn,
+                         unsigned int console_evtchn,
+                         unsigned long *console_mfn)
+{
+    int rc = -1;
+    xc_dominfo_t info;
+    unsigned int i;
+
+    /* cpu */
+    uint64_t max_virt_cpus;
+    unsigned long vcpumap_size;
+    uint64_t *vcpumap = NULL;
+
+    /* HVM: magic frames for ioreqs and xenstore comms */
+    const int hvm_params[] = {
+        HVM_PARAM_IOREQ_PFN,
+        HVM_PARAM_BUFIOREQ_PFN,
+        HVM_PARAM_STORE_PFN,
+    };
+    const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]);
+    /* ioreq_pfn, bufioreq_pfn, store_pfn */
+    uint64_t magic_pfns[NR_PARAMS];
+
+    /* HVM: a buffer for holding the HVM context */
+    uint64_t rec_size = 0;
+    uint8_t *hvm_buf = NULL;
+
+    /* Read shared info. */
+    if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom, shared_info_frame,
+                                 NULL))
+        goto out;
+
+    /* vcpu map */
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        goto out;
+    }
+    if (!read_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+        ERROR("error reading max_virt_cpus");
+        goto out;
+    }
+    if (max_virt_cpus < info.max_vcpu_id) {
+        ERROR("max_virt_cpus too small: %i < %i\n",
+              max_virt_cpus, info.max_vcpu_id);
+        goto out;
+    }
+    vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+                   sizeof(vcpumap[0]);
+    vcpumap = malloc(vcpumap_size);
+    if (vcpumap == NULL) {
+        ERROR("memory alloc for vcpumap");
+        goto out;
+    }
+    memset(vcpumap, 0, vcpumap_size);
+    if (!read_exact(io_fd, vcpumap, vcpumap_size)) {
+        ERROR("read vcpumap");
+        goto out;
+    }
+
+    /* vcpu context */
+    for (i = 0; i <= info.max_vcpu_id; i++) {
+        /* A copy of the CPU context of the guest. */
+        vcpu_guest_context_t ctxt;
+
+        if (!__test_bit(i, vcpumap))
+            continue;
+
+        if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, i, &ctxt))
+            goto out;
+
+        // The system context of the vcpu is received as part of the HVM context.
+    }
+
+    /* Set HVM-specific parameters */
+    if (!read_exact(io_fd, magic_pfns, sizeof(magic_pfns))) {
+        ERROR("error reading magic page addresses");
+        goto out;
+    }
+
+    /* These comms pages need to be zeroed at the start of day */
+    for (i = 0; i < NR_PARAMS; i++) {
+        rc = xc_clear_domain_page(xc_handle, dom, magic_pfns[i]);
+        if (rc != 0) {
+            ERROR("error zeroing magic pages: %i", rc);
+            goto out;
+        }
+        rc = xc_set_hvm_param(xc_handle, dom, hvm_params[i], magic_pfns[i]);
+        if (rc != 0) {
+            ERROR("error setting HVM params: %i", rc);
+            goto out;
+        }
+    }
+    rc = xc_set_hvm_param(xc_handle, dom,
+                          HVM_PARAM_STORE_EVTCHN, store_evtchn);
+    if (rc != 0) {
+        ERROR("error setting HVM params: %i", rc);
+        goto out;
+    }
+    *store_mfn = magic_pfns[2];
+
+    /* Read the HVM context */
+    if (!read_exact(io_fd, &rec_size, sizeof(rec_size))) {
+        ERROR("error reading hvm context size");
+        goto out;
+    }
+
+    hvm_buf = malloc(rec_size);
+    if (hvm_buf == NULL) {
+        ERROR("memory alloc for hvm context buffer failed");
+        errno = ENOMEM;
+        goto out;
+    }
+
+    if (!read_exact(io_fd, hvm_buf, rec_size)) {
+        ERROR("error loading the HVM context");
+        goto out;
+    }
+
+    rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_size);
+    if (rc != 0) {
+        ERROR("error setting the HVM context");
+        goto out;
+    }
+
+    rc = 0;
+
+out:
+    if (vcpumap != NULL)
+        free(vcpumap);
+    if (hvm_buf != NULL)
+        free(hvm_buf);
+    return rc;
+}
+
+/*
+ * An HVM domain requires the I/O pages to be allocated
+ * when XEN_DOMCTL_arch_setup is issued.
+ */
+static int
+xc_ia64_hvm_domain_setup(int xc_handle, uint32_t dom)
+{
+    int rc;
+    xen_pfn_t pfn_list[] = {
+        IO_PAGE_START >> PAGE_SHIFT,
+        BUFFER_IO_PAGE_START >> PAGE_SHIFT,
+        BUFFER_PIO_PAGE_START >> PAGE_SHIFT,
+    };
+    unsigned long nr_pages = sizeof(pfn_list) / sizeof(pfn_list[0]);
+
+    rc = xc_domain_memory_populate_physmap(xc_handle, dom, nr_pages,
+                                           0, 0, &pfn_list[0]);
+    if (rc != 0)
+        PERROR("Could not allocate IO page or buffer io page.\n");
+    return rc;
+}
+
 int
 xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                   unsigned int store_evtchn, unsigned long *store_mfn,
@@ -83,29 +433,14 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
 {
     DECLARE_DOMCTL;
     int rc = 1;
-    unsigned int i;
-    unsigned long gmfn;
     unsigned long ver;

     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
-    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
-    shared_info_t *shared_info = (shared_info_t *)shared_info_page;
-
-    /* A copy of the CPU context of the guest. */
-    vcpu_guest_context_t ctxt;
-
-    /* A temporary mapping of the guest's start_info page. */
-    start_info_t *start_info;

     struct xen_ia64_p2m_table p2m_table;
     xc_ia64_p2m_init(&p2m_table);

-    if (hvm) {
-        ERROR("HVM Restore is unsupported");
-        goto out;
-    }
-
     /* For info only */
     nr_pfns = 0;

@@ -125,17 +460,14 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
         goto out;
     }

-    if (lock_pages(&ctxt, sizeof(ctxt))) {
-        /* needed for build domctl, but might as well do early */
-        ERROR("Unable to lock_pages ctxt");
-        return 1;
-    }
-
     if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) {
         ERROR("read: domain setup");
         goto out;
     }

+    if (hvm && xc_ia64_hvm_domain_setup(xc_handle, dom) != 0)
+        goto out;
+
     /* Build firmware (will be overwritten). */
     domctl.domain = (domid_t)dom;
     domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query;
@@ -212,6 +544,7 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
     DPRINTF("Reloading memory pages: 0%%\n");

     while (1) {
+        unsigned long gmfn;
         if (!read_exact(io_fd, &gmfn, sizeof(unsigned long))) {
             ERROR("Error when reading batch size");
             goto out;
@@ -229,127 +562,19 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,

     DPRINTF("Received all pages\n");

-    /*
-     * Get the list of PFNs that are not in the psuedo-phys map.
-     * Although we allocate pages on demand, balloon driver may
-     * decreased simaltenously. So we have to free the freed
-     * pages here.
-     */
-    {
-        unsigned int count;
-        unsigned long *pfntab;
-        unsigned int nr_frees;
-
-        if (!read_exact(io_fd, &count, sizeof(count))) {
-            ERROR("Error when reading pfn count");
-            goto out;
-        }
-
-        pfntab = malloc(sizeof(unsigned long) * count);
-        if (!pfntab) {
-            ERROR("Out of memory");
-            goto out;
-        }
-
-        if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
-            ERROR("Error when reading pfntab");
-            free(pfntab);
-            goto out;
-        }
-
-        nr_frees = 0;
-        for (i = 0; i < count; i++) {
-            if (xc_ia64_p2m_allocated(&p2m_table, pfntab[i])) {
-                pfntab[nr_frees] = pfntab[i];
-                nr_frees++;
-            }
-        }
-        if (nr_frees > 0) {
-            if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees,
-                                                      0, pfntab) < 0) {
-                ERROR("Could not decrease reservation : %d", rc);
-                free(pfntab);
-                goto out;
-            }
-            else
-                DPRINTF("Decreased reservation by %d / %d pages\n",
-                        nr_frees, count);
-        }
-        free(pfntab);
-    }
-
-    if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
-        ERROR("Error when reading ctxt");
-        goto out;
-    }
-
-    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt.regs.ip, ctxt.regs.b[0]);
-
-    /* Initialize and set registers. */
-    ctxt.flags = VGCF_EXTRA_REGS;
-    domctl.cmd = XEN_DOMCTL_setvcpucontext;
-    domctl.domain = (domid_t)dom;
-    domctl.u.vcpucontext.vcpu = 0;
-    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
-    if (xc_domctl(xc_handle, &domctl) != 0) {
-        ERROR("Couldn't set vcpu context");
-        goto out;
-    }
-
-    /* Just a check. */
-    if (xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) {
-        ERROR("Could not get vcpu context");
+    if (xc_ia64_recv_unallocated_list(xc_handle, io_fd, dom, &p2m_table))
         goto out;
-    }

-    /* Then get privreg page. */
-    if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) {
-        ERROR("Could not read vcpu privregs");
+    if (!hvm)
+        rc = xc_ia64_pv_recv_context(xc_handle, io_fd, dom, shared_info_frame,
+                                     &p2m_table, store_evtchn, store_mfn,
+                                     console_evtchn, console_mfn);
+    else
+        rc = xc_ia64_hvm_recv_context(xc_handle, io_fd, dom, shared_info_frame,
+                                      &p2m_table, store_evtchn, store_mfn,
+                                      console_evtchn, console_mfn);
+    if (rc)
         goto out;
-    }
-
-    /* Read shared info. */
-    shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE, shared_info_frame);
-    if (shared_info == NULL) {
-        ERROR("cannot map page");
-        goto out;
-    }
-
-    if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
-        ERROR("Error when reading shared_info page");
-        munmap(shared_info, PAGE_SIZE);
-        goto out;
-    }
-
-    /* clear any pending events and the selector */
-    memset(&(shared_info->evtchn_pending[0]), 0,
-           sizeof (shared_info->evtchn_pending));
-    for (i = 0; i < MAX_VIRT_CPUS; i++)
-        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
-
-    gmfn = shared_info->arch.start_info_pfn;
-
-    munmap (shared_info, PAGE_SIZE);
-
-    /* Uncanonicalise the suspend-record frame number and poke resume rec. */
-    if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table)) {
-        ERROR("cannot populate page 0x%lx", gmfn);
-        goto out;
-    }
-    start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                      PROT_READ | PROT_WRITE, gmfn);
-    if (start_info == NULL) {
-        ERROR("cannot map start_info page");
-        goto out;
-    }
-    start_info->nr_pages = p2m_size;
-    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
-    start_info->flags = 0;
-    *store_mfn = start_info->store_mfn;
-    start_info->store_evtchn = store_evtchn;
-    *console_mfn = start_info->console.domU.mfn;
-    start_info->console.domU.evtchn = console_evtchn;
-    munmap(start_info, PAGE_SIZE);

     /*
      * Safety checking of saved context:
@@ -368,12 +593,10 @@ xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
     rc = 0;

 out:
-    if ((rc != 0) && (dom != 0))
-        xc_domain_destroy(xc_handle, dom);
-
     xc_ia64_p2m_unmap(&p2m_table);

-    unlock_pages(&ctxt, sizeof(ctxt));
+    if ((rc != 0) && (dom != 0))
+        xc_domain_destroy(xc_handle, dom);

     DPRINTF("Restore exit with rc=%d\n", rc);

diff --git a/tools/libxc/ia64/xc_ia64_linux_save.c b/tools/libxc/ia64/xc_ia64_linux_save.c
index aca71b7b22..046f7c4cdd 100644
--- a/tools/libxc/ia64/xc_ia64_linux_save.c
+++ b/tools/libxc/ia64/xc_ia64_linux_save.c
@@ -8,6 +8,7 @@
  *
  * Copyright (c) 2007 Isaku Yamahata
  * Use foreign p2m exposure.
+ * VTi domain support.
  */

 #include

@@ -20,6 +21,7 @@
 #include "xc_ia64.h"
 #include "xc_ia64_save_restore.h"
 #include "xc_efi.h"
+#include "xen/hvm/params.h"

 /*
 ** Default values for important tuning parameters. Can override by passing
@@ -35,14 +37,6 @@
 ** During (live) save/migrate, we maintain a number of bitmaps to track
 ** which pages we have to send, and to skip.
 */
-
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-
-#define BITMAP_ENTRY(_nr,_bmap) \
-    ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
-
-#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
-
 static inline int test_bit(int nr, volatile void * addr)
 {
     return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
@@ -136,6 +130,271 @@ retry:
     return -1;
 }

+static inline int
+md_is_not_ram(const efi_memory_desc_t *md)
+{
+    return ((md->type != EFI_CONVENTIONAL_MEMORY) ||
+            (md->attribute != EFI_MEMORY_WB) ||
+            (md->num_pages == 0));
+}
+
+/*
+ * Send through a list of all the PFNs that were not in the map at the close.
+ * We send the pages that were allocated. However, the balloon driver
+ * may have freed some of them after they were sent, so we have to
+ * check for freed pages again after pausing the domain.
+ */
+static int
+xc_ia64_send_unallocated_list(int xc_handle, int io_fd,
+                              struct xen_ia64_p2m_table *p2m_table,
+                              xen_ia64_memmap_info_t *memmap_info,
+                              void *memmap_desc_start, void *memmap_desc_end)
+{
+    void *p;
+    efi_memory_desc_t *md;
+
+    unsigned long N;
+    unsigned long pfntab[1024];
+    unsigned int j;
+
+    j = 0;
+    for (p = memmap_desc_start;
+         p < memmap_desc_end;
+         p += memmap_info->efi_memdesc_size) {
+        md = p;
+
+        if (md_is_not_ram(md))
+            continue;
+
+        for (N = md->phys_addr >> PAGE_SHIFT;
+             N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
+                 PAGE_SHIFT;
+             N++) {
+            if (!xc_ia64_p2m_allocated(p2m_table, N))
+                j++;
+        }
+    }
+    if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
+        ERROR("Error when writing to state file (6a)");
+        return -1;
+    }
+
+    j = 0;
+    for (p = memmap_desc_start;
+         p < memmap_desc_end;
+         p += memmap_info->efi_memdesc_size) {
+        md = p;
+
+        if (md_is_not_ram(md))
+            continue;
+
+        for (N = md->phys_addr >> PAGE_SHIFT;
+             N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
+                 PAGE_SHIFT;
+             N++) {
+            if (!xc_ia64_p2m_allocated(p2m_table, N))
+                pfntab[j++] = N;
+            if (j == sizeof(pfntab)/sizeof(pfntab[0])) {
+                if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
+                    ERROR("Error when writing to state file (6b)");
+                    return -1;
+                }
+                j = 0;
+            }
+        }
+    }
+    if (j > 0) {
+        if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
+            ERROR("Error when writing to state file (6c)");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int
+xc_ia64_send_vcpu_context(int xc_handle, int io_fd, uint32_t dom,
+                          uint32_t vcpu, vcpu_guest_context_t *ctxt)
+{
+    if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) {
+        ERROR("Could not get vcpu context");
+        return -1;
+    }
+
+    if (!write_exact(io_fd, ctxt, sizeof(*ctxt))) {
+        ERROR("Error when writing to state file (1)");
+        return -1;
+    }
+
+    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]);
+    return 0;
+}
+
+static int
+xc_ia64_send_shared_info(int xc_handle, int io_fd, shared_info_t *live_shinfo)
+{
+    if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
+        ERROR("Error when writing to state file (1)");
+        return -1;
+    }
+    return 0;
+}
+
+static int
+xc_ia64_pv_send_context(int xc_handle, int io_fd, uint32_t dom,
+                        shared_info_t *live_shinfo)
+{
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+    char *mem;
+
+    if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt))
+        return -1;
+
+    mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                               PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
+    if (mem == NULL) {
+        ERROR("cannot map privreg page");
+        return -1;
+    }
+    if (!write_exact(io_fd, mem, PAGE_SIZE)) {
+        ERROR("Error when writing privreg to state file (5)");
+        munmap(mem, PAGE_SIZE);
+        return -1;
+    }
+    munmap(mem, PAGE_SIZE);
+
+    if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+        return -1;
+
+    return 0;
+}
+
+static int
+xc_ia64_hvm_send_context(int xc_handle, int io_fd, uint32_t dom,
+                         const xc_dominfo_t *info, shared_info_t *live_shinfo)
+{
+    int rc = -1;
+    unsigned int i;
+
+    /* vcpu map */
+    uint64_t max_virt_cpus;
+    unsigned long vcpumap_size;
+    uint64_t *vcpumap = NULL;
+
+    /* HVM: magic frames for ioreqs and xenstore comms */
+    const int hvm_params[] = {
+        HVM_PARAM_IOREQ_PFN,
+        HVM_PARAM_BUFIOREQ_PFN,
+        HVM_PARAM_STORE_PFN,
+    };
+    const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]);
+    /* ioreq_pfn, bufioreq_pfn, store_pfn */
+    uint64_t magic_pfns[NR_PARAMS];
+
+    /* HVM: a buffer for holding the HVM context */
+    uint64_t rec_size;
+    uint64_t hvm_buf_size = 0;
+    uint8_t *hvm_buf = NULL;
+
+    if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+        return -1;
+
+    /* vcpu map */
+    max_virt_cpus = MAX_VIRT_CPUS;
+    vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+                   sizeof(vcpumap[0]);
+    vcpumap = malloc(vcpumap_size);
+    if (vcpumap == NULL) {
+        ERROR("memory alloc for vcpumap");
+        goto out;
+    }
+    memset(vcpumap, 0, vcpumap_size);
+
+    for (i = 0; i <= info->max_vcpu_id; i++) {
+        xc_vcpuinfo_t vinfo;
+        if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && vinfo.online)
+            __set_bit(i, vcpumap);
+    }
+
+    if (!write_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+        ERROR("write max_virt_cpus");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, vcpumap, vcpumap_size)) {
+        ERROR("write vcpumap");
+        goto out;
+    }
+
+    /* vcpu context */
+    for (i = 0; i <= info->max_vcpu_id; i++) {
+        /* A copy of the CPU context of the guest. */
+        vcpu_guest_context_t ctxt;
+
+        if (!__test_bit(i, vcpumap))
+            continue;
+
+        if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, i, &ctxt))
+            goto out;
+
+        // The system context of the vcpu is sent as part of the HVM context.
+    }
+
+    /* Save magic-page locations. */
+    memset(magic_pfns, 0, sizeof(magic_pfns));
+    for (i = 0; i < NR_PARAMS; i++) {
+        if (xc_get_hvm_param(xc_handle, dom, hvm_params[i], &magic_pfns[i])) {
+            PERROR("Error when xc_get_hvm_param");
+            goto out;
+        }
+    }
+
+    if (!write_exact(io_fd, magic_pfns, sizeof(magic_pfns))) {
+        ERROR("Error when writing to state file (7)");
+        goto out;
+    }
+
+    /* Need another buffer for the HVM context */
+    hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0);
+    if (hvm_buf_size == -1) {
+        ERROR("Couldn't get HVM context size from Xen");
+        goto out;
+    }
+
+    hvm_buf = malloc(hvm_buf_size);
+    if (!hvm_buf) {
+        ERROR("Couldn't allocate memory");
+        goto out;
+    }
+
+    /* Get the HVM context from Xen and save it too */
+    rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, hvm_buf_size);
+    if (rec_size == -1) {
+        ERROR("HVM: Could not get hvm buffer");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, &rec_size, sizeof(rec_size))) {
+        ERROR("error writing hvm buffer size");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, hvm_buf, rec_size)) {
+        ERROR("error writing the HVM context");
+        goto out;
+    }
+
+    rc = 0;
+out:
+    if (hvm_buf != NULL)
+        free(hvm_buf);
+    if (vcpumap != NULL)
+        free(vcpumap);
+    return rc;
+}
+
 int
 xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
                uint32_t max_factor, uint32_t flags, int (*suspend)(int),
@@ -147,16 +406,12 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,

     int rc = 1;

-    //int live = (flags & XCFLAGS_LIVE);
     int debug = (flags & XCFLAGS_DEBUG);
     int live = (flags & XCFLAGS_LIVE);

     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;

-    /* A copy of the CPU context of the guest. */
-    vcpu_guest_context_t ctxt;
-
     /* Live mapping of shared info structure */
     shared_info_t *live_shinfo = NULL;

@@ -185,6 +440,12 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,

     char *mem;

+    /* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */
+    unsigned long *qemu_bitmaps[2];
+    int qemu_active = 0;
+    int qemu_non_active = 1;
+
+    /* for foreign p2m exposure */
     unsigned int memmap_info_num_pages;
     unsigned long memmap_size = 0;
     xen_ia64_memmap_info_t *memmap_info_live = NULL;
@@ -299,6 +560,14 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
             goto out;
         }

+        if (hvm) {
+            /* Get qemu-dm logging dirty pages too */
+            void *seg = init_qemu_maps(dom, bitmap_size);
+            qemu_bitmaps[0] = seg;
+            qemu_bitmaps[1] = seg + bitmap_size;
+            qemu_active = 0;
+            qemu_non_active = 1;
+        }
     } else {

         /* This is a non-live suspend. Issue the call back to get the
@@ -374,9 +643,7 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
              p < memmap_desc_end;
              p += memmap_info->efi_memdesc_size) {
             md = p;
-            if (md->type != EFI_CONVENTIONAL_MEMORY ||
-                md->attribute != EFI_MEMORY_WB ||
-                md->num_pages == 0)
+            if (md_is_not_ram(md))
                 continue;

             for (N = md->phys_addr >> PAGE_SHIFT;
@@ -455,11 +722,27 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
                 goto out;
             }

+            if (hvm) {
+                unsigned int j;
+                /* Pull in the dirty bits from qemu-dm too */
+                if (!last_iter) {
+                    qemu_active = qemu_non_active;
+                    qemu_non_active = qemu_active ? 0 : 1;
+                    qemu_flip_buffer(dom, qemu_active);
+                    for (j = 0; j < bitmap_size / sizeof(unsigned long); j++) {
+                        to_send[j] |= qemu_bitmaps[qemu_non_active][j];
+                        qemu_bitmaps[qemu_non_active][j] = 0;
+                    }
+                } else {
+                    for (j = 0; j < bitmap_size / sizeof(unsigned long); j++)
+                        to_send[j] |= qemu_bitmaps[qemu_active][j];
+                }
+            }
+
             sent_last_iter = sent_this_iter;

             //print_stats(xc_handle, dom, sent_this_iter, &stats, 1);
         }
-
     }

     fprintf(stderr, "All memory is saved\n");
@@ -473,100 +756,18 @@ xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
         }
     }

-    /*
-     * Send through a list of all the PFNs that were not in map at the close.
-     * We send pages which was allocated. However balloon driver may
-     * decreased after sending page. So we have to check the freed
-     * page after pausing the domain.
-     */
-    {
-        unsigned long N;
-        unsigned long pfntab[1024];
-        unsigned int j;
-
-        j = 0;
-        for (p = memmap_desc_start;
-             p < memmap_desc_end;
-             p += memmap_info->efi_memdesc_size) {
-            md = p;
-            if (md->type != EFI_CONVENTIONAL_MEMORY ||
-                md->attribute != EFI_MEMORY_WB ||
-                md->num_pages == 0)
-                continue;
-            for (N = md->phys_addr >> PAGE_SHIFT;
-                 N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
-                     PAGE_SHIFT;
-                 N++) {
-                if (!xc_ia64_p2m_allocated(&p2m_table, N))
-                    j++;
-            }
-        }
-        if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
-            ERROR("Error when writing to state file (6a)");
-            goto out;
-        }
-
-        j = 0;
-        for (p = memmap_desc_start;
-             p < memmap_desc_end;
-             p += memmap_info->efi_memdesc_size) {
-            md = p;
-            if (md->type != EFI_CONVENTIONAL_MEMORY ||
-                md->attribute != EFI_MEMORY_WB ||
-                md->num_pages == 0)
-                continue;
-            for (N = md->phys_addr >> PAGE_SHIFT;
-                 N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
-                     PAGE_SHIFT;
-                 N++) {
-                if (!xc_ia64_p2m_allocated(&p2m_table, N))
-                    pfntab[j++] = N;
-                if (j == sizeof(pfntab)/sizeof(pfntab[0])) {
-                    if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
-                        ERROR("Error when writing to state file (6b)");
-                        goto out;
-                    }
-                    j = 0;
-                }
-            }
-        }
-        if (j > 0) {
-            if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) {
-                ERROR("Error when writing to state file (6b)");
-                goto out;
-            }
-        }
-    }
-
-    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
-        ERROR("Could not get vcpu context");
-        goto out;
-    }
-
-    if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
-        ERROR("Error when writing to state file (1)");
+    if (xc_ia64_send_unallocated_list(xc_handle, io_fd, &p2m_table,
+                                      memmap_info,
+                                      memmap_desc_start, memmap_desc_end))
         goto out;
-    }
-
-    fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt.regs.ip, ctxt.regs.b[0]);

-    mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                               PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
-    if (mem == NULL) {
-        ERROR("cannot map privreg page");
+    if (!hvm)
+        rc = xc_ia64_pv_send_context(xc_handle, io_fd, dom, live_shinfo);
+    else
+        rc = xc_ia64_hvm_send_context(xc_handle, io_fd,
+                                      dom, &info, live_shinfo);
+    if (rc)
        goto out;
-    }
-    if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
-        ERROR("Error when writing privreg to state file (5)");
-        munmap(mem, PAGE_SIZE);
-        goto out;
-    }
-    munmap(mem, PAGE_SIZE);
-
-    if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
-        ERROR("Error when writing to state file (1)");
-        goto out;
-    }

     /* Success! */
     rc = 0;

diff --git a/tools/libxc/ia64/xc_ia64_save_restore.h b/tools/libxc/ia64/xc_ia64_save_restore.h
index 4225932a3f..c713cec12e 100644
--- a/tools/libxc/ia64/xc_ia64_save_restore.h
+++ b/tools/libxc/ia64/xc_ia64_save_restore.h
@@ -31,6 +31,27 @@

 #define XC_IA64_SR_FORMAT_VER_CURRENT XC_IA64_SR_FORMAT_VER_TWO

+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
+
+#define BITMAP_ENTRY(_nr,_bmap) \
+    ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]
+
+#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)
+
+static inline int __test_bit(int nr, void * addr)
+{
+    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
+}
+
+static inline void __set_bit(int nr, void * addr)
+{
+    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
+}
+
 #endif /* XC_IA64_SAVE_RESTORE_H */

 /*
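
For reference, here is a minimal standalone sketch (not part of the patch) of how the
bitmap helpers moved into xc_ia64_save_restore.h are used by this series: the save side
marks each online vcpu with __set_bit() while building the vcpumap it streams out, and
the restore side walks the same map with __test_bit() to decide which vcpu contexts to
expect. The macros are copied verbatim from the header above; main() and NR_VCPUS are
illustrative stand-ins only.

/* Illustrative only: NR_VCPUS stands in for MAX_VIRT_CPUS. */
#include <stdio.h>
#include <string.h>

#define BITS_PER_LONG (sizeof(unsigned long) * 8)

#define BITMAP_ENTRY(_nr,_bmap) \
    ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG]

#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG)

static inline int __test_bit(int nr, void *addr)
{
    return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1;
}

static inline void __set_bit(int nr, void *addr)
{
    BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr));
}

#define NR_VCPUS 64

int main(void)
{
    unsigned long vcpumap[NR_VCPUS / BITS_PER_LONG + 1];
    int i;

    memset(vcpumap, 0, sizeof(vcpumap));

    /* Save side: mark the online vcpus, as xc_ia64_hvm_send_context() does. */
    __set_bit(0, vcpumap);
    __set_bit(2, vcpumap);
    __set_bit(5, vcpumap);

    /* Restore side: skip vcpus whose bit is clear, as the recv loop does. */
    for (i = 0; i < NR_VCPUS; i++)
        if (__test_bit(i, vcpumap))
            printf("vcpu %d: context expected in the stream\n", i);

    return 0;
}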