x86: Save/restore new syscall/sysenter context info.
author Keir Fraser <keir@xensource.com>
Thu, 25 Oct 2007 13:24:52 +0000 (14:24 +0100)
committer Keir Fraser <keir@xensource.com>
Thu, 25 Oct 2007 13:24:52 +0000 (14:24 +0100)
Signed-off-by: Keir Fraser <keir@xensource.com>
tools/libxc/xc_domain_restore.c
tools/libxc/xc_domain_save.c
xen/arch/x86/domctl.c
xen/include/public/domctl.h

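diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index fd1f1e5ca8821ea3ebb6fc2bfcef3653e51dd8a2..6e60758cedddf69bf3e1659a64ed6e6dea98c2cc 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c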
@@ -169,7 +169,8 @@ static int uncanonicalize_pagetable(int xc_handle, uint32_t dom,
 
 
 /* Load the p2m frame list, plus potential extended info chunk */
-static xen_pfn_t *load_p2m_frame_list(int io_fd, int *pae_extended_cr3)
+static xen_pfn_t *load_p2m_frame_list(
+    int io_fd, int *pae_extended_cr3, int *ext_vcpucontext)
 {
     xen_pfn_t *p2m_frame_list;
     vcpu_guest_context_either_t ctxt;
@@ -200,7 +201,8 @@ static xen_pfn_t *load_p2m_frame_list(int io_fd, int *pae_extended_cr3)
             
             /* 4-character chunk signature + 4-byte remaining chunk size. */
             if ( !read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
-                 !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes)) )
+                 !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes)) ||
+                 (tot_bytes < (chunk_bytes + 8)) )
             {
                 ERROR("read extended-info chunk signature failed");
                 return NULL;
@@ -240,6 +242,10 @@ static xen_pfn_t *load_p2m_frame_list(int io_fd, int *pae_extended_cr3)
                      & (1UL << VMASST_TYPE_pae_extended_cr3) )
                     *pae_extended_cr3 = 1;
             }
+            else if ( !strncmp(chunk_sig, "extv", 4) )
+            {
+                *ext_vcpucontext = 1;
+            }
             
             /* Any remaining bytes of this chunk: read and discard. */
             while ( chunk_bytes )
@@ -289,7 +295,7 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                       unsigned int hvm, unsigned int pae)
 {
     DECLARE_DOMCTL;
-    int rc = 1, i, j, n, m, pae_extended_cr3 = 0;
+    int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
     unsigned long mfn, pfn;
     unsigned int prev_pc, this_pc;
     int verify = 0;
@@ -373,7 +379,8 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
     if ( !hvm ) 
     {
         /* Load the p2m frame list, plus potential extended info chunk */
-        p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3);
+        p2m_frame_list = load_p2m_frame_list(
+            io_fd, &pae_extended_cr3, &ext_vcpucontext);
         if ( !p2m_frame_list )
             goto out;
 
@@ -382,13 +389,12 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
         domctl.domain = dom;
         domctl.cmd    = XEN_DOMCTL_set_address_size;
         domctl.u.address_size.size = guest_width * 8;
-        rc = do_domctl(xc_handle, &domctl);
-        if ( rc != 0 )
+        frc = do_domctl(xc_handle, &domctl);
+        if ( frc != 0 )
         {
             ERROR("Unable to set guest address size.");
             goto out;
         }
-        rc = 1;
     }
 
     /* We want zeroed memory so use calloc rather than malloc. */
@@ -713,18 +719,19 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
             goto out;
         }
                 
-        if ( (rc = xc_set_hvm_param(xc_handle, dom, 
-                                    HVM_PARAM_IOREQ_PFN, magic_pfns[0]))
-             || (rc = xc_set_hvm_param(xc_handle, dom, 
-                                       HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]))
-             || (rc = xc_set_hvm_param(xc_handle, dom, 
-                                       HVM_PARAM_STORE_PFN, magic_pfns[2]))
-             || (rc = xc_set_hvm_param(xc_handle, dom, 
-                                       HVM_PARAM_PAE_ENABLED, pae))
-             || (rc = xc_set_hvm_param(xc_handle, dom, 
-                                       HVM_PARAM_STORE_EVTCHN, store_evtchn)) )
+        if ( (frc = xc_set_hvm_param(xc_handle, dom, 
+                                     HVM_PARAM_IOREQ_PFN, magic_pfns[0]))
+             || (frc = xc_set_hvm_param(xc_handle, dom, 
+                                        HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]))
+             || (frc = xc_set_hvm_param(xc_handle, dom, 
+                                        HVM_PARAM_STORE_PFN, magic_pfns[2]))
+             || (frc = xc_set_hvm_param(xc_handle, dom, 
+                                        HVM_PARAM_PAE_ENABLED, pae))
+             || (frc = xc_set_hvm_param(xc_handle, dom, 
+                                        HVM_PARAM_STORE_EVTCHN,
+                                        store_evtchn)) )
         {
-            ERROR("error setting HVM params: %i", rc);
+            ERROR("error setting HVM params: %i", frc);
             goto out;
         }
         *store_mfn = magic_pfns[2];
@@ -750,10 +757,15 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
             goto out;
         }
         
-        rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len);
-        if ( rc ) 
+        frc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len);
+        if ( frc )
+        {
             ERROR("error setting the HVM context");
-       
+            goto out;
+        }
+
+        /* HVM success! */
+        rc = 0;
         goto out;
     }
 
@@ -929,7 +941,7 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
     {
         unsigned int count = 0;
         unsigned long *pfntab;
-        int nr_frees, rc;
+        int nr_frees;
 
         if ( !read_exact(io_fd, &count, sizeof(count)) ||
              (count > (1U << 28)) ) /* up to 1TB of address space */
@@ -973,10 +985,10 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
             };
             set_xen_guest_handle(reservation.extent_start, pfntab);
 
-            if ( (rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
-                                    &reservation)) != nr_frees )
+            if ( (frc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
+                                     &reservation)) != nr_frees )
             {
-                ERROR("Could not decrease reservation : %d", rc);
+                ERROR("Could not decrease reservation : %d", frc);
                 goto out;
             }
             else
@@ -1091,13 +1103,29 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
         domctl.domain = (domid_t)dom;
         domctl.u.vcpucontext.vcpu = i;
         set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt.c);
-        rc = xc_domctl(xc_handle, &domctl);
-        if ( rc != 0 )
+        frc = xc_domctl(xc_handle, &domctl);
+        if ( frc != 0 )
         {
             ERROR("Couldn't build vcpu%d", i);
             goto out;
         }
-        rc = 1;
+
+        if ( !ext_vcpucontext )
+            continue;
+        if ( !read_exact(io_fd, &domctl.u.ext_vcpucontext, 128) ||
+             (domctl.u.ext_vcpucontext.vcpu != i) )
+        {
+            ERROR("Error when reading extended ctxt %d", i);
+            goto out;
+        }
+        domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
+        domctl.domain = dom;
+        frc = xc_domctl(xc_handle, &domctl);
+        if ( frc != 0 )
+        {
+            ERROR("Couldn't set extended vcpu%d info\n", i);
+            goto out;
+        }
     }
 
     if ( !read_exact(io_fd, shared_info_page, PAGE_SIZE) )
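diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 8bd1248ebc028d0e65ae4baadb0c4074ec95f5d3..1b5411476e89687c9a370aba15c140cc6f730e73 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c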
@@ -777,16 +777,18 @@ static xen_pfn_t *map_and_save_p2m_table(int xc_handle,
      */
     {
         unsigned long signature = ~0UL;
-        uint32_t chunk_sz = ((guest_width==8) 
-                             ? sizeof(ctxt.x64) 
-                             : sizeof(ctxt.x32));
-        uint32_t tot_sz   = chunk_sz + 8;
-        char chunk_sig[]  = "vcpu";
+        uint32_t chunk1_sz = ((guest_width==8) 
+                              ? sizeof(ctxt.x64) 
+                              : sizeof(ctxt.x32));
+        uint32_t chunk2_sz = 0;
+        uint32_t tot_sz    = (chunk1_sz + 8) + (chunk2_sz + 8);
         if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
-             !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
-             !write_exact(io_fd, &chunk_sig, 4) ||
-             !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
-             !write_exact(io_fd, &ctxt,      chunk_sz) )
+             !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) ||
+             !write_exact(io_fd, "vcpu", 4) ||
+             !write_exact(io_fd, &chunk1_sz, sizeof(chunk1_sz)) ||
+             !write_exact(io_fd, &ctxt, chunk1_sz) ||
+             !write_exact(io_fd, "extv", 4) ||
+             !write_exact(io_fd, &chunk2_sz, sizeof(chunk2_sz)) )
         {
             ERROR("write: extended info");
             goto out;
@@ -830,6 +832,7 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
                    void (*qemu_flip_buffer)(int, int))
 {
     xc_dominfo_t info;
+    DECLARE_DOMCTL;
 
     int rc = 1, frc, i, j, last_iter, iter = 0;
     int live  = (flags & XCFLAGS_LIVE);
@@ -1095,7 +1098,6 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
         while ( N < p2m_size )
         {
             unsigned int this_pc = (N * 100) / p2m_size;
-            int rc;
 
             if ( (this_pc - prev_pc) >= 5 )
             {
@@ -1107,10 +1109,10 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
             {
                 /* Slightly wasteful to peek the whole array evey time,
                    but this is fast enough for the moment. */
-                rc = xc_shadow_control(
+                frc = xc_shadow_control(
                     xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
                     p2m_size, NULL, 0, NULL);
-                if ( rc != p2m_size )
+                if ( frc != p2m_size )
                 {
                     ERROR("Error peeking shadow bitmap");
                     goto out;
@@ -1601,6 +1603,20 @@ int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
             ERROR("Error when writing to state file (1) (errno %d)", errno);
             goto out;
         }
+
+        domctl.cmd = XEN_DOMCTL_get_ext_vcpucontext;
+        domctl.domain = dom;
+        domctl.u.ext_vcpucontext.vcpu = i;
+        if ( xc_domctl(xc_handle, &domctl) < 0 )
+        {
+            ERROR("No extended context for VCPU%d", i);
+            goto out;
+        }
+        if ( !write_exact(io_fd, &domctl.u.ext_vcpucontext, 128) )
+        {
+            ERROR("Error when writing to state file (2) (errno %d)", errno);
+            goto out;
+        }
     }
 
     /*
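diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index fc71829bb57e11b199eabc18212cb06fd1d62e87..fe90c09906773f5eae6906bb00a49ceb1bd1f16d 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c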
@@ -26,7 +26,6 @@
 #include <asm/hvm/cacheattr.h>
 #include <asm/processor.h>
 #include <xsm/xsm.h>
-#include <xen/list.h>
 #include <asm/iommu.h>
 
 long arch_do_domctl(
@@ -697,6 +696,79 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_set_ext_vcpucontext:
+    case XEN_DOMCTL_get_ext_vcpucontext:
+    {
+        struct xen_domctl_ext_vcpucontext *evc;
+        struct domain *d;
+        struct vcpu *v;
+
+        evc = &domctl->u.ext_vcpucontext;
+
+        ret = (evc->size < sizeof(*evc)) ? -EINVAL : 0;
+        evc->size = sizeof(*evc);
+        if ( ret != 0 )
+            break;
+
+        ret = -ESRCH;
+        d = rcu_lock_domain_by_id(domctl->domain);
+        if ( d == NULL )
+            break;
+
+        ret = -ESRCH;
+        if ( (evc->vcpu >= MAX_VIRT_CPUS) ||
+             ((v = d->vcpu[evc->vcpu]) == NULL) )
+            goto ext_vcpucontext_out;
+
+        if ( domctl->cmd == XEN_DOMCTL_get_ext_vcpucontext )
+        {
+#ifdef __x86_64__
+            evc->sysenter_callback_cs      = v->arch.sysenter_callback_cs;
+            evc->sysenter_callback_eip     = v->arch.sysenter_callback_eip;
+            evc->sysenter_disables_events  = v->arch.sysenter_disables_events;
+            evc->syscall32_callback_cs     = v->arch.syscall32_callback_cs;
+            evc->syscall32_callback_eip    = v->arch.syscall32_callback_eip;
+            evc->syscall32_disables_events = v->arch.syscall32_disables_events;
+#else
+            evc->sysenter_callback_cs      = 0;
+            evc->sysenter_callback_eip     = 0;
+            evc->sysenter_disables_events  = 0;
+            evc->syscall32_callback_cs     = 0;
+            evc->syscall32_callback_eip    = 0;
+            evc->syscall32_disables_events = 0;
+#endif
+        }
+        else
+        {
+#ifdef __x86_64__
+            fixup_guest_code_selector(d, evc->sysenter_callback_cs);
+            v->arch.sysenter_callback_cs      = evc->sysenter_callback_cs;
+            v->arch.sysenter_callback_eip     = evc->sysenter_callback_eip;
+            v->arch.sysenter_disables_events  = evc->sysenter_disables_events;
+            fixup_guest_code_selector(d, evc->syscall32_callback_cs);
+            v->arch.syscall32_callback_cs     = evc->syscall32_callback_cs;
+            v->arch.syscall32_callback_eip    = evc->syscall32_callback_eip;
+            v->arch.syscall32_disables_events = evc->syscall32_disables_events;
+#else
+            /* We do not support syscall/syscall32/sysenter on 32-bit Xen. */
+            ret = -EINVAL;
+            if ( (evc->sysenter_callback_cs & ~3) ||
+                 evc->sysenter_callback_eip ||
+                 (evc->syscall32_callback_cs & ~3) ||
+                 evc->syscall32_callback_eip )
+                goto ext_vcpucontext_out;
+#endif
+        }
+
+        ret = 0;
+
+    ext_vcpucontext_out:
+        rcu_unlock_domain(d);
+        if ( copy_to_guest(u_domctl, domctl, 1) )
+            ret = -EFAULT;
+    }
+    break;
+
     default:
         ret = -ENOSYS;
         break;
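diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index d44e04f4afc11dbbe7fd92889daab1b6433efd0d..d9f9f5dea5afa220b8c8cc5cbddf41dde63afd16 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h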
@@ -515,6 +515,31 @@ typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
 
 
+#define XEN_DOMCTL_set_ext_vcpucontext 42
+#define XEN_DOMCTL_get_ext_vcpucontext 43
+struct xen_domctl_ext_vcpucontext {
+    /* IN: VCPU that this call applies to; -ESRCH if no such VCPU exists. */
+    uint32_t         vcpu;
+    /*
+     * SET: Size of struct (IN).  GET: Size of struct (OUT).
+     * NOTE(review): the arch_do_domctl() handler checks size on GET as well.
+     */
+    uint32_t         size;
+#if defined(__i386__) || defined(__x86_64__)
+    /* SYSCALL from 32-bit mode and SYSENTER callback information. */
+    /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */
+    uint64_aligned_t syscall32_callback_eip;
+    uint64_aligned_t sysenter_callback_eip;
+    uint16_t         syscall32_callback_cs;
+    uint16_t         sysenter_callback_cs;
+    uint8_t          syscall32_disables_events;
+    uint8_t          sysenter_disables_events;
+#endif
+};
+typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
+
+
 struct xen_domctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
@@ -549,6 +574,7 @@ struct xen_domctl {
         struct xen_domctl_memory_mapping    memory_mapping;
         struct xen_domctl_ioport_mapping    ioport_mapping;
         struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr;
+        struct xen_domctl_ext_vcpucontext   ext_vcpucontext;
         uint8_t                             pad[128];
     } u;
 };