libxc/restore: support COLO restore
Author:     Wen Congyang <wency@cn.fujitsu.com>
AuthorDate: Wed, 15 Jul 2015 09:18:44 +0000 (17:18 +0800)
Commit:     Changlong Xie <xiecl.fnst@cn.fujitsu.com>
CommitDate: Fri, 1 Apr 2016 03:07:29 +0000 (11:07 +0800)
a. Call the resume/checkpoint/suspend callbacks while the secondary
   VM's state is consistent with the primary's (an illustrative callback
   sketch follows the changed-file list below).
b. Send the dirty pfn list back to the primary at each checkpoint under COLO.
c. Send the store gfn and console gfn to xl before resuming the secondary VM.

Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
tools/libxc/include/xenguest.h
tools/libxc/xc_sr_common.h
tools/libxc/xc_sr_restore.c
tools/libxl/libxl_colo_restore.c
tools/libxl/libxl_create.c
tools/libxl/libxl_save_msgs_gen.pl

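For orientation before the diff itself: a minimal, hypothetical sketch of how a
restore-side caller (in practice the libxl save/restore helper) might wire up
the callbacks that the new COLO path requires. Everything named colo_* below is
invented for illustration; only the restore_callbacks member names, their
signatures, and the return-value convention (1 = success, 2 = broken channel,
anything else = error) are taken from the hunks that follow.

#include <xenctrl.h>
#include <xenguest.h>

/* Per-restore state the callbacks share; purely illustrative. */
struct colo_ctx {
    xen_pfn_t store_gfn;
    xen_pfn_t console_gfn;
};

static int colo_suspend(void *data)         { return 1; } /* suspend secondary */
static int colo_resume(void *data)          { return 1; } /* resume secondary  */
static int colo_checkpoint(void *data)      { return 1; } /* new checkpoint    */
static int colo_wait_checkpoint(void *data) { return 1; } /* wait for one      */

/*
 * New in this patch: xc_domain_restore() reports the store/console gfns
 * through this callback so xl can finish domain construction before the
 * secondary VM is resumed.
 */
static void colo_restore_results(xen_pfn_t store_gfn, xen_pfn_t console_gfn,
                                 void *data)
{
    struct colo_ctx *c = data;

    c->store_gfn = store_gfn;
    c->console_gfn = console_gfn;
}

static struct colo_ctx colo_ctx;
static struct restore_callbacks colo_cbs = {
    .suspend         = colo_suspend,
    .postcopy        = colo_resume,
    .checkpoint      = colo_checkpoint,
    .wait_checkpoint = colo_wait_checkpoint,
    .restore_results = colo_restore_results,  /* added by this patch */
    .data            = &colo_ctx,
};
/*
 * &colo_cbs would then be passed to xc_domain_restore() together with
 * stream_type == XC_MIG_STREAM_COLO and the back-channel send_back_fd.
 */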
diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 8ea5a3c93787e170278c739ecf264e2a2775e524..40902ee4c9d959258e04fe9f749eda55a4eb1a71 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -133,6 +133,14 @@ struct restore_callbacks {
      */
     int (*wait_checkpoint)(void* data);
 
+    /*
+     * Callback to send the store gfn and console gfn to xl,
+     * so that the vm can be resumed before xc_domain_restore()
+     * exits.
+     */
+    void (*restore_results)(xen_pfn_t store_gfn, xen_pfn_t console_gfn,
+                            void *data);
+
     /* to be provided as the last argument to each callback function */
     void* data;
 };
diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h
index c990664d7c0b2ddb87fb6ac7a06e11ee76c7103a..cf32ab80ffc6c64b701a6076153a028d13bb0928 100644
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -214,6 +214,10 @@ struct xc_sr_context
             struct xc_sr_restore_ops ops;
             struct restore_callbacks *callbacks;
 
+            int send_back_fd;
+            unsigned long p2m_size;
+            xc_hypercall_buffer_t dirty_bitmap_hbuf;
+
             /* From Image Header. */
             uint32_t format_version;
 
@@ -222,13 +226,13 @@ struct xc_sr_context
             uint32_t guest_page_size;
 
             /* Plain VM, or checkpoints over time. */
-            bool checkpointed;
+            int checkpointed;
 
             /* Currently buffering records between a checkpoint */
             bool buffer_all_records;
 
 /*
- * With Remus, we buffer the records sent by the primary at checkpoint,
+ * With Remus/COLO, we buffer the records sent by the primary at checkpoint,
  * in case the primary will fail, we can recover from the last
  * checkpoint state.
  * This should be enough for most of the cases because primary only send
diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c
index 3e4ca7ffdd59c794ea0165bbdfd69663bd97fddf..728edbc009356672a7706765ca3d14e2c6cd7691 100644
--- a/tools/libxc/xc_sr_restore.c
+++ b/tools/libxc/xc_sr_restore.c
@@ -411,6 +411,92 @@ static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
     return rc;
 }
 
+/*
+ * Send checkpoint dirty pfn list to primary.
+ */
+static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = -1;
+    unsigned count, written;
+    uint64_t i, *pfns = NULL;
+    struct iovec *iov = NULL;
+    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
+    struct xc_sr_record rec =
+    {
+        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
+    };
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->restore.dirty_bitmap_hbuf);
+
+    if ( xc_shadow_control(
+             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
+             NULL, 0, &stats) != ctx->restore.p2m_size )
+    {
+        PERROR("Failed to retrieve logdirty bitmap");
+        goto err;
+    }
+
+    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
+    {
+        if ( test_bit(i, dirty_bitmap) )
+            count++;
+    }
+
+
+    pfns = malloc(count * sizeof(*pfns));
+    if ( !pfns )
+    {
+        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
+              count * sizeof(*pfns));
+        goto err;
+    }
+
+    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
+    {
+        if ( !test_bit(i, dirty_bitmap) )
+            continue;
+
+        if ( written >= count )
+        {
+            ERROR("Dirty pfn list overflow");
+            goto err;
+        }
+
+        pfns[written++] = i;
+    }
+
+    /* iovec[] for writev(). */
+    iov = malloc(3 * sizeof(*iov));
+    if ( !iov )
+    {
+        ERROR("Unable to allocate memory for sending dirty bitmap");
+        goto err;
+    }
+
+    rec.length = count * sizeof(*pfns);
+
+    iov[0].iov_base = &rec.type;
+    iov[0].iov_len = sizeof(rec.type);
+
+    iov[1].iov_base = &rec.length;
+    iov[1].iov_len = sizeof(rec.length);
+
+    iov[2].iov_base = pfns;
+    iov[2].iov_len = count * sizeof(*pfns);
+
+    if ( writev_exact(ctx->restore.send_back_fd, iov, 3) )
+    {
+        PERROR("Failed to write dirty bitmap to stream");
+        goto err;
+    }
+
+    rc = 0;
+ err:
+    return rc;
+}
+
 static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
 static int handle_checkpoint(struct xc_sr_context *ctx)
 {
@@ -460,6 +546,53 @@ static int handle_checkpoint(struct xc_sr_context *ctx)
     else
         ctx->restore.buffer_all_records = true;
 
+    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
+    {
+#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
+    do {                                                    \
+        if ( ret == 1 )                                     \
+            rc = 0; /* Success */                           \
+        else                                                \
+        {                                                   \
+            if ( ret == 2 )                                 \
+                rc = BROKEN_CHANNEL;                        \
+            else                                            \
+                rc = -1; /* Some unspecified error */       \
+            goto err;                                       \
+        }                                                   \
+    } while (0)
+
+        /* COLO */
+
+        /* We need to resume the guest */
+        rc = ctx->restore.ops.stream_complete(ctx);
+        if ( rc )
+            goto err;
+
+        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
+                                                ctx->restore.console_gfn,
+                                                ctx->restore.callbacks->data);
+
+        /* Resume the secondary vm */
+        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(ret);
+
+        /* Wait for a new checkpoint */
+        ret = ctx->restore.callbacks->wait_checkpoint(
+                                                ctx->restore.callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(ret);
+
+        /* Suspend the secondary vm */
+        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(ret);
+
+#undef HANDLE_CALLBACK_RETURN_VALUE
+
+        rc = send_checkpoint_dirty_pfn_list(ctx);
+        if ( rc )
+            goto err;
+    }
+
  err:
     return rc;
 }
@@ -529,6 +662,21 @@ static int setup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
     int rc;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->restore.dirty_bitmap_hbuf);
+
+    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
+    {
+        dirty_bitmap = xc_hypercall_buffer_alloc_pages(xch, dirty_bitmap,
+                                NRPAGES(bitmap_size(ctx->restore.p2m_size)));
+
+        if ( !dirty_bitmap )
+        {
+            ERROR("Unable to allocate memory for dirty bitmap");
+            rc = -1;
+            goto err;
+        }
+    }
 
     rc = ctx->restore.ops.setup(ctx);
     if ( rc )
@@ -562,10 +710,15 @@ static void cleanup(struct xc_sr_context *ctx)
 {
     xc_interface *xch = ctx->xch;
     unsigned i;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->restore.dirty_bitmap_hbuf);
 
     for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
         free(ctx->restore.buffered_records[i].data);
 
+    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
+        xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
+                                   NRPAGES(bitmap_size(ctx->restore.p2m_size)));
     free(ctx->restore.buffered_records);
     free(ctx->restore.populated_pfns);
     if ( ctx->restore.ops.cleanup(ctx) )
@@ -631,6 +784,15 @@ static int restore(struct xc_sr_context *ctx)
     } while ( rec.type != REC_TYPE_END );
 
  remus_failover:
+
+    if ( ctx->restore.checkpointed == XC_MIG_STREAM_COLO )
+    {
+        /* With COLO, we have already called stream_complete */
+        rc = 0;
+        IPRINTF("COLO Failover");
+        goto done;
+    }
+
     /*
      * With Remus, if we reach here, there must be some error on primary,
      * failover from the last checkpoint state.
@@ -667,6 +829,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                       xc_migration_stream_t stream_type,
                       struct restore_callbacks *callbacks, int send_back_fd)
 {
+    xen_pfn_t nr_pfns;
     struct xc_sr_context ctx =
         {
             .xch = xch,
@@ -680,11 +843,21 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     ctx.restore.xenstore_domid = store_domid;
     ctx.restore.checkpointed = stream_type;
     ctx.restore.callbacks = callbacks;
+    ctx.restore.send_back_fd = send_back_fd;
 
     /* Sanity checks for callbacks. */
     if ( stream_type )
         assert(callbacks->checkpoint);
 
+    if ( ctx.restore.checkpointed == XC_MIG_STREAM_COLO )
+    {
+        /* This is a COLO restore */
+        assert(callbacks->suspend &&
+               callbacks->postcopy &&
+               callbacks->wait_checkpoint &&
+               callbacks->restore_results);
+    }
+
     DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d"
             ", stream_type %d", io_fd, dom, hvm, pae,
             superpages, stream_type);
@@ -706,6 +879,14 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     if ( read_headers(&ctx) )
         return -1;
 
+    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
+    {
+        PERROR("Unable to obtain the guest p2m size");
+        return -1;
+    }
+
+    ctx.restore.p2m_size = nr_pfns;
+
     if ( ctx.dominfo.hvm )
     {
         ctx.restore.ops = restore_ops_x86_hvm;
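
The CHECKPOINT_DIRTY_PFN_LIST record built above is written back to the primary
over send_back_fd; the save-side code that consumes it belongs to a separate
patch and is not shown here. As a rough, hypothetical sketch of the wire format
only -- a 32-bit record type, a 32-bit length, then length/8 64-bit pfns,
exactly as assembled in the three iovecs of send_checkpoint_dirty_pfn_list() --
a receiver might look like the following. read_exact() and
REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST are existing libxc names; the function
itself and how it is used are illustrative.

#include <stdint.h>
#include <stdlib.h>

/*
 * Illustrative only -- not part of this patch.  Assumes the usual libxc
 * internal headers for read_exact() (returns 0 on success) and for
 * REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST.
 */
static int recv_checkpoint_dirty_pfn_list(int back_fd)
{
    uint32_t type, length;
    uint64_t *pfns = NULL;
    int rc = -1;

    /* Record header, as written by iov[0] and iov[1] above. */
    if ( read_exact(back_fd, &type, sizeof(type)) ||
         read_exact(back_fd, &length, sizeof(length)) )
        goto out;

    if ( type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST ||
         (length % sizeof(*pfns)) != 0 )
        goto out;

    if ( length )
    {
        pfns = malloc(length);
        if ( !pfns || read_exact(back_fd, pfns, length) )
            goto out;
    }

    /*
     * pfns[0 .. length/8 - 1] are the pfns the secondary dirtied since the
     * last checkpoint; the primary would merge them into its own dirty
     * bitmap so those pages are transferred again at the next checkpoint.
     */
    rc = 0;
 out:
    free(pfns);
    return rc;
}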
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c
index a8f74a793a7dfe8bffa70c2c7f92f9bef540dcc6..04b02d84e25337ef5e82c2ac4afb58eeb087370f 100644
--- a/tools/libxl/libxl_colo_restore.c
+++ b/tools/libxl/libxl_colo_restore.c
@@ -126,11 +126,6 @@ static void colo_resume_vm(libxl__egc *egc,
         return;
     }
 
-    /*
-     * TODO: get store gfn and console gfn
-     *  We should call the callback restore_results in
-     *  xc_domain_restore() before resuming the guest.
-     */
     libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0);
 
     return;
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index c58dd7e60256bc49eed9dd611b8ecee9771ee331..d6c794ee5d9dc9e4518a96ee5bd202708697d1bb 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -1017,6 +1017,8 @@ static void domcreate_bootloader_done(libxl__egc *egc,
     const int checkpointed_stream = dcs->restore_params.checkpointed_stream;
     libxl__colo_restore_state *const crs = &dcs->crs;
     libxl_domain_build_info *const info = &d_config->b_info;
+    libxl__srm_restore_autogen_callbacks *const callbacks =
+        &dcs->srs.shs.callbacks.restore.a;
 
     if (rc) {
         domcreate_rebuild_done(egc, dcs, rc);
@@ -1044,6 +1046,7 @@ static void domcreate_bootloader_done(libxl__egc *egc,
     }
 
     /* Restore */
+    callbacks->restore_results = libxl__srm_callout_callback_restore_results;
 
     /* COLO only supports HVM now because it does not work very
      * well with pv drivers:
diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl
index 6016706d6ef2873388ec0cc1ad2734ca028a7c03..c2243f242c5ea8e17940d099882c1192475d4e36 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -29,8 +29,8 @@ our @msgs = (
     [  6, 'srcxA',  "wait_checkpoint", [] ],
     [  7, 'scxA',   "switch_qemu_logdirty",  [qw(int domid
                                               unsigned enable)] ],
-    [  8, 'r',      "restore_results",       ['unsigned long', 'store_mfn',
-                                              'unsigned long', 'console_mfn'] ],
+    [  8, 'rcx',    "restore_results",       ['unsigned long', 'store_gfn',
+                                              'unsigned long', 'console_gfn'] ],
     [  9, 'srW',    "complete",              [qw(int retval
                                                  int errnoval)] ],
 );