Introduce migration precopy policy

author Jennifer Herbert <Jennifer.Herbert@citrix.com>

Mon, 25 Sep 2017 18:55:34 +0000 (19:55 +0100)

committer Wei Liu <wei.liu2@citrix.com>

Tue, 26 Sep 2017 10:10:08 +0000 (11:10 +0100)
author Jennifer Herbert <Jennifer.Herbert@citrix.com>
Mon, 25 Sep 2017 18:55:34 +0000 (19:55 +0100)
committer Wei Liu <wei.liu2@citrix.com>
Tue, 26 Sep 2017 10:10:08 +0000 (11:10 +0100)
diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h

index 6626f0c9c415267e699893ef8165ed26a8383c5c..de97f1bb8790ee52dbc7d6d1e4aa7bdf6747e17a 100644 (file)
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -39,6 +39,20 @@
   */
  struct xenevtchn_handle;
  
+/* For save's precopy_policy(). */
+struct precopy_stats
+{
+    unsigned int iteration;
+    unsigned int total_written;
+    long dirty_count; /* -1 if unknown */
+};
+
+/*
+ * A precopy_policy callback may not be running in the same address
+ * space as libxc an so precopy_stats is passed by value.
+ */
+typedef int (*precopy_policy_t)(struct precopy_stats, void *);
+
  /* callbacks provided by xc_domain_save */
  struct save_callbacks {
      /* Called after expiration of checkpoint interval,
@@ -46,7 +60,22 @@ struct save_callbacks {
       */
      int (*suspend)(void* data);
  
-    /* Called after the guest's dirty pages have been
+    /*
+     * Called before and after every batch of page data sent during
+     * the precopy phase of a live migration to ask the caller what
+     * to do next based on the current state of the precopy migration.
+     *
+     * Should return one of the values listed below:
+     */
+#define XGS_POLICY_ABORT          (-1) /* Abandon the migration entirely
+                                        * and tidy up. */
+#define XGS_POLICY_CONTINUE_PRECOPY 0  /* Remain in the precopy phase. */
+#define XGS_POLICY_STOP_AND_COPY    1  /* Immediately suspend and transmit the
+                                        * remaining dirty pages. */
+    precopy_policy_t precopy_policy;
+
+    /*
+     * Called after the guest's dirty pages have been
       *  copied into an output buffer.
       * Callback function resumes the guest & the device model,
       *  returns to xc_domain_save.
@@ -55,7 +84,8 @@ struct save_callbacks {
       */
      int (*postcopy)(void* data);
  
-    /* Called after the memory checkpoint has been flushed
+    /*
+     * Called after the memory checkpoint has been flushed
       * out into the network. Typical actions performed in this
       * callback include:
       *   (a) send the saved device model state (for HVM guests),
@@ -65,7 +95,8 @@ struct save_callbacks {
       *
       * returns:
       * 0: terminate checkpointing gracefully
-     * 1: take another checkpoint */
+     * 1: take another checkpoint
+     */
      int (*checkpoint)(void* data);
  
      /*
diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h

index a83f22af4eb1477abc0ab14c6637af7c483e7816..36357043aeca47cb37311e65d0bdf9ebaebea26e 100644 (file)
--- a/tools/libxc/xc_sr_common.h
+++ b/tools/libxc/xc_sr_common.h
@@ -198,12 +198,10 @@ struct xc_sr_context
              /* Further debugging information in the stream. */
              bool debug;
  
-            /* Parameters for tweaking live migration. */
-            unsigned max_iterations;
-            unsigned dirty_threshold;
-
              unsigned long p2m_size;
  
+            struct precopy_stats stats;
+
              xen_pfn_t *batch_pfns;
              unsigned nr_batch_pfns;
              unsigned long *deferred_pages;
diff --git a/tools/libxc/xc_sr_save.c b/tools/libxc/xc_sr_save.c

index 1e7502d7421d4f68ea9acef873db0e2c7acc4b31..5a40e588e0a5b759ffa416848fd78b93f20af85e 100644 (file)
--- a/tools/libxc/xc_sr_save.c
+++ b/tools/libxc/xc_sr_save.c
@@ -446,14 +446,13 @@ static int enable_logdirty(struct xc_sr_context *ctx)
      return 0;
  }
  
-static int update_progress_string(struct xc_sr_context *ctx,
-                                  char **str, unsigned iter)
+static int update_progress_string(struct xc_sr_context *ctx, char **str)
  {
      xc_interface *xch = ctx->xch;
      char *new_str = NULL;
+    unsigned int iter = ctx->save.stats.iteration;
  
-    if ( asprintf(&new_str, "Frames iteration %u of %u",
-                  iter, ctx->save.max_iterations) == -1 )
+    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
      {
          PERROR("Unable to allocate new progress string");
          return -1;
@@ -466,6 +465,28 @@ static int update_progress_string(struct xc_sr_context *ctx,
      return 0;
  }
  
+/*
+ * This is the live migration precopy policy - it's called periodically during
+ * the precopy phase of live migrations, and is responsible for deciding when
+ * the precopy phase should terminate and what should be done next.
+ *
+ * The policy implemented here behaves identically to the policy previously
+ * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase of
+ * the live migration when there are either fewer than 50 dirty pages, or more
+ * than 5 precopy rounds have completed.
+ */
+#define SPP_MAX_ITERATIONS      5
+#define SPP_TARGET_DIRTY_COUNT 50
+
+static int simple_precopy_policy(struct precopy_stats stats, void *user)
+{
+    return ((stats.dirty_count >= 0 &&
+            stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
+            stats.iteration >= SPP_MAX_ITERATIONS)
+        ? XGS_POLICY_STOP_AND_COPY
+        : XGS_POLICY_CONTINUE_PRECOPY;
+}
+
  /*
   * Send memory while guest is running.
   */
@@ -474,21 +495,59 @@ static int send_memory_live(struct xc_sr_context *ctx)
      xc_interface *xch = ctx->xch;
      xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
      char *progress_str = NULL;
-    unsigned x;
+    unsigned int x = 0;
      int rc;
+    int policy_decision;
  
-    rc = update_progress_string(ctx, &progress_str, 0);
-    if ( rc )
-        goto out;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
  
-    rc = send_all_pages(ctx);
+    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
+    void *data = ctx->save.callbacks->data;
+
+    struct precopy_stats *policy_stats;
+
+    rc = update_progress_string(ctx, &progress_str);
      if ( rc )
          goto out;
  
-    for ( x = 1;
-          ((x < ctx->save.max_iterations) &&
-           (stats.dirty_count > ctx->save.dirty_threshold)); ++x )
+    ctx->save.stats = (struct precopy_stats)
+        { .dirty_count   = ctx->save.p2m_size };
+    policy_stats = &ctx->save.stats;
+
+    if ( precopy_policy == NULL )
+         precopy_policy = simple_precopy_policy;
+
+    bitmap_set(dirty_bitmap, ctx->save.p2m_size);
+
+    for ( ; ; )
      {
+        policy_decision = precopy_policy(*policy_stats, data);
+        x++;
+
+        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
+        {
+            rc = update_progress_string(ctx, &progress_str);
+            if ( rc )
+                goto out;
+
+            rc = send_dirty_pages(ctx, stats.dirty_count);
+            if ( rc )
+                goto out;
+        }
+
+        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
+            break;
+
+        policy_stats->iteration     = x;
+        policy_stats->total_written += policy_stats->dirty_count;
+        policy_stats->dirty_count   = -1;
+
+        policy_decision = precopy_policy(*policy_stats, data);
+
+        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
+           break;
+
          if ( xc_shadow_control(
                   xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
                   &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
@@ -499,16 +558,8 @@ static int send_memory_live(struct xc_sr_context *ctx)
              goto out;
          }
  
-        if ( stats.dirty_count == 0 )
-            break;
+        policy_stats->dirty_count = stats.dirty_count;
  
-        rc = update_progress_string(ctx, &progress_str, x);
-        if ( rc )
-            goto out;
-
-        rc = send_dirty_pages(ctx, stats.dirty_count);
-        if ( rc )
-            goto out;
      }
  
   out:
@@ -600,8 +651,7 @@ static int suspend_and_send_dirty(struct xc_sr_context *ctx)
  
      if ( ctx->save.live )
      {
-        rc = update_progress_string(ctx, &progress_str,
-                                    ctx->save.max_iterations);
+        rc = update_progress_string(ctx, &progress_str);
          if ( rc )
              goto out;
      }
@@ -937,15 +987,6 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
             stream_type == XC_MIG_STREAM_REMUS ||
             stream_type == XC_MIG_STREAM_COLO);
  
-    /*
-     * TODO: Find some time to better tweak the live migration algorithm.
-     *
-     * These parameters are better than the legacy algorithm especially for
-     * busy guests.
-     */
-    ctx.save.max_iterations = 5;
-    ctx.save.dirty_threshold = 50;
-
      /* Sanity checks for callbacks. */
      if ( hvm )
          assert(callbacks->switch_qemu_logdirty);
author	Jennifer Herbert <Jennifer.Herbert@citrix.com>
	Mon, 25 Sep 2017 18:55:34 +0000 (19:55 +0100)
committer	Wei Liu <wei.liu2@citrix.com>
	Tue, 26 Sep 2017 10:10:08 +0000 (11:10 +0100)
tools/libxc/include/xenguest.h		patch \| blob \| history
tools/libxc/xc_sr_common.h		patch \| blob \| history
tools/libxc/xc_sr_save.c		patch \| blob \| history