libxc/save: implement Remus checkpointed save
authorYang Hongyang <yanghy@cn.fujitsu.com>
Mon, 18 May 2015 07:03:55 +0000 (15:03 +0800)
committerIan Campbell <ian.campbell@citrix.com>
Fri, 29 May 2015 11:25:00 +0000 (12:25 +0100)
With Remus, the save flow should be:
live migration->{ periodically save(checkpointed save) }

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
CC: Ian Jackson <Ian.Jackson@eu.citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
tools/libxc/xc_sr_save.c

index c08a49e04c6b1dfc26310bb78ce45a78588e980b..d63b783828b39af51a384f7dda4b542048484617 100644 (file)
@@ -56,6 +56,16 @@ static int write_end_record(struct xc_sr_context *ctx)
     return write_record(ctx, &end);
 }
 
+/*
+ * Writes a CHECKPOINT record into the stream.
+ */
+static int write_checkpoint_record(struct xc_sr_context *ctx)
+{
+    struct xc_sr_record checkpoint = { REC_TYPE_CHECKPOINT, 0, NULL };
+
+    return write_record(ctx, &checkpoint);
+}
+
 /*
  * Writes a batch of memory as a PAGE_DATA record into the stream.  The batch
  * is constructed in ctx->save.batch_pfns.
@@ -627,6 +637,14 @@ static int send_domain_memory_live(struct xc_sr_context *ctx)
     return rc;
 }
 
+/*
+ * Checkpointed save.
+ */
+static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
+{
+    return suspend_and_send_dirty(ctx);
+}
+
 /*
  * Send all domain memory, pausing the domain first.  Generally used for
  * suspend-to-file.
@@ -726,29 +744,53 @@ static int save(struct xc_sr_context *ctx, uint16_t guest_type)
     if ( rc )
         goto err;
 
-    rc = ctx->save.ops.start_of_checkpoint(ctx);
-    if ( rc )
-        goto err;
+    do {
+        rc = ctx->save.ops.start_of_checkpoint(ctx);
+        if ( rc )
+            goto err;
 
-    if ( ctx->save.live )
-        rc = send_domain_memory_live(ctx);
-    else
-        rc = send_domain_memory_nonlive(ctx);
+        if ( ctx->save.live )
+            rc = send_domain_memory_live(ctx);
+        else if ( ctx->save.checkpointed )
+            rc = send_domain_memory_checkpointed(ctx);
+        else
+            rc = send_domain_memory_nonlive(ctx);
 
-    if ( rc )
-        goto err;
+        if ( rc )
+            goto err;
 
-    if ( !ctx->dominfo.shutdown ||
-         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
-    {
-        ERROR("Domain has not been suspended");
-        rc = -1;
-        goto err;
-    }
+        if ( !ctx->dominfo.shutdown ||
+             (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
+        {
+            ERROR("Domain has not been suspended");
+            rc = -1;
+            goto err;
+        }
 
-    rc = ctx->save.ops.end_of_checkpoint(ctx);
-    if ( rc )
-        goto err;
+        rc = ctx->save.ops.end_of_checkpoint(ctx);
+        if ( rc )
+            goto err;
+
+        if ( ctx->save.checkpointed )
+        {
+            /*
+             * We have now completed the initial live portion of the checkpoint
+             * process. Therefore switch into periodically sending synchronous
+             * batches of pages.
+             */
+            ctx->save.live = false;
+
+            rc = write_checkpoint_record(ctx);
+            if ( rc )
+                goto err;
+
+            ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
+
+            rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
+            if ( rc <= 0 )
+                ctx->save.checkpointed = false;
+        }
+    } while ( ctx->save.checkpointed );
 
     xc_report_progress_single(xch, "End of stream");