libxl: wait for qemu to acknowledge logdirty command
authorIan Jackson <ian.jackson@eu.citrix.com>
Thu, 28 Jun 2012 17:43:23 +0000 (18:43 +0100)
committerIan Jackson <ian.jackson@eu.citrix.com>
Thu, 28 Jun 2012 17:43:23 +0000 (18:43 +0100)
The current migration code in libxl instructs qemu to start or stop
logdirty, but it does not wait for an acknowledgement from qemu before
continuing.  This might lead to memory corruption (!)

Fix this by waiting for qemu to acknowledge the command.

Unfortunately the necessary ao arrangements for waiting for this
command are unique because qemu has a special protocol for this
particular operation.

Also, this change means that the switch_qemu_logdirty callback
implementation in libxl can no longer synchronously produce its return
value, as it now needs to wait for xenstore.  So we tell the
marshalling code generator that it is a message which does not need a
reply.  This turns the callback function called by the marshaller into
one which returns void; the callback function arranges to later
explicitly sends the reply to the helper, when the xs watch triggers
and the appropriate value is read from xenstore.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
tools/libxl/libxl_dom.c
tools/libxl/libxl_internal.h
tools/libxl/libxl_save_callout.c
tools/libxl/libxl_save_msgs_gen.pl

index 57b94ed593f08af22ab7804c0f147e863bbc7d02..7e6ad84b206f6bb16d9da7e0911b5b9f6c6ce1af 100644 (file)
@@ -530,30 +530,181 @@ int libxl__toolstack_restore(uint32_t domid, const uint8_t *buf,
 static void domain_suspend_done(libxl__egc *egc,
                         libxl__domain_suspend_state *dss, int rc);
 
-/*----- callbacks, called by xc_domain_save -----*/
+/*----- complicated callback, called by xc_domain_save -----*/
+
+/*
+ * We implement the other end of protocol for controlling qemu-dm's
+ * logdirty.  There is no documentation for this protocol, but our
+ * counterparty's implementation is in
+ * qemu-xen-traditional.git:xenstore.c in the function
+ * xenstore_process_logdirty_event
+ */
+
+static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
+                                    const struct timeval *requested_abs);
+static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
+                            const char *watch_path, const char *event_path);
+static void switch_logdirty_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dss, int ok);
 
-int libxl__domain_suspend_common_switch_qemu_logdirty
+static void logdirty_init(libxl__logdirty_switch *lds)
+{
+    lds->cmd_path = 0;
+    libxl__ev_xswatch_init(&lds->watch);
+    libxl__ev_time_init(&lds->timeout);
+}
+
+void libxl__domain_suspend_common_switch_qemu_logdirty
                                (int domid, unsigned enable, void *user)
 {
     libxl__save_helper_state *shs = user;
+    libxl__egc *egc = shs->egc;
     libxl__domain_suspend_state *dss = CONTAINER_OF(shs, *dss, shs);
+    libxl__logdirty_switch *lds = &dss->logdirty;
     STATE_AO_GC(dss->ao);
-    char *path;
-    bool rc;
+    int rc;
+    xs_transaction_t t = 0;
+    const char *got;
 
-    path = libxl__sprintf(gc,
+    if (!lds->cmd_path) {
+        lds->cmd_path = GCSPRINTF(
                    "/local/domain/0/device-model/%u/logdirty/cmd", domid);
-    if (!path)
-        return 1;
+        lds->ret_path = GCSPRINTF(
+                   "/local/domain/0/device-model/%u/logdirty/ret", domid);
+    }
+    lds->cmd = enable ? "enable" : "disable";
 
-    if (enable)
-        rc = xs_write(CTX->xsh, XBT_NULL, path, "enable", strlen("enable"));
-    else
-        rc = xs_write(CTX->xsh, XBT_NULL, path, "disable", strlen("disable"));
+    rc = libxl__ev_xswatch_register(gc, &lds->watch,
+                                switch_logdirty_xswatch, lds->ret_path);
+    if (rc) goto out;
+
+    rc = libxl__ev_time_register_rel(gc, &lds->timeout,
+                                switch_logdirty_timeout, 10*1000);
+    if (rc) goto out;
+
+    for (;;) {
+        rc = libxl__xs_transaction_start(gc, &t);
+        if (rc) goto out;
+
+        rc = libxl__xs_read_checked(gc, t, lds->cmd_path, &got);
+        if (rc) goto out;
+
+        if (got) {
+            const char *got_ret;
+            rc = libxl__xs_read_checked(gc, t, lds->ret_path, &got_ret);
+            if (rc) goto out;
 
-    return rc ? 0 : 1;
+            if (strcmp(got, got_ret)) {
+                LOG(ERROR,"controlling logdirty: qemu was already sent"
+                    " command `%s' (xenstore path `%s') but result is `%s'",
+                    got, lds->cmd_path, got_ret ? got_ret : "<none>");
+                rc = ERROR_FAIL;
+                goto out;
+            }
+            rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
+            if (rc) goto out;
+        }
+
+        rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
+        if (rc) goto out;
+
+        rc = libxl__xs_write_checked(gc, t, lds->cmd_path, lds->cmd);
+        if (rc) goto out;
+
+        rc = libxl__xs_transaction_commit(gc, &t);
+        if (!rc) break;
+        if (rc<0) goto out;
+    }
+
+    /* OK, wait for some callback */
+    return;
+
+ out:
+    LOG(ERROR,"logdirty switch failed (rc=%d), aborting suspend",rc);
+    switch_logdirty_done(egc,dss,-1);
+}
+
+static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
+                                    const struct timeval *requested_abs)
+{
+    libxl__domain_suspend_state *dss = CONTAINER_OF(ev, *dss, logdirty.timeout);
+    STATE_AO_GC(dss->ao);
+    LOG(ERROR,"logdirty switch: wait for device model timed out");
+    switch_logdirty_done(egc,dss,-1);
 }
 
+static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
+                            const char *watch_path, const char *event_path)
+{
+    libxl__domain_suspend_state *dss =
+        CONTAINER_OF(watch, *dss, logdirty.watch);
+    libxl__logdirty_switch *lds = &dss->logdirty;
+    STATE_AO_GC(dss->ao);
+    const char *got;
+    xs_transaction_t t = 0;
+    int rc;
+
+    for (;;) {
+        rc = libxl__xs_transaction_start(gc, &t);
+        if (rc) goto out;
+
+        rc = libxl__xs_read_checked(gc, t, lds->ret_path, &got);
+        if (rc) goto out;
+
+        if (!got) {
+            rc = +1;
+            goto out;
+        }
+
+        if (strcmp(got, lds->cmd)) {
+            LOG(ERROR,"logdirty switch: sent command `%s' but got reply `%s'"
+                " (xenstore paths `%s' / `%s')", lds->cmd, got,
+                lds->cmd_path, lds->ret_path);
+            rc = ERROR_FAIL;
+            goto out;
+        }
+
+        rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
+        if (rc) goto out;
+
+        rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
+        if (rc) goto out;
+
+        rc = libxl__xs_transaction_commit(gc, &t); 
+        if (!rc) break;
+        if (rc<0) goto out;
+    }
+
+ out:
+    /* rc < 0: error
+     * rc == 0: ok, we are done
+     * rc == +1: need to keep waiting
+     */
+    libxl__xs_transaction_abort(gc, &t);
+
+    if (!rc) {
+        switch_logdirty_done(egc,dss,0);
+    } else if (rc < 0) {
+        LOG(ERROR,"logdirty switch: failed (rc=%d)",rc);
+        switch_logdirty_done(egc,dss,-1);
+    }
+}
+
+static void switch_logdirty_done(libxl__egc *egc,
+                                 libxl__domain_suspend_state *dss,
+                                 int broke)
+{
+    STATE_AO_GC(dss->ao);
+    libxl__logdirty_switch *lds = &dss->logdirty;
+
+    libxl__ev_xswatch_deregister(gc, &lds->watch);
+    libxl__ev_time_deregister(gc, &lds->timeout);
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->shs, broke);
+}
+
+/*----- callbacks, called by xc_domain_save -----*/
+
 int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid)
 {
     libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -877,6 +1028,8 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss)
     libxl__srm_save_autogen_callbacks *const callbacks =
         &dss->shs.callbacks.save.a;
 
+    logdirty_init(&dss->logdirty);
+
     switch (type) {
     case LIBXL_DOMAIN_TYPE_HVM: {
         char *path;
index 87ca72109da4a1113cc5ea3354133f6e4485b9f1..d2176000456bed6fceba1a48b8992958a737c9df 100644 (file)
@@ -1864,6 +1864,14 @@ typedef struct libxl__domain_suspend_state libxl__domain_suspend_state;
 typedef void libxl__domain_suspend_cb(libxl__egc*,
                                       libxl__domain_suspend_state*, int rc);
 
+typedef struct libxl__logdirty_switch {
+    const char *cmd;
+    const char *cmd_path;
+    const char *ret_path;
+    libxl__ev_xswatch watch;
+    libxl__ev_time timeout;
+} libxl__logdirty_switch;
+
 struct libxl__domain_suspend_state {
     /* set by caller of libxl__domain_suspend */
     libxl__ao *ao;
@@ -1883,6 +1891,7 @@ struct libxl__domain_suspend_state {
     int guest_responded;
     int interval; /* checkpoint interval (for Remus) */
     libxl__save_helper_state shs;
+    libxl__logdirty_switch logdirty;
 };
 
 
@@ -2013,8 +2022,15 @@ _hidden void libxl__xc_domain_save(libxl__egc*, libxl__domain_suspend_state*,
 _hidden void libxl__xc_domain_save_done(libxl__egc*, void *dss_void,
                                         int rc, int retval, int errnoval);
 
+/* Used by asynchronous callbacks: ie ones which xc regards as
+ * returning a value, but which we want to handle asynchronously.
+ * Such functions' actual callback function return void in libxl
+ * When they are ready to indicate completion, they call this. */
+void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
+                           libxl__save_helper_state *shs, int return_value);
+
 _hidden int libxl__domain_suspend_common_callback(void *data);
-_hidden int libxl__domain_suspend_common_switch_qemu_logdirty
+_hidden void libxl__domain_suspend_common_switch_qemu_logdirty
                                (int domid, unsigned int enable, void *data);
 _hidden int libxl__toolstack_save(uint32_t domid, uint8_t **buf,
         uint32_t *len, void *data);
index a6abcda123c32dcc7ea839e6e28dced1cc4a3bcd..078b7eefe571d11c613779c656ae98806c17b66b 100644 (file)
@@ -135,6 +135,14 @@ void libxl__xc_domain_save(libxl__egc *egc, libxl__domain_suspend_state *dss,
 }
 
 
+void libxl__xc_domain_saverestore_async_callback_done(libxl__egc *egc,
+                           libxl__save_helper_state *shs, int return_value)
+{
+    shs->egc = egc;
+    libxl__srm_callout_sendreply(return_value, shs);
+    shs->egc = 0;
+}
+
 /*----- helper execution -----*/
 
 static void run_helper(libxl__egc *egc, libxl__save_helper_state *shs,
index c45986ed742dc4f8a26d0aed3774996dbff3bc72..a9ac808ff0299ae4ad074129b6545a766499cef2 100755 (executable)
@@ -14,6 +14,7 @@ our @msgs = (
     #   x  - function pointer is in struct {save,restore}_callbacks
     #         and its null-ness needs to be passed through to the helper's xc
     #   W  - needs a return value; callback is synchronous
+    #   A  - needs a return value; callback is asynchronous
     [  1, 'sr',     "log",                   [qw(uint32_t level
                                                  uint32_t errnoval
                                                  STRING context
@@ -25,7 +26,7 @@ our @msgs = (
     [  3, 'scxW',   "suspend", [] ],         
     [  4, 'scxW',   "postcopy", [] ],        
     [  5, 'scxW',   "checkpoint", [] ],      
-    [  6, 'scxW',   "switch_qemu_logdirty",  [qw(int domid
+    [  6, 'scxA',   "switch_qemu_logdirty",  [qw(int domid
                                               unsigned enable)] ],
     #                toolstack_save          done entirely `by hand'
     [  7, 'rcxW',   "toolstack_restore",     [qw(uint32_t domid
@@ -262,7 +263,7 @@ foreach my $msginfo (@msgs) {
         $f_more_sr->("        int r;\n");
     }
 
-    my $c_rtype_helper = $flags =~ m/W/ ? 'int' : 'void';
+    my $c_rtype_helper = $flags =~ m/[WA]/ ? 'int' : 'void';
     my $c_rtype_callout = $flags =~ m/W/ ? 'int' : 'void';
     my $c_decl = '(';
     my $c_callback_args = '';
@@ -351,7 +352,7 @@ END_ALWAYS
     assert(len == allocd);
     ${transmit}(buf, len, user);
 ");
-    if ($flags =~ m/W/) {
+    if ($flags =~ m/[WA]/) {
        f_more("${encode}_$name",
                (<<END_ALWAYS.($debug ? <<END_DEBUG : '').<<END_ALWAYS));
     int r = ${helper}_getreply(user);