From 9aec2ef5c9029d4fe6ee07dc1b0731e309a338ac Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Wed, 17 Feb 2016 15:10:27 +0800 Subject: [PATCH] COLO: use qemu block replication Use qemu block replication as our block replication solution. Note that guest must be paused before starting COLO, otherwise, the disk won't be consistent between primary and secondary. Signed-off-by: Wen Congyang Signed-off-by: Yang Hongyang Signed-off-by: Changlong Xie Acked-by: Ian Jackson --- tools/libxl/Makefile | 1 + tools/libxl/libxl_colo.h | 15 ++ tools/libxl/libxl_colo_qdisk.c | 286 +++++++++++++++++++++++++++++++ tools/libxl/libxl_colo_restore.c | 42 +++++- tools/libxl/libxl_colo_save.c | 54 +++++++- tools/libxl/libxl_internal.h | 5 + 6 files changed, 398 insertions(+), 5 deletions(-) create mode 100644 tools/libxl/libxl_colo_qdisk.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index c5ef3f0e46..701c069844 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -66,6 +66,7 @@ endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o +LIBXL_OBJS-y += libxl_colo_qdisk.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h index feec7f1aa9..90345f46d2 100644 --- a/tools/libxl/libxl_colo.h +++ b/tools/libxl/libxl_colo.h @@ -19,6 +19,7 @@ struct libxl__ao; struct libxl__egc; struct libxl__colo_save_state; +struct libxl__checkpoint_devices_state; enum { LIBXL_COLO_SETUPED, @@ -26,6 +27,10 @@ enum { LIBXL_COLO_RESUMED, }; +typedef struct libxl__colo_qdisk { + bool setuped; +} libxl__colo_qdisk; + typedef struct libxl__domain_create_state libxl__domain_create_state; typedef void libxl__domain_create_cb(struct libxl__egc *egc, libxl__domain_create_state *dcs, @@ -47,8 +52,18 @@ struct libxl__colo_restore_state { /* private, colo
restore checkpoint state */ libxl__domain_create_cb *saved_cb; void *crcs; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; + const char *host; + const char *port; }; +int init_subkind_qdisk(struct libxl__checkpoint_devices_state *cds); + +void cleanup_subkind_qdisk(struct libxl__checkpoint_devices_state *cds); + extern void libxl__colo_restore_setup(struct libxl__egc *egc, libxl__colo_restore_state *crs); extern void libxl__colo_restore_teardown(struct libxl__egc *egc, void *dcs_void, diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c new file mode 100644 index 0000000000..c23b81bae8 --- /dev/null +++ b/tools/libxl/libxl_colo_qdisk.c @@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2016 FUJITSU LIMITED
+ * Author: Wen Congyang
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/* ========== init() and cleanup() ========== */
+
+/*
+ * Subkind init hook for qdisk devices. Does nothing and always returns 0.
+ */
+int init_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+    /*
+     * We don't know if we use qemu block replication, so
+     * we cannot start block replication here.
+     */
+    return 0;
+}
+
+/* Subkind cleanup hook for qdisk devices; there is nothing to release. */
+void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* ========== setup() and teardown() ========== */
+
+/*
+ * Common setup for the primary (save) and secondary (restore) side.
+ *
+ * A disk only matches if it is a COLO-enabled qdisk with colo_host,
+ * colo_export, a positive colo_port, active_disk and hidden_disk all set;
+ * otherwise ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH is reported. A matching
+ * device gets a new libxl__colo_qdisk as dev->concrete_data.
+ *
+ * Primary: only records css->qdisk_used.
+ * Secondary: starts the NBD server for the first matching disk, insists
+ * that every further disk uses the same host and port, and adds the
+ * disk's export to the server.
+ *
+ * The result is delivered through dev->aodev.rc and dev->aodev.callback.
+ */
+static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev,
+                             bool primary)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    int ret, rc = 0;
+    libxl__colo_qdisk *colo_qdisk = NULL;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const char *host = disk->colo_host;
+    const char *export_name = disk->colo_export;
+    const int domid = cds->domid;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    if (disk->backend != LIBXL_DISK_BACKEND_QDISK ||
+        !libxl_defbool_val(disk->colo_enable) ||
+        !host || !export_name || (disk->colo_port <= 0) ||
+        !disk->active_disk || !disk->hidden_disk) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->matched = true;
+
+    GCNEW(colo_qdisk);
+    dev->concrete_data = colo_qdisk;
+
+    if (primary) {
+        libxl__colo_save_state *css = cds->concrete_data;
+
+        css->qdisk_used = true;
+        /* NBD server is not ready, so we cannot start block replication now */
+        goto out;
+    } else {
+        libxl__colo_restore_state *crs = cds->concrete_data;
+        /*
+         * crs->port is kept and compared against when the next disk is
+         * set up, so the string must not live on this function's stack.
+         * Take it from the gc, as is done for colo_qdisk above.
+         */
+        const char *port = GCSPRINTF("%d", disk->colo_port);
+
+        if (!crs->qdisk_used) {
+            /* start nbd server */
+            ret = libxl__qmp_nbd_server_start(gc, domid, host, port);
+            if (ret) {
+                rc = ERROR_FAIL;
+                goto out;
+            }
+            crs->host = host;
+            crs->port = port;
+        } else {
+            if (strcmp(crs->host, host) || strcmp(crs->port, port)) {
+                LOG(ERROR, "The host and port of all disks must be the same");
+                rc = ERROR_FAIL;
+                goto out;
+            }
+        }
+
+        crs->qdisk_used = true;
+
+        ret = libxl__qmp_nbd_server_add(gc, domid, export_name);
+        if (ret) {
+            rc = ERROR_FAIL;
+            goto out;
+        }
+
+        colo_qdisk->setuped = true;
+    }
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/*
+ * Common teardown for the primary (save) and secondary (restore) side.
+ *
+ * Primary: if colo_qdisk_save_preresume() marked this disk as set up,
+ * issue x_blockdev_change for child "children.1" of colo_export.
+ * Secondary: stop the NBD server if any qdisk was set up.
+ *
+ * The result is delivered through dev->aodev.rc and dev->aodev.callback.
+ */
+static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev,
+                                bool primary)
+{
+    int ret, rc = 0;
+    const libxl__colo_qdisk *colo_qdisk = dev->concrete_data;
+    const libxl_device_disk *disk = dev->backend_dev;
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = dev->cds;
+    const int domid = cds->domid;
+    const char *export_name = disk->colo_export;
+
+    EGC_GC;
+
+    if (primary) {
+        if (!colo_qdisk->setuped)
+            goto out;
+
+        /*
+         * There is no way to get the child name, but we know it is children.1
+         */
+        ret = libxl__qmp_x_blockdev_change(gc, domid, export_name,
+                                           "children.1", NULL);
+        if (ret)
+            rc = ERROR_FAIL;
+    } else {
+        libxl__colo_restore_state *crs = cds->concrete_data;
+
+        if (crs->qdisk_used) {
+            ret = libxl__qmp_nbd_server_stop(gc, domid);
+            if (ret)
+                rc = ERROR_FAIL;
+        }
+    }
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+
+/*
+ * Primary-side preresume hook. The first successful call attaches the
+ * replication driver to the disk; once colo_qdisk->setuped is set, later
+ * calls do nothing.
+ *
+ * The NBD-backed replication drive is created with the HMP "drive_add"
+ * command and its node is then passed to x_blockdev_change on colo_export.
+ * Either failure stops the sequence with ERROR_FAIL and leaves setuped
+ * clear, so teardown does not issue its x_blockdev_change for this disk.
+ *
+ * The result is delivered through dev->aodev.rc and dev->aodev.callback.
+ */
+static void colo_qdisk_save_preresume(libxl__egc *egc,
+                                      libxl__checkpoint_device *dev)
+{
+    libxl__colo_qdisk *colo_qdisk = dev->concrete_data;
+    const libxl_device_disk *disk = dev->backend_dev;
+    int ret, rc = 0;
+    char *node = NULL;
+    char *cmd = NULL;
+
+    /* Convenience aliases */
+    const int domid = dev->cds->domid;
+    const char *host = disk->colo_host;
+    int port = disk->colo_port;
+    const char *export_name = disk->colo_export;
+
+    EGC_GC;
+
+    if (colo_qdisk->setuped)
+        goto out;
+
+    /* qmp command doesn't support the driver "nbd" */
+    node = GCSPRINTF("colo_node%d",
+                     libxl__device_disk_dev_number(disk->vdev, NULL, NULL));
+    cmd = GCSPRINTF("drive_add buddy driver=replication,mode=primary,"
+                    "file.driver=nbd,file.host=%s,file.port=%d,"
+                    "file.export=%s,node-name=%s,if=none",
+                    host, port, export_name, node);
+    ret = libxl__qmp_hmp(gc, domid, cmd);
+    if (ret) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, NULL, node);
+    if (ret) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    colo_qdisk->setuped = true;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(egc, &dev->aodev);
+}
+
+/* ======== primary ======== */
+
+/* Primary setup hook: colo_qdisk_setup() with primary == true. */
+static void colo_qdisk_save_setup(libxl__egc *egc,
+                                  libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, true);
+}
+
+/* Primary teardown hook: colo_qdisk_teardown() with primary == true. */
+static void colo_qdisk_save_teardown(libxl__egc *egc,
+                                     libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, true);
+}
+
+/* Checkpoint device ops for VBD devices on the primary (save) side. */
+const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_save_setup,
+    .teardown = colo_qdisk_save_teardown,
+    .preresume = colo_qdisk_save_preresume,
+};
+
+/* ======== secondary ======== */
+
+/* Secondary setup hook: colo_qdisk_setup() with primary == false. */
+static void colo_qdisk_restore_setup(libxl__egc *egc,
+                                     libxl__checkpoint_device *dev)
+{
+    colo_qdisk_setup(egc, dev, false);
+}
+
+/* Secondary teardown hook: colo_qdisk_teardown() with primary == false. */
+static void colo_qdisk_restore_teardown(libxl__egc *egc,
+                                        libxl__checkpoint_device *dev)
+{
+    colo_qdisk_teardown(egc, dev, false);
+}
+
+/* Checkpoint device ops for VBD devices on the secondary (restore) side. */
+const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = {
+    .kind = LIBXL__DEVICE_KIND_VBD,
+    .setup = colo_qdisk_restore_setup,
+    .teardown = colo_qdisk_restore_teardown,
+};
diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c index 04b02d84e2..2ab69ed5be 100644 --- a/tools/libxl/libxl_colo_restore.c +++ b/tools/libxl/libxl_colo_restore.c @@ -37,7 +37,10 @@ struct libxl__colo_restore_checkpoint_state { int); }; +extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = { + &colo_restore_device_qdisk, NULL, }; @@ -137,7 +140,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc = init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -145,6 +152,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } /* ================ colo: setup restore environment ================ */ @@ -213,6 +222,8 @@ void libxl__colo_restore_setup(libxl__egc *egc,
GCNEW(crcs); crs->crcs = crcs; crcs->crs = crs; + crs->qdisk_setuped = false; + crs->qdisk_used = false; /* setup dsps */ crcs->dsps.ao = ao; @@ -301,6 +312,11 @@ void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void, } libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval); + if (crs->qdisk_setuped) { + libxl__qmp_stop_replication(gc, crs->domid, false); + crs->qdisk_setuped = false; + } + crcs->saved_rc = rc; if (!crcs->teardown_devices) { colo_restore_teardown_devices_done(egc, &dcs->cds, 0); @@ -573,6 +589,13 @@ static void colo_restore_preresume_cb(libxl__egc *egc, goto out; } + if (crs->qdisk_setuped) { + if (libxl__qmp_do_checkpoint(gc, crs->domid)) { + LOG(ERROR, "doing checkpoint fails"); + goto out; + } + } + colo_restore_resume_vm(egc, crcs); return; @@ -730,8 +753,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc, STATE_AO_GC(crs->ao); - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD); cds->callback = colo_restore_setup_cds_done; cds->ao = ao; cds->domid = crs->domid; @@ -768,6 +791,14 @@ static void colo_restore_setup_cds_done(libxl__egc *egc, goto out; } + if (crs->qdisk_used && !crs->qdisk_setuped) { + if (libxl__qmp_start_replication(gc, crs->domid, false)) { + LOG(ERROR, "starting replication fails"); + goto out; + } + crs->qdisk_setuped = true; + } + colo_send_svm_ready(egc, crcs); return; @@ -922,13 +953,18 @@ static void colo_suspend_vm_done(libxl__egc *egc, crcs->status = LIBXL_COLO_SUSPENDED; + if (libxl__qmp_get_replication_error(gc, crs->domid)) { + LOG(ERROR, "replication error occurs when secondary vm is running"); + goto out; + } + cds->callback = colo_restore_postsuspend_cb; libxl__checkpoint_devices_postsuspend(egc, cds); return; out: - libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc); + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0); } static void 
colo_restore_postsuspend_cb(libxl__egc *egc, diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c index cca6bdeb22..d73e632c28 100644 --- a/tools/libxl/libxl_colo_save.c +++ b/tools/libxl/libxl_colo_save.c @@ -18,7 +18,10 @@ #include "libxl_internal.h" +extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_ops[] = { + &colo_save_device_qdisk, NULL, }; @@ -30,7 +33,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc = init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -38,6 +45,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } /* ================= colo: setup save environment ================= */ @@ -79,9 +88,12 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css) css->send_fd = dss->fd; css->recv_fd = dss->recv_fd; css->svm_running = false; + css->paused = true; + css->qdisk_setuped = false; + css->qdisk_used = false; - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD); cds->ops = colo_ops; cds->callback = colo_save_setup_done; cds->ao = ao; @@ -163,6 +175,11 @@ void libxl__colo_save_teardown(libxl__egc *egc, libxl__stream_read_abort(egc, &css->srs, 1); + if (css->qdisk_setuped) { + libxl__qmp_stop_replication(gc, dss->domid, true); + css->qdisk_setuped = false; + } + dss->cds.callback = colo_teardown_done; libxl__checkpoint_devices_teardown(egc, &dss->cds); return; @@ -291,6 +308,11 @@ static void colo_read_svm_suspended_done(libxl__egc *egc, goto out; } + if (!css->paused && libxl__qmp_get_replication_error(gc, dss->domid)) { + LOG(ERROR, "replication error occurs when primary vm is running"); + goto out; + } + ok = 1; 
out: @@ -389,12 +411,40 @@ static void colo_preresume_cb(libxl__egc *egc, goto out; } + if (css->qdisk_used && !css->qdisk_setuped) { + if (libxl__qmp_start_replication(gc, dss->domid, true)) { + LOG(ERROR, "starting replication fails"); + goto out; + } + css->qdisk_setuped = true; + } + + if (!css->paused) { + if (libxl__qmp_do_checkpoint(gc, dss->domid)) { + LOG(ERROR, "doing checkpoint fails"); + goto out; + } + } + /* Resumes the domain and the device model */ if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) { LOG(ERROR, "cannot resume primary vm"); goto out; } + /* + * The guest should be paused before doing colo because there is + * no disk migration. + */ + if (css->paused) { + rc = libxl_domain_unpause(CTX, dss->domid); + if (rc) { + LOG(ERROR, "cannot unpause primary vm"); + goto out; + } + css->paused = false; + } + /* read CHECKPOINT_SVM_RESUMED */ css->callback = colo_read_svm_resumed_done; css->srs.checkpoint_callback = colo_common_read_stream_done; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 148df055bf..c3366d7a3b 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3211,6 +3211,11 @@ struct libxl__colo_save_state { libxl__stream_read_state srs; void (*callback)(libxl__egc *, libxl__colo_save_state *, int); bool svm_running; + bool paused; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; }; typedef struct libxl__logdirty_switch { -- 2.30.2