From: Yang Hongyang Date: Wed, 11 Jun 2014 03:29:44 +0000 (+0800) Subject: xl/remus: cmdline switches and config vars to control network buffering X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~4316^2~3 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=d2dec181db41f9402271f53ebb1bd5e17fa6746f;p=xen.git xl/remus: cmdline switches and config vars to control network buffering Add two members in libxl_domain_remus_info: netbuf: whether netbuf is enabled netbufscript: the path of the script which will be run to setup and tear down the guest's interface. Add cmdline switches to 'xl remus' command to enable or disable network buffering and a domain-specific hotplug script to setup network buffering. Add a new config var 'remus.default.netbufscript' to xl.conf, that allows the user to override the default global script used to setup network buffering. Note: Network buffering is enabled by default. Disabling network buffering requires enabling unsafe mode. Signed-off-by: Shriram Rajagopalan Signed-off-by: Lai Jiangshan Reviewed-by: Wen Congyang Acked-by: Ian Jackson Acked-by: Konrad Rzeszutek Wilk --- diff --git a/docs/man/xl.conf.pod.5 b/docs/man/xl.conf.pod.5 index 7c43bde4fa..8ae19bb5f3 100644 --- a/docs/man/xl.conf.pod.5 +++ b/docs/man/xl.conf.pod.5 @@ -105,6 +105,12 @@ Configures the default gateway device to set for virtual network devices. Default: C<None> +=item B<remus.default.netbufscript="PATH"> + +Configures the default script used by Remus to setup network buffering. + +Default: C</etc/xen/scripts/remus-netbuf-setup> + =item B<output_format="json|sxp"> Configures the default output format used by xl when printing "machine diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1 index 2ae3007d53..1f165adf18 100644 --- a/docs/man/xl.pod.1 +++ b/docs/man/xl.pod.1 @@ -436,7 +436,7 @@ Enable Remus HA for domain. By default B<xl> relies on ssh as a transport mechanism between the two hosts. N.B: Remus support in xl is still in experimental (proof-of-concept) phase. - There is no support for network or disk buffering at the moment. 
+ There is no support for disk buffering at the moment. B<OPTIONS> @@ -460,6 +460,11 @@ If empty, run <host> instead of ssh <host> xl migrate-receive -r [-e]. On the new host, do not wait in the background (on <host>) for the death of the domain. See the corresponding option of the I<create> subcommand. +=item B<-N> I<netbufscript> + +Use <netbufscript> to setup network buffering instead of the +default script (/etc/xen/scripts/remus-netbuf-setup). + =item B<-F> Run Remus in unsafe mode. Use this option with caution as failover may @@ -470,6 +475,10 @@ not work as intended. Replicate memory checkpoints to /dev/null (blackhole). Generally useful for debugging. Requires enabling unsafe mode. +=item B<-n> + +Disable network output buffering. Requires enabling unsafe mode. + =back =item B<pause> I<domain-id> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 332b7dfde5..e0e1b4446c 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -807,13 +807,17 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, libxl_defbool_setdefault(&info->allow_unsafe, false); libxl_defbool_setdefault(&info->blackhole, false); libxl_defbool_setdefault(&info->compression, true); + libxl_defbool_setdefault(&info->netbuf, true); if (!libxl_defbool_val(info->allow_unsafe) && - libxl_defbool_val(info->blackhole)) { - LOG(ERROR, "Unsafe mode must be enabled to replicate to /dev/null"); + (libxl_defbool_val(info->blackhole) || + !libxl_defbool_val(info->netbuf))) { + LOG(ERROR, "Unsafe mode must be enabled to replicate to /dev/null and " + "disable network buffering"); goto out; } + GCNEW(dss); dss->ao = ao; dss->callback = remus_failover_cb; @@ -830,11 +834,13 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, /* Convenience aliases */ libxl__remus_devices_state *const rds = &dss->rds; - if (!libxl__netbuffer_enabled(gc)) { - LOG(ERROR, "Remus: No support for network buffering"); - goto out; + if (libxl_defbool_val(info->netbuf)) { + if (!libxl__netbuffer_enabled(gc)) { + LOG(ERROR, "Remus: No support for 
network buffering"); + goto out; + } + rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VIF); } - rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VIF); rds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VBD); rds->ao = ao; diff --git a/tools/libxl/libxl_netbuffer.c b/tools/libxl/libxl_netbuffer.c index 72e0ad0fd5..edc6843253 100644 --- a/tools/libxl/libxl_netbuffer.c +++ b/tools/libxl/libxl_netbuffer.c @@ -41,6 +41,7 @@ int libxl__netbuffer_enabled(libxl__gc *gc) int init_subkind_nic(libxl__remus_devices_state *rds) { int rc, ret; + libxl__domain_suspend_state *dss = CONTAINER_OF(rds, *dss, rds); STATE_AO_GC(rds->ao); @@ -68,8 +69,12 @@ int init_subkind_nic(libxl__remus_devices_state *rds) goto out; } - rds->netbufscript = GCSPRINTF("%s/remus-netbuf-setup", - libxl__xen_script_dir_path()); + if (dss->remus->netbufscript) { + rds->netbufscript = libxl__strdup(gc, dss->remus->netbufscript); + } else { + rds->netbufscript = GCSPRINTF("%s/remus-netbuf-setup", + libxl__xen_script_dir_path()); + } rc = 0; diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 0fea5b6e5a..494d37e0a4 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -614,6 +614,8 @@ libxl_domain_remus_info = Struct("domain_remus_info",[ ("allow_unsafe", libxl_defbool), ("blackhole", libxl_defbool), ("compression", libxl_defbool), + ("netbuf", libxl_defbool), + ("netbufscript", string), ]) libxl_event_type = Enumeration("event_type", [ diff --git a/tools/libxl/xl.c b/tools/libxl/xl.c index 4c5a5ee472..f0143063ee 100644 --- a/tools/libxl/xl.c +++ b/tools/libxl/xl.c @@ -44,6 +44,7 @@ char *default_vifscript = NULL; char *default_bridge = NULL; char *default_gatewaydev = NULL; char *default_vifbackend = NULL; +char *default_remus_netbufscript = NULL; enum output_format default_output_format = OUTPUT_FORMAT_JSON; int claim_mode = 1; bool progress_use_cr = 0; @@ -176,6 +177,9 @@ static void parse_global_config(const char *configfile, if (!xlu_cfg_get_long 
(config, "claim_mode", &l, 0)) claim_mode = l; + xlu_cfg_replace_string (config, "remus.default.netbufscript", + &default_remus_netbufscript, 0); + xlu_cfg_destroy(config); } diff --git a/tools/libxl/xl.h b/tools/libxl/xl.h index 6a6a0f918d..6c7aa8ecdf 100644 --- a/tools/libxl/xl.h +++ b/tools/libxl/xl.h @@ -171,6 +171,7 @@ extern char *default_vifscript; extern char *default_bridge; extern char *default_gatewaydev; extern char *default_vifbackend; +extern char *default_remus_netbufscript; extern char *blkdev_start; enum output_format { diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index edcfa649f5..48a3a41902 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -7497,7 +7497,7 @@ int main_remus(int argc, char **argv) r_info.interval = 200; libxl_defbool_setdefault(&r_info.blackhole, false); - SWITCH_FOREACH_OPT(opt, "Fbui:s:e", NULL, "remus", 2) { + SWITCH_FOREACH_OPT(opt, "Fbuni:s:N:e", NULL, "remus", 2) { case 'i': r_info.interval = atoi(optarg); break; @@ -7510,6 +7510,12 @@ int main_remus(int argc, char **argv) case 'u': libxl_defbool_set(&r_info.compression, false); break; + case 'n': + libxl_defbool_set(&r_info.netbuf, false); + break; + case 'N': + r_info.netbufscript = optarg; + break; case 's': ssh_command = optarg; break; @@ -7521,6 +7527,9 @@ int main_remus(int argc, char **argv) domid = find_domain(argv[optind]); host = argv[optind + 1]; + if (!r_info.netbufscript) + r_info.netbufscript = default_remus_netbufscript; + if (libxl_defbool_val(r_info.blackhole)) { send_fd = open("/dev/null", O_RDWR, 0644); if (send_fd < 0) { @@ -7558,13 +7567,19 @@ int main_remus(int argc, char **argv) /* Point of no return */ rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd, 0); - /* If we are here, it means backup has failed/domain suspend failed. - * Try to resume the domain and exit gracefully. - * TODO: Split-Brain check. + /* check if the domain exists. 
User may have xl destroyed the + * domain to force failover */ - fprintf(stderr, "remus sender: libxl_domain_suspend failed" - " (rc=%d)\n", rc); + if (libxl_domain_info(ctx, 0, domid)) { + fprintf(stderr, "Remus: Primary domain has been destroyed.\n"); + close(send_fd); + return 0; + } + /* If we are here, it means remus setup/domain suspend/backup has + * failed. Try to resume the domain and exit gracefully. + * TODO: Split-Brain check. + */ if (rc == ERROR_GUEST_TIMEDOUT) fprintf(stderr, "Failed to suspend domain at primary.\n"); else { diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c index 08f3c90b11..cd1b6123f1 100644 --- a/tools/libxl/xl_cmdtable.c +++ b/tools/libxl/xl_cmdtable.c @@ -501,10 +501,13 @@ struct cmd_spec cmd_table[] = { " ssh xl migrate-receive -r [-e]\n" "-e Do not wait in the background (on ) for the death\n" " of the domain.\n" - "-F Enable unsafe configurations [-b flags]. Use this option\n" + "-N Use netbufscript to setup network buffering instead of the\n" + " default script (/etc/xen/scripts/remus-netbuf-setup).\n" + "-F Enable unsafe configurations [-b|-n flags]. Use this option\n" " with caution as failover may not work as intended.\n" "-b Replicate memory checkpoints to /dev/null (blackhole).\n" - " Works only in unsafe mode." + " Works only in unsafe mode.\n" + "-n Disable network output buffering. Works only in unsafe mode." }, #endif { "devd",