From: Keir Fraser Date: Tue, 26 May 2009 10:05:04 +0000 (+0100) Subject: Transcendent memory ("tmem") for Xen. X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~13880 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=6009f4dd;p=xen.git Transcendent memory ("tmem") for Xen. Tmem, when called from a tmem-capable (paravirtualized) guest, makes use of otherwise unutilized ("fallow") memory to create and manage pools of pages that can be accessed from the guest either as "ephemeral" pages or as "persistent" pages. In either case, the pages are not directly addressable by the guest, only copied to and fro via the tmem interface. Ephemeral pages are a nice place for a guest to put recently evicted clean pages that it might need again; these pages can be reclaimed synchronously by Xen for other guests or other uses. Persistent pages are a nice place for a guest to put "swap" pages to avoid sending them to disk. These pages retain data as long as the guest lives, but count against the guest memory allocation. Tmem pages may optionally be compressed and, in certain cases, can be shared between guests. Tmem also handles concurrency nicely and provides limited QoS settings to combat malicious DoS attempts. Save/restore and live migration support is not yet provided. Tmem is primarily targeted for an x86 64-bit hypervisor. On a 32-bit x86 hypervisor, it has limited functionality and testing due to limitations of the xen heap. Nearly all of tmem is architecture-independent; three routines remain to be ported to ia64 and it should work on that architecture too. It is also structured to be portable to non-Xen environments. Tmem defaults off (for now) and must be enabled with a "tmem" xen boot option (and does nothing unless a tmem-capable guest is running). The "tmem_compress" boot option enables compression, which takes about 10x more CPU but approximately doubles the number of pages that can be stored. 
Tmem can be controlled via several "xm" commands and many interesting tmem statistics can be obtained. A README and internal specification will follow, but lots of useful prose about tmem, as well as Linux patches, can be found at http://oss.oracle.com/projects/tmem . Signed-off-by: Dan Magenheimer --- diff --git a/.hgignore b/.hgignore index fb16719e24..1b798d15f4 100644 --- a/.hgignore +++ b/.hgignore @@ -181,6 +181,7 @@ ^tools/misc/xc_shadow$ ^tools/misc/xen_cpuperf$ ^tools/misc/xen-detect$ +^tools/misc/xen-tmem-list-parse$ ^tools/misc/xenperf$ ^tools/misc/xenpm$ ^tools/pygrub/build/.*$ diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile index acd7067e57..e984df9088 100644 --- a/tools/libxc/Makefile +++ b/tools/libxc/Makefile @@ -21,6 +21,7 @@ CTRL_SRCS-y += xc_tbuf.c CTRL_SRCS-y += xc_pm.c CTRL_SRCS-y += xc_cpu_hotplug.c CTRL_SRCS-y += xc_resume.c +CTRL_SRCS-y += xc_tmem.c CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c CTRL_SRCS-$(CONFIG_SunOS) += xc_solaris.c diff --git a/tools/libxc/xc_tmem.c b/tools/libxc/xc_tmem.c new file mode 100644 index 0000000000..ba618ef98c --- /dev/null +++ b/tools/libxc/xc_tmem.c @@ -0,0 +1,83 @@ +/****************************************************************************** + * xc_tmem.c + * + * Copyright (C) 2008 Oracle Corp. 
+ */ + +#include "xc_private.h" +#include + +static int do_tmem_op(int xc, tmem_op_t *op) +{ + int ret; + DECLARE_HYPERCALL; + + hypercall.op = __HYPERVISOR_tmem_op; + hypercall.arg[0] = (unsigned long)op; + if (lock_pages(op, sizeof(*op)) != 0) + { + PERROR("Could not lock memory for Xen hypercall"); + return -EFAULT; + } + if ((ret = do_xen_hypercall(xc, &hypercall)) < 0) + { + if ( errno == EACCES ) + DPRINTF("tmem operation failed -- need to" + " rebuild the user-space tool set?\n"); + } + unlock_pages(op, sizeof(*op)); + + return ret; +} + +int xc_tmem_control(int xc, + int32_t pool_id, + uint32_t subop, + uint32_t cli_id, + uint32_t arg1, + uint32_t arg2, + void *buf) +{ + tmem_op_t op; + int rc; + + op.cmd = TMEM_CONTROL; + op.pool_id = pool_id; + op.subop = subop; + op.cli_id = cli_id; + op.arg1 = arg1; + op.arg2 = arg2; + op.buf.p = buf; + + if (subop == TMEMC_LIST) { + if ((arg1 != 0) && (lock_pages(buf, arg1) != 0)) + { + PERROR("Could not lock memory for Xen hypercall"); + return -ENOMEM; + } + } + +#ifdef VALGRIND + if (arg1 != 0) + memset(buf, 0, arg1); +#endif + + rc = do_tmem_op(xc, &op); + + if (subop == TMEMC_LIST) { + if (arg1 != 0) + unlock_pages(buf, arg1); + } + + return rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index c9b1866b60..8a54d5775d 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -1267,4 +1267,15 @@ int xc_get_vcpu_migration_delay(int xc_handle, uint32_t *value); int xc_get_cpuidle_max_cstate(int xc_handle, uint32_t *value); int xc_set_cpuidle_max_cstate(int xc_handle, uint32_t value); +/** + * tmem operations + */ +int xc_tmem_control(int xc, + int32_t pool_id, + uint32_t subop, + uint32_t cli_id, + uint32_t arg1, + uint32_t arg2, + void *buf); + #endif /* XENCTRL_H */ diff --git a/tools/misc/Makefile b/tools/misc/Makefile index c309a3f106..b6a735bfdd 
100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -10,7 +10,7 @@ CFLAGS += $(INCLUDES) HDRS = $(wildcard *.h) -TARGETS-y := xenperf xenpm +TARGETS-y := xenperf xenpm xen-tmem-list-parse TARGETS-$(CONFIG_X86) += xen-detect TARGETS := $(TARGETS-y) @@ -22,7 +22,7 @@ INSTALL_BIN-y := xencons INSTALL_BIN-$(CONFIG_X86) += xen-detect INSTALL_BIN := $(INSTALL_BIN-y) -INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm +INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse INSTALL_SBIN := $(INSTALL_SBIN-y) DEFAULT_PYTHON_PATH := $(shell $(XEN_ROOT)/tools/python/get-path) diff --git a/tools/misc/xen-tmem-list-parse.c b/tools/misc/xen-tmem-list-parse.c new file mode 100644 index 0000000000..383daee158 --- /dev/null +++ b/tools/misc/xen-tmem-list-parse.c @@ -0,0 +1,288 @@ +/* + * Parse output from tmem-list and reformat to human-readable + * + * NOTE: NEVER delete a parse call as this file documents backwards + * compatibility for older versions of tmem-list and we don't want to + * accidentally reuse an old tag + * + * Copyright (c) 2009, Dan Magenheimer, Oracle Corp. 
+ */ + +#include +#include +#include + +#define BUFSIZE 4096 +#define PAGE_SIZE 4096 + +unsigned long long parse(char *s,char *match) +{ + char *s1 = strstr(s,match); + unsigned long long ret; + + if ( s1 == NULL ) + return 0LL; + s1 += 2; + if ( *s1++ != ':' ) + return 0LL; + sscanf(s1,"%llu",&ret); + return ret; +} + +unsigned long long parse2(char *s,char *match1, char *match2) +{ + char match[3]; + match[0] = *match1; + match[1] = *match2; + match[2] = '\0'; + return parse(s,match); +} + +void parse_string(char *s,char *match, char *buf, int len) +{ + char *s1 = strstr(s,match); + int i; + + if ( s1 == NULL ) + return; + s1 += 2; + if ( *s1++ != ':' ) + return; + for ( i = 0; i < len; i++ ) + *buf++ = *s1++; +} + +void parse_sharers(char *s, char *match, char *buf, int len) +{ + char *s1 = strstr(s,match); + char *b = buf; + + if ( s1 == NULL ) + return; + while ( s1 ) + { + s1 += 2; + if (*s1++ != ':') + return; + while (*s1 <= '0' && *s1 <= '9') + *b++ = *s1++; + *b++ = ','; + s1 = strstr(s1,match); + } + if ( b != buf ) + *--b = '\0'; +} + +void parse_global(char *s) +{ + unsigned long long total_ops = parse(s,"Tt"); + unsigned long long errored_ops = parse(s,"Te"); + unsigned long long failed_copies = parse(s,"Cf"); + unsigned long long alloc_failed = parse(s,"Af"); + unsigned long long alloc_page_failed = parse(s,"Pf"); + unsigned long long avail_pages = parse(s,"Ta"); + unsigned long long low_on_memory = parse(s,"Lm"); + unsigned long long evicted_pgs = parse(s,"Et"); + unsigned long long evict_attempts = parse(s,"Ea"); + unsigned long long relinq_pgs = parse(s,"Rt"); + unsigned long long relinq_attempts = parse(s,"Ra"); + unsigned long long max_evicts_per_relinq = parse(s,"Rx"); + unsigned long long total_flush_pool = parse(s,"Fp"); + unsigned long long global_eph_count = parse(s,"Ec"); + unsigned long long global_eph_max = parse(s,"Em"); + unsigned long long obj_count = parse(s,"Oc"); + unsigned long long obj_max = parse(s,"Om"); + unsigned long long 
rtree_node_count = parse(s,"Nc"); + unsigned long long rtree_node_max = parse(s,"Nm"); + unsigned long long pgp_count = parse(s,"Pc"); + unsigned long long pgp_max = parse(s,"Pm"); + + printf("total tmem ops=%llu (errors=%llu) -- tmem pages avail=%llu\n", + total_ops, errored_ops, avail_pages); + printf("datastructs: objs=%llu (max=%llu) pgps=%llu (max=%llu) " + "nodes=%llu (max=%llu)\n", + obj_count, obj_max, pgp_count, pgp_max, + rtree_node_count, rtree_node_max); + printf("misc: failed_copies=%llu alloc_failed=%llu alloc_page_failed=%llu " + "low_mem=%llu evicted=%llu/%llu relinq=%llu/%llu, " + "max_evicts_per_relinq=%llu, flush_pools=%llu, " + "eph_count=%llu, eph_max=%llu\n", + failed_copies, alloc_failed, alloc_page_failed, low_on_memory, + evicted_pgs, evict_attempts, relinq_pgs, relinq_attempts, + max_evicts_per_relinq, total_flush_pool, + global_eph_count, global_eph_max); +} + +#define PARSE_CYC_COUNTER(s,x,prefix) unsigned long long \ + x##_count = parse2(s,prefix,"n"), \ + x##_sum_cycles = parse2(s,prefix,"t"), \ + x##_max_cycles = parse2(s,prefix,"x"), \ + x##_min_cycles = parse2(s,prefix,"m") +#define PRINTF_CYC_COUNTER(x,text) \ + if (x##_count) printf(text" avg=%llu, max=%llu, " \ + "min=%llu, samples=%llu\n", \ + x##_sum_cycles ? 
(x##_sum_cycles/x##_count) : 0, \ + x##_max_cycles, x##_min_cycles, x##_count) + +void parse_time_stats(char *s) +{ + PARSE_CYC_COUNTER(s,succ_get,"G"); + PARSE_CYC_COUNTER(s,succ_put,"P"); + PARSE_CYC_COUNTER(s,non_succ_get,"g"); + PARSE_CYC_COUNTER(s,non_succ_put,"p"); + PARSE_CYC_COUNTER(s,flush,"F"); + PARSE_CYC_COUNTER(s,flush_obj,"O"); + PARSE_CYC_COUNTER(s,pg_copy,"C"); + PARSE_CYC_COUNTER(s,compress,"c"); + PARSE_CYC_COUNTER(s,decompress,"d"); + + PRINTF_CYC_COUNTER(succ_get,"succ get cycles:"); + PRINTF_CYC_COUNTER(succ_put,"succ put cycles:"); + PRINTF_CYC_COUNTER(non_succ_get,"failed get cycles:"); + PRINTF_CYC_COUNTER(non_succ_put,"failed put cycles:"); + PRINTF_CYC_COUNTER(flush,"flush cycles:"); + PRINTF_CYC_COUNTER(flush_obj,"flush_obj cycles:"); + PRINTF_CYC_COUNTER(pg_copy,"page copy cycles:"); + PRINTF_CYC_COUNTER(compress,"compression cycles:"); + PRINTF_CYC_COUNTER(decompress,"decompression cycles:"); +} + +void parse_client(char *s) +{ + unsigned long cli_id = parse(s,"CI"); + unsigned long weight = parse(s,"ww"); + unsigned long cap = parse(s,"ca"); + unsigned long compress = parse(s,"co"); + unsigned long frozen = parse(s,"fr"); + unsigned long long eph_count = parse(s,"Ec"); + unsigned long long max_eph_count = parse(s,"Em"); + unsigned long long compressed_pages = parse(s,"cp"); + unsigned long long compressed_sum_size = parse(s,"cb"); + unsigned long long compress_poor = parse(s,"cn"); + unsigned long long compress_nomem = parse(s,"cm"); + + printf("domid%lu: weight=%lu,cap=%lu,compress=%d,frozen=%d," + "eph_count=%llu,max_eph=%llu," + "compression ratio=%lu%% (samples=%llu,poor=%llu,nomem=%llu)\n", + cli_id, weight, cap, compress?1:0, frozen?1:0, + eph_count, max_eph_count, + compressed_pages ? 
(long)((compressed_sum_size*100LL) / + (compressed_pages*PAGE_SIZE)) : 0, + compressed_pages, compress_poor, compress_nomem); + +} + +void parse_pool(char *s) +{ + char pool_type[3]; + unsigned long cli_id = parse(s,"CI"); + unsigned long pool_id = parse(s,"PI"); + unsigned long long pgp_count = parse(s,"Pc"); + unsigned long long max_pgp_count = parse(s,"Pm"); + unsigned long long obj_count = parse(s,"Oc"); + unsigned long long max_obj_count = parse(s,"Om"); + unsigned long long objnode_count = parse(s,"Nc"); + unsigned long long max_objnode_count = parse(s,"Nm"); + unsigned long long good_puts = parse(s,"ps"); + unsigned long long puts = parse(s,"pt"); + unsigned long long no_mem_puts = parse(s,"px"); + unsigned long long dup_puts_flushed = parse(s,"pd"); + unsigned long long dup_puts_replaced = parse(s,"pr"); + unsigned long long found_gets = parse(s,"gs"); + unsigned long long gets = parse(s,"gt"); + unsigned long long flushs_found = parse(s,"fs"); + unsigned long long flushs = parse(s,"ft"); + unsigned long long flush_objs_found = parse(s,"os"); + unsigned long long flush_objs = parse(s,"ot"); + + parse_string(s,"PT",pool_type,2); + printf("domid%lu,id%lu[%s]:pgp=%llu(max=%llu) obj=%llu(%llu) " + "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) " + "gets=%llu/%llu(%llu%%) " + "flush=%llu/%llu flobj=%llu/%llu\n", + cli_id, pool_id, pool_type, + pgp_count, max_pgp_count, obj_count, max_obj_count, + objnode_count, max_objnode_count, + good_puts, puts, no_mem_puts, + dup_puts_flushed, dup_puts_replaced, + found_gets, gets, + gets ? 
(found_gets*100LL)/gets : 0, + flushs_found, flushs, flush_objs_found, flush_objs); + +} + +void parse_shared_pool(char *s) +{ + char pool_type[3]; + char buf[BUFSIZE]; + unsigned long pool_id = parse(s,"PI"); + unsigned long long uid0 = parse(s,"U0"); + unsigned long long uid1 = parse(s,"U1"); + unsigned long long pgp_count = parse(s,"Pc"); + unsigned long long max_pgp_count = parse(s,"Pm"); + unsigned long long obj_count = parse(s,"Oc"); + unsigned long long max_obj_count = parse(s,"Om"); + unsigned long long objnode_count = parse(s,"Nc"); + unsigned long long max_objnode_count = parse(s,"Nm"); + unsigned long long good_puts = parse(s,"ps"); + unsigned long long puts = parse(s,"pt"); + unsigned long long no_mem_puts = parse(s,"px"); + unsigned long long dup_puts_flushed = parse(s,"pd"); + unsigned long long dup_puts_replaced = parse(s,"pr"); + unsigned long long found_gets = parse(s,"gs"); + unsigned long long gets = parse(s,"gt"); + unsigned long long flushs_found = parse(s,"fs"); + unsigned long long flushs = parse(s,"ft"); + unsigned long long flush_objs_found = parse(s,"os"); + unsigned long long flush_objs = parse(s,"ot"); + + parse_string(s,"PT",pool_type,2); + parse_sharers(s,"SC",buf,BUFSIZE); + printf("poolid=%lu[%s] uuid=%llu.%llu, shared-by:%s: " + "pgp=%llu(max=%llu) obj=%llu(%llu) " + "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) " + "gets=%llu/%llu(%llu%%) " + "flush=%llu/%llu flobj=%llu/%llu\n", + pool_id, pool_type, uid0, uid1, buf, + pgp_count, max_pgp_count, obj_count, max_obj_count, + objnode_count, max_objnode_count, + good_puts, puts, no_mem_puts, + dup_puts_flushed, dup_puts_replaced, + found_gets, gets, + gets ? 
(found_gets*100LL)/gets : 0, + flushs_found, flushs, flush_objs_found, flush_objs); +} + +int main(int ac, char **av) +{ + char *p, c; + char buf[BUFSIZE]; + + while ( (p = fgets(buf,BUFSIZE,stdin)) != NULL ) + { + c = *p++; + if ( *p++ != '=' ) + continue; + switch ( c ) + { + case 'G': + parse_global(p); + break; + case 'T': + parse_time_stats(p); + break; + case 'C': + parse_client(p); + break; + case 'P': + parse_pool(p); + break; + case 'S': + parse_shared_pool(p); + break; + default: + continue; + } + } + return 0; +} diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index 75a19d3d6b..5a0bf1807c 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -19,6 +19,7 @@ #include "xenctrl.h" #include +#include #include "xc_dom.h" #include #include @@ -1506,6 +1507,50 @@ static PyObject *dom_op(XcObject *self, PyObject *args, return zero; } +static PyObject *pyxc_tmem_control(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + int32_t pool_id; + uint32_t subop; + uint32_t cli_id; + uint32_t arg1; + uint32_t arg2; + char *buf; + char _buffer[32768], *buffer = _buffer; + int rc; + + static char *kwd_list[] = { "pool_id", "subop", "cli_id", "arg1", "arg2", "buf", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiiis", kwd_list, + &pool_id, &subop, &cli_id, &arg1, &arg2, &buf) ) + return NULL; + + if ( (subop == TMEMC_LIST) && (arg1 > 32768) ) + arg1 = 32768; + + if ( (rc = xc_tmem_control(self->xc_handle, pool_id, subop, cli_id, arg1, arg2, buffer)) < 0 ) + return Py_BuildValue("i", rc); + + switch (subop) { + case TMEMC_LIST: + return Py_BuildValue("s", buffer); + case TMEMC_FLUSH: + return Py_BuildValue("i", rc); + case TMEMC_THAW: + case TMEMC_FREEZE: + case TMEMC_DESTROY: + case TMEMC_SET_WEIGHT: + case TMEMC_SET_CAP: + case TMEMC_SET_COMPRESS: + default: + break; + } + + Py_INCREF(zero); + return zero; +} + static PyMethodDef pyxc_methods[] = { { "handle", (PyCFunction)pyxc_handle, 
@@ -1965,6 +2010,18 @@ static PyMethodDef pyxc_methods[] = { " dom [int]: Identifier of domain.\n" }, #endif + { "tmem_control", + (PyCFunction)pyxc_tmem_control, + METH_VARARGS | METH_KEYWORDS, "\n" + "Do various control on a tmem pool.\n" + " pool_id [int]: Identifier of the tmem pool (-1 == all).\n" + " subop [int]: Supplementary Operation.\n" + " cli_id [int]: Client identifier (-1 == all).\n" + " arg1 [int]: Argument.\n" + " arg2 [int]: Argument.\n" + " buf [str]: Buffer.\n\n" + "Returns: [int] 0 or [str] tmem info on success; exception on error.\n" }, + { NULL, NULL, 0, NULL } }; diff --git a/tools/python/xen/xend/XendAPI.py b/tools/python/xen/xend/XendAPI.py index 6dda3a9c79..126db6421c 100644 --- a/tools/python/xen/xend/XendAPI.py +++ b/tools/python/xen/xend/XendAPI.py @@ -925,7 +925,15 @@ class XendAPI(object): ('dmesg', 'String'), ('dmesg_clear', 'String'), ('get_log', 'String'), - ('send_debug_keys', None)] + ('send_debug_keys', None), + ('tmem_thaw', None), + ('tmem_freeze', None), + ('tmem_flush', None), + ('tmem_destroy', None), + ('tmem_list', None), + ('tmem_set_weight', None), + ('tmem_set_cap', None), + ('tmem_set_compress', None)] host_funcs = [('get_by_name_label', None), ('list_methods', None)] @@ -1061,6 +1069,70 @@ class XendAPI(object): 'PSCSIs': XendPSCSI.get_all()} return xen_api_success(record) + def host_tmem_thaw(self, _, host_ref, cli_id): + node = XendNode.instance() + try: + node.tmem_thaw(cli_id) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + + def host_tmem_freeze(self, _, host_ref, cli_id): + node = XendNode.instance() + try: + node.tmem_freeze(cli_id) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + + def host_tmem_flush(self, _, host_ref, cli_id, pages): + node = XendNode.instance() + try: + node.tmem_flush(cli_id, pages) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + + def host_tmem_destroy(self, _, host_ref, cli_id): + 
node = XendNode.instance() + try: + node.tmem_destroy(cli_id) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + + def host_tmem_list(self, _, host_ref, cli_id, use_long): + node = XendNode.instance() + try: + info = node.tmem_list(cli_id, use_long) + except Exception, e: + return xen_api_error(e) + return xen_api_success(info) + + def host_tmem_set_weight(self, _, host_ref, cli_id, value): + node = XendNode.instance() + try: + node.tmem_set_weight(cli_id, value) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + + def host_tmem_set_cap(self, _, host_ref, cli_id, value): + node = XendNode.instance() + try: + node.tmem_set_cap(cli_id, value) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + + def host_tmem_set_compress(self, _, host_ref, cli_id, value): + node = XendNode.instance() + try: + node.tmem_set_compress(cli_id, value) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + # class methods def host_get_all(self, session): return xen_api_success((XendNode.instance().uuid,)) diff --git a/tools/python/xen/xend/XendConstants.py b/tools/python/xen/xend/XendConstants.py index b0a7c66abb..c25ba2935a 100644 --- a/tools/python/xen/xend/XendConstants.py +++ b/tools/python/xen/xend/XendConstants.py @@ -141,3 +141,29 @@ XS_VMROOT = "/vm/" NR_PCI_DEV = 32 AUTO_PHP_SLOT = NR_PCI_DEV AUTO_PHP_SLOT_STR = "%02x" % NR_PCI_DEV + +# +# tmem +# + +TMEM_CONTROL = 0 +TMEM_NEW_POOL = 1 +TMEM_DESTROY_POOL = 2 +TMEM_NEW_PAGE = 3 +TMEM_PUT_PAGE = 4 +TMEM_GET_PAGE = 5 +TMEM_FLUSH_PAGE = 6 +TMEM_FLUSH_OBJECT = 7 +TMEM_READ = 8 +TMEM_WRITE = 9 +TMEM_XCHG = 10 + +TMEMC_THAW = 0 +TMEMC_FREEZE = 1 +TMEMC_FLUSH = 2 +TMEMC_DESTROY = 3 +TMEMC_LIST = 4 +TMEMC_SET_WEIGHT = 5 +TMEMC_SET_CAP = 6 +TMEMC_SET_COMPRESS = 7 + diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py index d1c4055ba4..34682b90aa 100644 --- 
a/tools/python/xen/xend/XendNode.py +++ b/tools/python/xen/xend/XendNode.py @@ -26,6 +26,7 @@ from xen.util import pci as PciUtil from xen.util import vscsi_util from xen.xend import XendAPIStore from xen.xend import osdep +from xen.xend.XendConstants import * import uuid, arch from XendPBD import XendPBD @@ -940,6 +941,69 @@ class XendNode: def info_dict(self): return dict(self.info()) + # tmem + def tmem_list(self, cli_id, use_long): + pool_id = -1 + subop = TMEMC_LIST + arg1 = 32768 + arg2 = use_long + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def tmem_thaw(self, cli_id): + pool_id = -1 + subop = TMEMC_THAW + arg1 = 0 + arg2 = 0 + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def tmem_freeze(self, cli_id): + pool_id = -1 + subop = TMEMC_FREEZE + arg1 = 0 + arg2 = 0 + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def tmem_flush(self, cli_id, pages): + pool_id = -1 + subop = TMEMC_FLUSH + arg1 = pages + arg2 = 0 + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def tmem_destroy(self, cli_id): + pool_id = -1 + subop = TMEMC_DESTROY + arg1 = 0 + arg2 = 0 + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def tmem_set_weight(self, cli_id, arg1): + pool_id = -1 + subop = TMEMC_SET_WEIGHT + arg2 = 0 + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def tmem_set_cap(self, cli_id, arg1): + pool_id = -1 + subop = TMEMC_SET_CAP + arg2 = 0 + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def tmem_set_compress(self, cli_id, arg1): + pool_id = -1 + subop = TMEMC_SET_COMPRESS + arg2 = 0 + buf = '' + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + + def instance(): global inst try: diff --git a/tools/python/xen/xend/balloon.py b/tools/python/xen/xend/balloon.py index b31398c745..42c8ea0aa7 100644 
--- a/tools/python/xen/xend/balloon.py +++ b/tools/python/xen/xend/balloon.py @@ -26,6 +26,7 @@ import XendOptions from XendLogging import log from XendError import VmError import osdep +from xen.xend.XendConstants import * RETRY_LIMIT = 20 RETRY_LIMIT_INCR = 5 @@ -109,6 +110,9 @@ def free(need_mem, dominfo): last_free = None rlimit = RETRY_LIMIT + # stop tmem from absorbing any more memory (must THAW when done!) + xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, "") + # If unreasonable memory size is required, we give up waiting # for ballooning or scrubbing, as if had retried. physinfo = xc.physinfo() @@ -122,6 +126,17 @@ def free(need_mem, dominfo): if need_mem >= max_free_mem: retries = rlimit + freeable_mem = free_mem + scrub_mem + if freeable_mem < need_mem and need_mem < max_free_mem: + # flush memory from tmem to scrub_mem and reobtain physinfo + need_tmem_kb = need_mem - freeable_mem + tmem_kb = xc.tmem_control(0,TMEMC_FLUSH,-1, need_tmem_kb, 0, "") + log.debug("Balloon: tmem relinquished %d KiB of %d KiB requested.", + tmem_kb, need_tmem_kb) + physinfo = xc.physinfo() + free_mem = physinfo['free_memory'] + scrub_mem = physinfo['scrub_memory'] + # Check whethercurrent machine is a numa system and the new # created hvm has all its vcpus in the same node, if all the # conditions above are fit. 
We will wait until all the pages @@ -216,4 +231,6 @@ def free(need_mem, dominfo): ' be shrunk any further')) finally: + # allow tmem to accept pages again + xc.tmem_control(0,TMEMC_THAW,-1, 0, 0, "") del xc diff --git a/tools/python/xen/xend/server/XMLRPCServer.py b/tools/python/xen/xend/server/XMLRPCServer.py index fb9bdfee34..93c6caef1b 100644 --- a/tools/python/xen/xend/server/XMLRPCServer.py +++ b/tools/python/xen/xend/server/XMLRPCServer.py @@ -198,7 +198,11 @@ class XMLRPCServer: self.server.register_function(fn, "xend.domain.%s" % name[7:]) # Functions in XendNode and XendDmesg - for type, lst, n in [(XendNode, ['info', 'pciinfo', 'send_debug_keys'], + for type, lst, n in [(XendNode, + ['info', 'pciinfo', 'send_debug_keys', + 'tmem_list', 'tmem_freeze', 'tmem_thaw', + 'tmem_flush', 'tmem_destroy', 'tmem_set_weight', + 'tmem_set_cap', 'tmem_set_compress'], 'node'), (XendDmesg, ['info', 'clear'], 'node.dmesg')]: inst = type.instance() diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py index b7897b248a..e55fab42e5 100644 --- a/tools/python/xen/xm/main.py +++ b/tools/python/xen/xm/main.py @@ -199,6 +199,15 @@ SUBCOMMAND_HELP = { 'scsi-list' : (' [--long]', 'List all SCSI devices currently attached.'), + # tmem + 'tmem-list' : ('[-l|--long] [|-a|--all]', 'List tmem pools.'), + 'tmem-thaw' : ('[|-a|--all]', 'Thaw tmem pools.'), + 'tmem-freeze' : ('[|-a|--all]', 'Freeze tmem pools.'), + 'tmem-destroy' : ('[|-a|--all]', 'Destroy tmem pools.'), + 'tmem-set' : ('[|-a|--all] [weight=] [cap=] ' + '[compress=]', + 'Change tmem settings.'), + # security 'addlabel' : ('