xend: Memory pool for pv guest on systems with >128G memory
author: Keir Fraser <keir.fraser@citrix.com>
Wed, 2 Dec 2009 08:51:12 +0000 (08:51 +0000)
committer: Keir Fraser <keir.fraser@citrix.com>
Wed, 2 Dec 2009 08:51:12 +0000 (08:51 +0000)
The main idea of this patch is:

1) The admin sets aside some memory below 128G for 32-bit paravirtual
domain creation (via dom0_mem=-<value> on the kernel command line).

2) The admin also explicitly states to the tools (i.e. xend) how much
memory is supposed to be left untouched by 64-bit domains.

3) If a 32-bit pv DomU gets created, no ballooning ought to be
necessary (since if it is, no guarantee can be made about the address
range of the memory ballooned out), and memory gets allocated from the
reserved range.

4) Upon 64-bit (or 32-bit HVM or HVM) DomU creation, the tools
determine the amount of memory to be ballooned out of Dom0 by adding
the amount needed for the new guest and the amount still in the
reserved pool (and then of course subtracting the total amount of
memory the hypervisor has available for guest use).

Signed-off-by: james song (wei) <jsong@novell.com>
tools/examples/xend-config.sxp
tools/libxc/xc_dom_compat_linux.c
tools/libxc/xenguest.h
tools/python/xen/lowlevel/xc/xc.c
tools/python/xen/xend/MemoryPool.py [new file with mode: 0644]
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/XendOptions.py
tools/python/xen/xend/balloon.py
tools/python/xen/xend/image.py

index 6c6dd2e253a28f75b329fb33f12b3560a9e510c8..6c056d4e05a7c8386be2fce3bf95ea4c4232922d 100644 (file)
 # If enable-dom0-ballooning = no, dom0 will never balloon out.
 (enable-dom0-ballooning yes)
 
+# 32-bit paravirtual domains can only consume physical
+# memory below 168GB. On systems with memory beyond that address,
+# they'll be confined to memory below 128GB.
+# Use total_available_memory (in GB) to specify the amount of memory reserved
+# in the memory pool exclusively for 32-bit paravirtual domains.
+# Additionally, you should pass dom0_mem=-<value> as a parameter to the
+# Xen kernel to reserve that memory for 32-bit paravirtual domains.
+# The default for total_available_memory is "0" (0GB).
+(total_available_memory 0) 
+
 # In SMP system, dom0 will use dom0-cpus # of CPUS
 # If dom0-cpus = 0, dom0 will take all cpus available
 (dom0-cpus 0)
index c00cd5abcd36ecd47480e8de9ae2417de2f30590..ef809627f87087f4b12607d915f0d36a0f66e750 100644 (file)
@@ -122,6 +122,28 @@ int xc_linux_build(int xc_handle, uint32_t domid,
     xc_dom_release(dom);
     return rc;
 }
+/*
+ * Determine the word size of a PV guest kernel image.
+ *
+ * Loads and parses image_name (using the given cmdline/features) and stores
+ * X86_64_B_SIZE or X86_32_B_SIZE in *bit_size, depending on whether the
+ * parsed guest type string contains "x86_64" or "x86_32".  *bit_size is
+ * left at 0 when neither matches or when loading/parsing fails.
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+int xc_get_bit_size(const char *image_name, const char *cmdline,
+                      const char *features, int *bit_size)
+{
+    struct xc_dom_image *dom;
+    int rc;
+
+    *bit_size = 0;
+    dom = xc_dom_allocate(cmdline, features);
+    /*
+     * Guard against allocation failure (assumed to be reported as NULL):
+     * there is nothing to release and dom must not be dereferenced.
+     */
+    if ( dom == NULL )
+        return -1;
+    if ( (rc = xc_dom_kernel_file(dom, image_name)) != 0 )
+        goto out;
+    if ( (rc = xc_dom_parse_image(dom)) != 0 )
+        goto out;
+    if ( dom->guest_type != NULL )
+    {
+        if ( strstr(dom->guest_type, "x86_64") != NULL )
+            *bit_size = X86_64_B_SIZE; /* 64-bit guest */
+        if ( strstr(dom->guest_type, "x86_32") != NULL )
+            *bit_size = X86_32_B_SIZE; /* 32-bit guest */
+    }
+
+out:
+    xc_dom_release(dom);
+    return rc;
+}
 
 int xc_dom_linux_build(int xc_handle,
                        struct xc_dom_image *dom,
index 6ada19af0047311d442f4c963eeb8484d2721766..851f76985c209498b74524df98f471d58cec998e 100644 (file)
@@ -13,6 +13,8 @@
 #define XCFLAGS_DEBUG     2
 #define XCFLAGS_HVM       4
 #define XCFLAGS_STDVGA    8
+/* Guest word sizes (in bits) stored by xc_get_bit_size() in its output argument. */
+#define X86_64_B_SIZE   64 
+#define X86_32_B_SIZE   32
 
 /* callbacks provided by xc_domain_save */
 struct save_callbacks {
@@ -161,6 +163,9 @@ int xc_suspend_evtchn_init(int xc, int xce, int domid, int port);
 
 int xc_await_suspend(int xce, int suspend_evtchn);
 
+/*
+ * Parse the PV kernel image image_name and store its word size
+ * (X86_64_B_SIZE or X86_32_B_SIZE, 0 if undetermined) in *bit_size.
+ * Returns 0 on success, non-zero on failure.
+ */
+int xc_get_bit_size(const char *image_name, const char *cmdline,
+                      const char *features, int *bit_size);
+
 int xc_mark_page_online(int xc, unsigned long start,
                         unsigned long end, uint32_t *status);
 
index aa780aa303c87a20a84b0d067643cd3248c16b7b..7ad2871765b8d5c87e1b181c5c7a333088513a53 100644 (file)
@@ -399,6 +399,25 @@ static PyObject *pyxc_vcpu_getinfo(XcObject *self,
     return info_dict;
 }
 
+/*
+ * Python binding: getBitSize(image, cmdline, features).
+ *
+ * Returns a dict {"type": N}, where N is the value xc_get_bit_size()
+ * stored for the kernel image (0 when the guest type was not recognised).
+ * Raises via pyxc_error_to_exception() when xc_get_bit_size() reports
+ * a failure.
+ */
+static PyObject *pyxc_getBitSize(XcObject *self,
+                                    PyObject *args,
+                                    PyObject *kwds)
+{
+    char *image = NULL, *cmdline = "", *features = NULL;
+    int type = 0;
+    /* PyArg_ParseTupleAndKeywords() requires a NULL-terminated list. */
+    static char *kwd_list[] = { "image", "cmdline", "features", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "sss", kwd_list,
+                                      &image, &cmdline, &features) )
+        return NULL;
+
+    /* Report a failed probe instead of silently returning type 0. */
+    if ( xc_get_bit_size(image, cmdline, features, &type) != 0 || type < 0 )
+        return pyxc_error_to_exception();
+
+    return Py_BuildValue("{s:i}", "type", type);
+}
+
 static PyObject *pyxc_linux_build(XcObject *self,
                                   PyObject *args,
                                   PyObject *kwds)
@@ -1777,6 +1796,13 @@ static PyMethodDef pyxc_methods[] = {
       " vcpus   [int, 1]:   Number of Virtual CPUS in domain.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
+    {"getBitSize",
+      (PyCFunction)pyxc_getBitSize,
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Get the bitsize of a guest OS.\n"
+      " image   [str]:      Name of kernel image file. May be gzipped.\n"
+      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"},
+
     { "hvm_build", 
       (PyCFunction)pyxc_hvm_build, 
       METH_VARARGS | METH_KEYWORDS, "\n"
diff --git a/tools/python/xen/xend/MemoryPool.py b/tools/python/xen/xend/MemoryPool.py
new file mode 100644 (file)
index 0000000..44e2597
--- /dev/null
@@ -0,0 +1,100 @@
+import xen.lowlevel.xc
+import XendDomain
+import XendOptions
+from XendLogging import log
+from XendError import VmError
+
+class MemoryPool:
+    """Accounting for the reserved memory pool configured in xend.
+
+    The pool size comes from XendOptions.get_reserved_memory() (given in GB)
+    and is tracked here in KiB.  The pool only counts as enabled when a
+    positive size is configured and dom0 ballooning is enabled; while it is
+    disabled every mutator is a no-op and every getter returns 0.
+    """
+
+    def __init__(self):
+        # Memory currently left in the pool (KiB).
+        self.reserve_memory = 0
+        # Portion of the pool tracked as still untouched (KiB).
+        self.untouched_memory = 0
+        # Configured (maximum) size of the pool (KiB).
+        self.default_reserved_memory = 0
+        # Both flags must be true for the pool to be considered enabled.
+        self.enable_memory_pool = 0
+        self.dom0_ballooning = 0
+
+    def init(self):
+        """Load the pool configuration from XendOptions; used by instance().
+
+        Leaves the pool disabled when no positive size is configured or
+        when dom0 ballooning is turned off.
+        """
+        xoptions = XendOptions.instance()
+        self.default_reserved_memory = xoptions.get_reserved_memory() * 1024 * 1024 #KiB
+        if self.default_reserved_memory <= 0:
+            return
+        self.enable_memory_pool = 1
+        self.dom0_ballooning = xoptions.get_enable_dom0_ballooning()
+        if not self.dom0_ballooning:
+            return
+        xc = xen.lowlevel.xc.xc()
+        total_mem = xc.physinfo()['total_memory']
+        # Never reserve more than the host reports (total_memory is assumed
+        # to be in KiB, like the pool size).  The comparison must use the
+        # configured size: reserve_memory is still 0 at this point.
+        if total_mem < self.default_reserved_memory:
+            self.default_reserved_memory = total_mem
+        self.reserve_memory = self.default_reserved_memory
+        self.untouched_memory = self.default_reserved_memory
+        log.debug("MemoryPool: init reserved_memory %d KiB" %self.reserve_memory)
+
+    def available_memory_check(self, need_mem):
+        """Return a true value if the pool is enabled and holds more than
+        need_mem KiB."""
+        return self.is_enabled() and self.reserve_memory > need_mem
+
+    def decrease_memory(self, value):
+        """Take value KiB out of the pool.
+
+        No-op when the pool is disabled or value is at most 4096 KiB.
+        Raises VmError when the pool holds less than value.
+        """
+        if not self.is_enabled() or value <= 4096: #4M for PV guest kernel and ramdisk unzip
+            return
+        elif self.reserve_memory < value:
+            raise VmError(('I need %d KiB, but only have %d KiB in Memory Pool') %(value,self.reserve_memory))
+        else:
+            self.reserve_memory -=  value
+            log.debug("MemoryPool:  decrease_memory: decrease: %d reserved_memory %d KiB" %(value,self.reserve_memory))
+        return
+
+    def decrease_untouched_memory(self, value):
+        """Reduce the untouched part of the pool by value KiB.
+
+        No-op when the pool is disabled.  Raises VmError when less than
+        value KiB of untouched memory remains.
+        """
+        if not self.is_enabled():
+            return
+        elif self.untouched_memory < value:
+            # Report the untouched figure, which is what was compared.
+            raise VmError(('I need %d  KiB untouch mem, but only have %d KiB untouched mem in Memory Pool') %(value,self.untouched_memory))
+        else:
+            self.untouched_memory -= value
+            log.debug("MemoryPool: decrease_untouched_memory: untouched_memory %d KiB" %self.untouched_memory)
+        return
+
+    def increase_memory(self, value):
+        """Give value KiB back to the pool.
+
+        No-op when the pool is disabled.  Raises VmError if the pool would
+        then exceed its configured size (the addition is not undone).
+        """
+        if not self.is_enabled():
+            return
+        else:
+            self.reserve_memory += value
+            if self.reserve_memory > self.default_reserved_memory:
+                # The first figure is the pool maximum, not the increment.
+                raise VmError(('the maxsize of memory pool is %d KiB, but current is %d KiB') %(self.default_reserved_memory,self.reserve_memory))
+            log.debug("MemoryPool: increase_memory:%d, reserved_memory %d KiB" %(value,self.reserve_memory))
+        return
+
+    def is_enabled(self):
+        """Return a true value when a pool is configured and dom0 ballooning is on."""
+        return self.enable_memory_pool and self.dom0_ballooning
+
+    def get_pool_size(self):
+        """Return the configured pool size in KiB, or 0 when disabled."""
+        if self.is_enabled():
+            return self.default_reserved_memory
+        else:
+            return 0
+
+    def get_left_memory(self):
+        """Return the memory left in the pool in KiB, or 0 when disabled."""
+        if self.is_enabled():
+            return self.reserve_memory
+        else:
+            return 0
+
+    def get_untouched_memory(self):
+        """Return the untouched part of the pool in KiB, or 0 when disabled."""
+        if self.is_enabled():
+            return self.untouched_memory
+        else:
+            return 0
+
+def instance():
+    """Singleton constructor. Use this instead of the class constructor.
+
+    Creates and initialises the shared MemoryPool on first use and returns
+    the same object on every later call.
+    """
+    global MP_inst
+    try:
+        MP_inst
+    except NameError:
+        # First call: the module-level singleton does not exist yet.
+        # Publish it only after init() succeeded so that a failed
+        # initialisation is retried on the next call instead of leaving
+        # a half-configured pool behind.
+        pool = MemoryPool()
+        pool.init()
+        MP_inst = pool
+    return MP_inst
index dcb3f462ad6b24f53d0c5b5b3efdd84d245622c9..7cbf25ecc0697ac2f4fb0f35a8f448b936fe88ac 100644 (file)
@@ -326,6 +326,10 @@ class XendDomainInfo:
     @type info: dictionary
     @ivar domid: Domain ID (if VM has started)
     @type domid: int or None
+    @ivar guest_bitsize: the bitsize of guest 
+    @type guest_bitsize: int or None
+    @ivar alloc_mem: the memory domain allocated when booting 
+    @type alloc_mem: int or None 
     @ivar vmpath: XenStore path to this VM.
     @type vmpath: string
     @ivar dompath: XenStore path to this Domain.
@@ -383,6 +387,8 @@ class XendDomainInfo:
             self.domid =  self.info.get('domid')
         else:
             self.domid = domid
+        self.guest_bitsize = None
+        self.alloc_mem = None
         
         #REMOVE: uuid is now generated in XendConfig
         #if not self._infoIsSet('uuid'):
@@ -2757,6 +2763,7 @@ class XendDomainInfo:
             # Round vtd_mem up to a multiple of a MiB.
             vtd_mem = ((vtd_mem + 1023) / 1024) * 1024
 
+            self.guest_bitsize = self.image.getBitSize()
             # Make sure there's enough RAM available for the domain
             balloon.free(memory + shadow + vtd_mem, self)
 
@@ -2947,7 +2954,6 @@ class XendDomainInfo:
 
         if self.domid is None:
             return
-
         from xen.xend import XendDomain
         log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
 
@@ -2968,6 +2974,12 @@ class XendDomainInfo:
             XendDomain.instance().remove_domain(self)
             self.cleanupDomain()
 
+        if self.info.is_hvm() or self.guest_bitsize != 32:
+            if self.alloc_mem:
+                import MemoryPool 
+                log.debug("%s KiB need to add to Memory pool" %self.alloc_mem)
+                MemoryPool.instance().increase_memory(self.alloc_mem)
+
         self._cleanup_phantom_devs(paths)
         self._cleanupVm()
 
index e4a79825b76c908f844a51c887dd01deddf476a9..13145a111ef48ab7ad2daae972f61ca7b49b341c 100644 (file)
@@ -105,6 +105,8 @@ class XendOptions:
 
     dom0_min_mem_default = 0
 
+    reserved_memory_default = 0
+
     dom0_vcpus_default = 0
 
     vncpasswd_default = None
@@ -364,6 +366,13 @@ class XendOptions:
         return self.get_config_bool('enable-dom0-ballooning',
                                     enable_dom0_ballooning_default)
 
+    def get_reserved_memory(self):
+        """Return the configured 'total_available_memory' value.
+
+        Yields 0 when dom0 ballooning is disabled; otherwise the integer
+        config value, falling back to reserved_memory_default.
+        """
+        if self.get_enable_dom0_ballooning():
+            return self.get_config_int('total_available_memory', self.reserved_memory_default)
+        # no ballooning of dom0 will close this item
+        return 0
+
+
     def get_dom0_vcpus(self):
         return self.get_config_int('dom0-cpus', self.dom0_vcpus_default)
 
index b5d312970adef3a0bf43a790c6fc819c9e85d94a..aa1c40ff24403d77564dfbe2124b2fa29e9f2d13 100644 (file)
@@ -23,6 +23,7 @@ import xen.lowlevel.xc
 
 import XendDomain
 import XendOptions
+import MemoryPool
 from XendLogging import log
 from XendError import VmError
 import osdep
@@ -97,10 +98,22 @@ def free(need_mem, dominfo):
     xoptions = XendOptions.instance()
     dom0 = XendDomain.instance().privilegedDomain()
     xc = xen.lowlevel.xc.xc()
-
+    memory_pool = MemoryPool.instance() 
     try:
         dom0_min_mem = xoptions.get_dom0_min_mem() * 1024
         dom0_ballooning = xoptions.get_enable_dom0_ballooning()
+        guest_size = 0
+        hvm = dominfo.info.is_hvm()
+        if memory_pool.is_enabled() and dominfo.domid:
+            if not hvm :
+                if need_mem <= 4 * 1024: 
+                    guest_size = 32
+                else:
+                    guest_size = dominfo.image.getBitSize()
+            if guest_size == 32:
+                dom0_ballooning = 0
+        else: #no ballooning as memory pool enabled
+            dom0_ballooning = xoptions.get_enable_dom0_ballooning()
         dom0_alloc = get_dom0_current_alloc()
 
         retries = 0
@@ -109,6 +122,11 @@ def free(need_mem, dominfo):
         last_new_alloc = None
         last_free = None
         rlimit = RETRY_LIMIT
+        mem_need_balloon = 0
+        left_memory_pool = 0
+        mem_target = 0
+        untouched_memory_pool = 0
+        real_need_mem = need_mem
 
         # stop tmem from absorbing any more memory (must THAW when done!)
         xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, 0, "")
@@ -119,6 +137,25 @@ def free(need_mem, dominfo):
         free_mem = physinfo['free_memory']
         scrub_mem = physinfo['scrub_memory']
         total_mem = physinfo['total_memory']
+        if memory_pool.is_enabled() and dominfo.domid:
+            if guest_size != 32 or hvm:
+                if need_mem > 4 * 1024: 
+                    dominfo.alloc_mem = need_mem
+                left_memory_pool = memory_pool.get_left_memory()
+                if need_mem > left_memory_pool:
+                    dominfo.alloc_mem = 0
+                    raise VmError(('Not enough free memory'
+                                   ' so I cannot release any more.  '
+                                   'I need %d KiB but only have %d in the pool.') %
+                                   (need_mem, memory_pool.get_left_memory()))
+                else:
+                    untouched_memory_pool = memory_pool.get_untouched_memory()
+                    if (left_memory_pool - untouched_memory_pool) > need_mem:
+                        dom0_ballooning = 0
+                    else:
+                        mem_need_balloon = need_mem - left_memory_pool + untouched_memory_pool
+                        need_mem = free_mem + scrub_mem + mem_need_balloon
+
         if dom0_ballooning:
             max_free_mem = total_mem - dom0_min_mem
         else:
@@ -170,15 +207,17 @@ def free(need_mem, dominfo):
 
             retries = 0
             sleep_time = SLEEP_TIME_GROWTH
-
         while retries < rlimit:
             physinfo = xc.physinfo()
             free_mem = physinfo['free_memory']
             scrub_mem = physinfo['scrub_memory']
-
             if free_mem >= need_mem:
-                log.debug("Balloon: %d KiB free; need %d; done.",
-                          free_mem, need_mem)
+                if (guest_size != 32 or hvm) and dominfo.domid:
+                    memory_pool.decrease_untouched_memory(mem_need_balloon)
+                    memory_pool.decrease_memory(real_need_mem)
+                else:
+                    log.debug("Balloon: %d KiB free; need %d; done.",
+                              free_mem, need_mem)
                 return
 
             if retries == 0:
@@ -189,7 +228,6 @@ def free(need_mem, dominfo):
             if dom0_ballooning:
                 dom0_alloc = get_dom0_current_alloc()
                 new_alloc = dom0_alloc - (need_mem - free_mem - scrub_mem)
-
                 if free_mem + scrub_mem >= need_mem:
                     if last_new_alloc == None:
                         log.debug("Balloon: waiting on scrubbing")
@@ -213,11 +251,13 @@ def free(need_mem, dominfo):
 
         # Not enough memory; diagnose the problem.
         if not dom0_ballooning:
+            dominfo.alloc_mem = 0 
             raise VmError(('Not enough free memory and enable-dom0-ballooning '
                            'is False, so I cannot release any more.  '
                            'I need %d KiB but only have %d.') %
                           (need_mem, free_mem))
         elif new_alloc < dom0_min_mem:
+            dominfo.alloc_mem = 0 
             raise VmError(
                 ('I need %d KiB, but dom0_min_mem is %d and shrinking to '
                  '%d KiB would leave only %d KiB free.') %
@@ -226,6 +266,7 @@ def free(need_mem, dominfo):
         else:
             dom0_start_alloc_mb = get_dom0_current_alloc() / 1024
             dom0.setMemoryTarget(dom0_start_alloc_mb)
+            dominfo.alloc_mem = 0 
             raise VmError(
                 ('Not enough memory is available, and dom0 cannot'
                  ' be shrunk any further'))
index bec74098382ca3e9fcbb6847076f6ca8b8dacfaa..7dd7cf0200aaf1aac07a3506921cb95ec8e9fdb5 100644 (file)
@@ -736,6 +736,12 @@ class LinuxImageHandler(ImageHandler):
                               vhpt           = self.vhpt,
                               superpages     = self.superpages)
 
+    def getBitSize(self):
+        """Return the 'type' entry reported by xc.getBitSize() for this
+        handler's kernel, command line and the VM's feature string."""
+        probe = xc.getBitSize(image    = self.kernel,
+                              cmdline  = self.cmdline,
+                              features = self.vm.getFeatures())
+        return probe.get('type')
+
     def getRequiredAvailableMemory(self, mem_kb):
         if self.is_stubdom :
             mem_kb += self.vramsize
@@ -768,6 +774,9 @@ class HVMImageHandler(ImageHandler):
         ImageHandler.__init__(self, vm, vmConfig)
         self.shutdownWatch = None
         self.rebootFeatureWatch = None
+    
+    def getBitSize(self):
+        """Return the guest bit size; this handler always reports None."""
+        return None
 
     def configure(self, vmConfig):
         ImageHandler.configure(self, vmConfig)
@@ -1011,6 +1020,9 @@ class X86_HVM_ImageHandler(HVMImageHandler):
         rc = HVMImageHandler.buildDomain(self)
         self.setCpuid()
         return rc
+    
+    def getBitSize(self):
+        """Return the guest bit size; this handler always reports None."""
+        return None
 
     def getRequiredAvailableMemory(self, mem_kb):
         return mem_kb + self.vramsize