#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
struct domain *d,
xen_domctl_mem_event_op_t *mec,
struct mem_event_domain *med,
+ int pause_flag,
xen_event_channel_notification_t notification_fn)
{
int rc;
return -EINVAL;
}
+ mem_event_ring_lock_init(med);
+
/* Get MFN of shared page */
guest_get_eff_l1e(v, shared_addr, &l1e);
shared_gfn = l1e_get_pfn(l1e);
put_gfn(dom_mem_event, ring_gfn);
put_gfn(dom_mem_event, shared_gfn);
+ /* Set the number of currently blocked vCPUs to 0. */
+ med->blocked = 0;
+
/* Allocate event channel */
rc = alloc_unbound_xen_event_channel(d->vcpu[0],
                                     current->domain->domain_id,
                                     notification_fn);
/* Prepare ring buffer */
FRONT_RING_INIT(&med->front_ring,
                (mem_event_sring_t *)med->ring_page,
                PAGE_SIZE);
- mem_event_ring_lock_init(med);
+ /* Save the pause flag for this particular ring. */
+ med->pause_flag = pause_flag;
- /* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ /* Initialize the last-chance wait queue. */
+ init_waitqueue_head(&med->wq);
return 0;
return rc;
}
-static int mem_event_disable(struct mem_event_domain *med)
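+/*
+ * Number of ring slots still available to producers: free requests in the
+ * ring, minus the slots already claimed (but not yet filled) by target and
+ * foreign vCPUs.
+ */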
+static unsigned int mem_event_ring_available(struct mem_event_domain *med)
{
- unmap_domain_page(med->ring_page);
- med->ring_page = NULL;
+ int avail_req = RING_FREE_REQUESTS(&med->front_ring);
+ avail_req -= med->target_producers;
+ avail_req -= med->foreign_producers;
- unmap_domain_page(med->shared_page);
- med->shared_page = NULL;
+ BUG_ON(avail_req < 0);
+
+ return avail_req;
+}
+
+/*
+ * mem_event_wake_blocked() wakes up vCPUs waiting for room in the ring.
+ * These vCPUs were paused on their way out after placing an event, and
+ * need to be resumed once the ring can process at least one further event
+ * from each of them.
+ */
+static void mem_event_wake_blocked(struct domain *d, struct mem_event_domain *med)
+{
+ struct vcpu *v;
+ int online = d->max_vcpus;
+ unsigned int avail_req = mem_event_ring_available(med);
+
+ if ( avail_req == 0 || med->blocked == 0 )
+ return;
+
+ /*
+ * Only keep as many vCPUs online as there are free slots in the ring
+ * for their memory events to be processed. This guarantees that no
+ * events are lost: certain types of event cannot be replayed, so space
+ * must already exist in the ring by the time they are hit.
+ * See the comment below in mem_event_put_request().
+ */
+ for_each_vcpu ( d, v )
+ if ( test_bit(med->pause_flag, &v->pause_flags) )
+ online--;
+
+ ASSERT(online == (d->max_vcpus - med->blocked));
+
+ /* Remember which vCPU was woken up last, so that we do not always scan
+  * linearly from zero and starve higher-numbered vCPUs under high load. */
+ if ( d->vcpu )
+ {
+ int i, j, k;
+
+ for (i = med->last_vcpu_wake_up + 1, j = 0; j < d->max_vcpus; i++, j++)
+ {
+ k = i % d->max_vcpus;
+ v = d->vcpu[k];
+ if ( !v )
+ continue;
+
+ if ( !(med->blocked) || online >= avail_req )
+ break;
+
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_unpause(v);
+ online++;
+ med->blocked--;
+ med->last_vcpu_wake_up = k;
+ }
+ }
+ }
+}
+
+/*
+ * If a vCPU attempted to place an event in the ring and was unable to do
+ * so, it is put on a wait queue. These vCPUs are woken as space becomes
+ * available, and take precedence over the blocked (paused) vCPUs.
+ */
+static void mem_event_wake_queued(struct domain *d, struct mem_event_domain *med)
+{
+ unsigned int avail_req = mem_event_ring_available(med);
+
+ if ( avail_req > 0 )
+ wake_up_nr(&med->wq, avail_req);
+}
+
+/*
+ * mem_event_wake() wakes up all vCPUs waiting for the ring to become
+ * available. Queued vCPUs get top priority. We are guaranteed that they
+ * will go through code paths that eventually call mem_event_wake() again,
+ * ensuring that any blocked vCPUs get unpaused once all the queued vCPUs
+ * have made it through.
+ */
+void mem_event_wake(struct domain *d, struct mem_event_domain *med)
+{
+ if (!list_empty(&med->wq.list))
+ mem_event_wake_queued(d, med);
+ else
+ mem_event_wake_blocked(d, med);
+}
+
+static int mem_event_disable(struct domain *d, struct mem_event_domain *med)
+{
+ if ( med->ring_page )
+ {
+ struct vcpu *v;
+
+ mem_event_ring_lock(med);
+
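+ /* Refuse to tear the ring down while vCPUs are still queued on the
+  * wait queue for space in it. */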
+ if ( !list_empty(&med->wq.list) )
+ {
+ mem_event_ring_unlock(med);
+ return -EBUSY;
+ }
+
+ unmap_domain_page(med->ring_page);
+ med->ring_page = NULL;
+
+ unmap_domain_page(med->shared_page);
+ med->shared_page = NULL;
+
+ /* Unblock all vCPUs */
+ for_each_vcpu ( d, v )
+ {
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_unpause(v);
+ med->blocked--;
+ }
+ }
+
+ mem_event_ring_unlock(med);
+ }
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
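+/*
+ * Release a claimed slot, whether the reservation was used or abandoned,
+ * and kick any vCPUs waiting for ring space. Called with the ring lock held.
+ */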
+static inline void mem_event_release_slot(struct domain *d,
+ struct mem_event_domain *med)
{
- mem_event_front_ring_t *front_ring;
- RING_IDX req_prod;
+ /* Update the accounting */
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
+
+ /* Kick any waiters */
+ mem_event_wake(d, med);
+}
- mem_event_ring_lock(med);
+/*
+ * mem_event_mark_and_pause() tags the vCPU and puts it to sleep.
+ * The vCPU will resume execution in mem_event_wake_blocked().
+ */
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
+{
+ if ( !test_and_set_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_pause_nosync(v);
+ med->blocked++;
+ }
+}
- front_ring = &med->front_ring;
- req_prod = front_ring->req_prod_pvt;
+/*
+ * This must be preceded by a call to mem_event_claim_slot(), and is then
+ * guaranteed to succeed. As a side effect, however, the vCPU may be paused
+ * if the ring is overly full and its continued execution would cause
+ * stalling and excessive waiting. The vCPU will be automatically unpaused
+ * when the ring clears.
+ */
+void mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ mem_event_front_ring_t *front_ring;
+ int free_req;
+ unsigned int avail_req;
+ RING_IDX req_prod;
if ( current->domain != d )
{
ASSERT( !(req->flags & MEM_EVENT_FLAG_VCPU_PAUSED) );
}
+ mem_event_ring_lock(med);
+
+ /* Due to the reservations, this step must succeed. */
+ front_ring = &med->front_ring;
+ free_req = RING_FREE_REQUESTS(front_ring);
+ ASSERT(free_req > 0);
+
/* Copy request */
+ req_prod = front_ring->req_prod_pvt;
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
+ /* We've actually *used* our reservation, so release the slot. */
+ mem_event_release_slot(d, med);
+
+ /* Give this vCPU a black eye if necessary, on the way out: pause a
+  * target-domain vCPU once the ring no longer has a free slot for every
+  * vCPU. See the comments above mem_event_wake_blocked() for how this
+  * mechanism avoids having vCPUs wait for ring space. */
+ avail_req = mem_event_ring_available(med);
+ if ( current->domain == d && avail_req < d->max_vcpus )
+ mem_event_mark_and_pause(current, med);
+
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
}
-int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp)
{
mem_event_front_ring_t *front_ring;
RING_IDX rsp_cons;
front_ring->rsp_cons = rsp_cons;
front_ring->sring->rsp_event = rsp_cons + 1;
+ /* Kick any waiters -- since we've just consumed an event,
+ * there may be additional space available in the ring. */
+ mem_event_wake(d, med);
+
mem_event_ring_unlock(med);
return 1;
}
-void mem_event_unpause_vcpus(struct domain *d)
-{
- struct vcpu *v;
-
- for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
- vcpu_wake(v);
-}
-
-void mem_event_mark_and_pause(struct vcpu *v)
-{
- set_bit(_VPF_mem_event, &v->pause_flags);
- vcpu_sleep_nosync(v);
-}
-
-void mem_event_put_req_producers(struct mem_event_domain *med)
+void mem_event_cancel_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ mem_event_release_slot(d, med);
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
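+/*
+ * Try, without blocking, to reserve a ring slot for the target domain
+ * (foreign == 0) or for a foreign domain. Returns -ENOSYS if the ring is
+ * not set up, -EBUSY if no slot is currently available, and 0 on success.
+ */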
+static int mem_event_grab_slot(struct mem_event_domain *med, int foreign)
{
- struct vcpu *curr = current;
- int free_requests;
- int ring_full = 1;
+ unsigned int avail_req;
if ( !med->ring_page )
- return -1;
+ return -ENOSYS;
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ avail_req = mem_event_ring_available(med);
+ if ( avail_req == 0 )
{
- med->req_producers++;
- ring_full = 0;
+ mem_event_ring_unlock(med);
+ return -EBUSY;
}
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
+ if ( !foreign )
+ med->target_producers++;
+ else
+ med->foreign_producers++;
mem_event_ring_unlock(med);
- return ring_full;
+ return 0;
+}
+
+/* Simple try_grab wrapper for use in the wait_event() macro. */
+static int mem_event_wait_try_grab(struct mem_event_domain *med, int *rc)
+{
+ *rc = mem_event_grab_slot(med, 0);
+ return *rc;
+}
+
+/* Repeatedly call mem_event_grab_slot() until it no longer returns -EBUSY,
+ * i.e. until a slot is reserved or the ring is found not to exist. */
+static int mem_event_wait_slot(struct mem_event_domain *med)
+{
+ int rc = -EBUSY;
+ wait_event(med->wq, mem_event_wait_try_grab(med, &rc) != -EBUSY);
+ return rc;
+}
+
+/*
+ * Determines whether the current vCPU belongs to the target domain, and
+ * calls the appropriate reservation function. A target-domain vCPU uses
+ * mem_event_wait_slot() and so, as long as a ring exists, will eventually
+ * return 0. A foreign vCPU does not wait: it gets -EBUSY if the ring is
+ * currently full.
+ *
+ * Return codes: -ENOSYS: the ring is not yet configured
+ *               -EBUSY: the ring is busy (foreign vCPUs only)
+ *               0: a slot has been reserved
+ */
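+/*
+ * Typical caller pattern, shown only as a sketch mirroring the p2m paging
+ * callers further below ("event_not_needed" is an illustrative placeholder,
+ * not a real predicate):
+ *
+ *     if ( mem_event_claim_slot(d, med) < 0 )
+ *         return;                               (no ring, or foreign busy)
+ *     if ( event_not_needed )
+ *         mem_event_cancel_slot(d, med);        (give the reservation back)
+ *     else
+ *         mem_event_put_request(d, med, &req);  (guaranteed to succeed)
+ */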
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
+{
+ if ( current->domain == d )
+ return mem_event_wait_slot(med);
+ else
+ return mem_event_grab_slot(med, 1);
}
/* Registered with Xen-bound event channel for incoming notifications. */
if ( p2m->pod.entry_count )
break;
- rc = mem_event_enable(d, mec, med, mem_paging_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(med);
+ rc = mem_event_disable(d, med);
}
break;
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med, mem_access_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(med);
+ rc = mem_event_disable(d, med);
}
break;
*/
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
{
- struct vcpu *v = current;
mem_event_request_t req;
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* In this unique case it is acceptable for no ring to be present: guest
+  * execution remains correct at this point. If this is the only page that
+  * happens to be paged out, the guest will be okay, though it is likely
+  * to crash shortly anyway. */
+ int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+ if ( rc < 0 )
+ return;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ /* Send release notification to pager */
+ memset(&req, 0, sizeof(req));
+ req.type = MEM_EVENT_TYPE_PAGING;
+ req.gfn = gfn;
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
+
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
mfn_t mfn;
struct p2m_domain *p2m = p2m_get_hostp2m(d);
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ /* We're paging. There should be a ring */
+ int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+ if ( rc == -ENOSYS )
+ {
+ gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
+ "in place\n", d->domain_id, gfn);
+ domain_crash(d);
+ return;
+ }
+ else if ( rc < 0 )
return;
memset(&req, 0, sizeof(req));
p2m_unlock(p2m);
/* Pause domain if request came from guest and gfn has paging type */
- if ( p2m_is_paging(p2mt) && v->domain == d )
+ if ( p2m_is_paging(p2mt) && v->domain == d )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_cancel_slot(d, &d->mem_event->paging);
return;
}
mfn_t mfn;
/* Pull all responses off the ring */
- while( mem_event_get_response(&d->mem_event->paging, &rsp) )
+ while( mem_event_get_response(d, &d->mem_event->paging, &rsp) )
{
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
-
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
}
bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
unsigned long gfn = gpa >> PAGE_SHIFT;
struct domain *d = v->domain;
struct p2m_domain* p2m = p2m_get_hostp2m(d);
- int res;
mfn_t mfn;
p2m_type_t p2mt;
p2m_access_t p2ma;
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
- if ( res < 0 )
+ if ( mem_event_claim_slot(d, &d->mem_event->access) == -ENOSYS )
{
/* No listener */
if ( p2m->access_required )
{
- printk(XENLOG_INFO
- "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
- v->vcpu_id, d->domain_id);
-
- mem_event_mark_and_pause(v);
+ gdprintk(XENLOG_INFO, "Memory access permissions failure, "
+ "no mem_event listener VCPU %d, dom %d\n",
+ v->vcpu_id, d->domain_id);
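+ /* With access_required set and no listener present, this fault can
+  * never be serviced; crash the domain rather than leaving the faulting
+  * vCPU paused forever. */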
+ domain_crash(v->domain);
+ return 0;
}
else
{
}
return 1;
}
-
- return 0;
}
- else if ( res > 0 )
- return 0; /* No space in buffer; VCPU paused */
memset(&req, 0, sizeof(req));
req.type = MEM_EVENT_TYPE_ACCESS;
mem_event_response_t rsp;
/* Pull all responses off the ring */
- while( mem_event_get_response(&d->mem_event->access, &rsp) )
+ while( mem_event_get_response(d, &d->mem_event->access, &rsp) )
{
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
-
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
}
-
/* Set access type for a region of pfns.
* If start_pfn == -1ul, sets the default access type */
int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,