#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
struct domain *d,
xen_domctl_mem_event_op_t *mec,
struct mem_event_domain *med,
+ int pause_flag,
xen_event_channel_notification_t notification_fn)
{
int rc;
return -EINVAL;
}
+ mem_event_ring_lock_init(med);
+
/* Get MFN of shared page */
guest_get_eff_l1e(v, shared_addr, &l1e);
shared_gfn = l1e_get_pfn(l1e);
put_gfn(dom_mem_event, ring_gfn);
put_gfn(dom_mem_event, shared_gfn);
+ /* Set the number of currently blocked vCPUs to 0. */
+ med->blocked = 0;
+
/* Allocate event channel */
rc = alloc_unbound_xen_event_channel(d->vcpu[0],
                                     current->domain->domain_id,
                                     notification_fn);
/* Prepare ring buffer */
FRONT_RING_INIT(&med->front_ring,
                (mem_event_sring_t *)med->ring_page,
                PAGE_SIZE);
- mem_event_ring_lock_init(med);
+ /* Save the pause flag for this particular ring. */
+ med->pause_flag = pause_flag;
- /* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ /* Initialize the last-chance wait queue. */
+ init_waitqueue_head(&med->wq);
return 0;
return rc;
}
-static int mem_event_disable(struct mem_event_domain *med)
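+/*
+ * Number of ring slots still available to producers: free requests in the
+ * ring, minus the slots already claimed (but not yet filled) by target and
+ * foreign vCPUs.
+ */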
+static unsigned int mem_event_ring_available(struct mem_event_domain *med)
{
- unmap_domain_page(med->ring_page);
- med->ring_page = NULL;
+ int avail_req = RING_FREE_REQUESTS(&med->front_ring);
+ avail_req -= med->target_producers;
+ avail_req -= med->foreign_producers;
- unmap_domain_page(med->shared_page);
- med->shared_page = NULL;
+ BUG_ON(avail_req < 0);
+
+ return avail_req;
+}
+
+/*
+ * mem_event_wake_blocked() wakes up vCPUs waiting for room in the ring.
+ * These vCPUs were paused on their way out after placing an event, and
+ * need to be resumed once the ring can process at least one further event
+ * from each of them.
+ */
+static void mem_event_wake_blocked(struct domain *d, struct mem_event_domain *med)
+{
+ struct vcpu *v;
+ int online = d->max_vcpus;
+ unsigned int avail_req = mem_event_ring_available(med);
+
+ if ( avail_req == 0 || med->blocked == 0 )
+ return;
+
+ /*
+ * Only keep as many vCPUs online as there are free slots in the ring
+ * for their memory events to be processed. This guarantees that no
+ * events are lost: certain types of event cannot be replayed, so space
+ * must already exist in the ring by the time they are hit.
+ * See the comment below in mem_event_put_request().
+ */
+ for_each_vcpu ( d, v )
+ if ( test_bit(med->pause_flag, &v->pause_flags) )
+ online--;
+
+ ASSERT(online == (d->max_vcpus - med->blocked));
+
+ /* Remember which vCPU was woken up last, so that we do not always scan
+  * linearly from zero and starve higher-numbered vCPUs under high load. */
+ if ( d->vcpu )
+ {
+ int i, j, k;
+
+ for (i = med->last_vcpu_wake_up + 1, j = 0; j < d->max_vcpus; i++, j++)
+ {
+ k = i % d->max_vcpus;
+ v = d->vcpu[k];
+ if ( !v )
+ continue;
+
+ if ( !(med->blocked) || online >= avail_req )
+ break;
+
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_unpause(v);
+ online++;
+ med->blocked--;
+ med->last_vcpu_wake_up = k;
+ }
+ }
+ }
+}
+
+/*
+ * If a vCPU attempted to place an event in the ring and was unable to do
+ * so, it is put on a wait queue. These vCPUs are woken as space becomes
+ * available, and take precedence over the blocked (paused) vCPUs.
+ */
+static void mem_event_wake_queued(struct domain *d, struct mem_event_domain *med)
+{
+ unsigned int avail_req = mem_event_ring_available(med);
+
+ if ( avail_req > 0 )
+ wake_up_nr(&med->wq, avail_req);
+}
+
+/*
+ * mem_event_wake() wakes up all vCPUs waiting for the ring to become
+ * available. Queued vCPUs get top priority. We are guaranteed that they
+ * will go through code paths that eventually call mem_event_wake() again,
+ * ensuring that any blocked vCPUs get unpaused once all the queued vCPUs
+ * have made it through.
+ */
+void mem_event_wake(struct domain *d, struct mem_event_domain *med)
+{
+ if (!list_empty(&med->wq.list))
+ mem_event_wake_queued(d, med);
+ else
+ mem_event_wake_blocked(d, med);
+}
+
+static int mem_event_disable(struct domain *d, struct mem_event_domain *med)
+{
+ if ( med->ring_page )
+ {
+ struct vcpu *v;
+
+ mem_event_ring_lock(med);
+
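+ /* Refuse to tear the ring down while vCPUs are still queued on the
+  * wait queue for space in it. */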
+ if ( !list_empty(&med->wq.list) )
+ {
+ mem_event_ring_unlock(med);
+ return -EBUSY;
+ }
+
+ unmap_domain_page(med->ring_page);
+ med->ring_page = NULL;
+
+ unmap_domain_page(med->shared_page);
+ med->shared_page = NULL;
+
+ /* Unblock all vCPUs */
+ for_each_vcpu ( d, v )
+ {
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_unpause(v);
+ med->blocked--;
+ }
+ }
+
+ mem_event_ring_unlock(med);
+ }
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
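+/*
+ * Release a claimed slot, whether the reservation was used or abandoned,
+ * and kick any vCPUs waiting for ring space. Called with the ring lock held.
+ */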
+static inline void mem_event_release_slot(struct domain *d,
+ struct mem_event_domain *med)
{
- mem_event_front_ring_t *front_ring;
- RING_IDX req_prod;
+ /* Update the accounting */
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
+
+ /* Kick any waiters */
+ mem_event_wake(d, med);
+}
- mem_event_ring_lock(med);
+/*
+ * mem_event_mark_and_pause() tags the vCPU and puts it to sleep.
+ * The vCPU will resume execution in mem_event_wake_blocked().
+ */
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
+{
+ if ( !test_and_set_bit(med->pause_flag, &v->pause_flags) )
+ {
+ vcpu_pause_nosync(v);
+ med->blocked++;
+ }
+}
- front_ring = &med->front_ring;
- req_prod = front_ring->req_prod_pvt;
+/*
+ * This must be preceded by a call to mem_event_claim_slot(), and is then
+ * guaranteed to succeed. As a side effect, however, the vCPU may be paused
+ * if the ring is overly full and its continued execution would cause
+ * stalling and excessive waiting. The vCPU will be automatically unpaused
+ * when the ring clears.
+ */
+void mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ mem_event_front_ring_t *front_ring;
+ int free_req;
+ unsigned int avail_req;
+ RING_IDX req_prod;
if ( current->domain != d )
{
ASSERT( !(req->flags & MEM_EVENT_FLAG_VCPU_PAUSED) );
}
+ mem_event_ring_lock(med);
+
+ /* Due to the reservations, this step must succeed. */
+ front_ring = &med->front_ring;
+ free_req = RING_FREE_REQUESTS(front_ring);
+ ASSERT(free_req > 0);
+
/* Copy request */
+ req_prod = front_ring->req_prod_pvt;
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
+ /* We've actually *used* our reservation, so release the slot. */
+ mem_event_release_slot(d, med);
+
+ /* Give this vCPU a black eye if necessary, on the way out: pause a
+  * target-domain vCPU once the ring no longer has a free slot for every
+  * vCPU. See the comments above mem_event_wake_blocked() for how this
+  * mechanism avoids having vCPUs wait for ring space. */
+ avail_req = mem_event_ring_available(med);
+ if ( current->domain == d && avail_req < d->max_vcpus )
+ mem_event_mark_and_pause(current, med);
+
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
}
-int mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp)
{
mem_event_front_ring_t *front_ring;
RING_IDX rsp_cons;
front_ring->rsp_cons = rsp_cons;
front_ring->sring->rsp_event = rsp_cons + 1;
+ /* Kick any waiters -- since we've just consumed an event,
+ * there may be additional space available in the ring. */
+ mem_event_wake(d, med);
+
mem_event_ring_unlock(med);
return 1;
}
-void mem_event_unpause_vcpus(struct domain *d)
-{
- struct vcpu *v;
-
- for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
- vcpu_wake(v);
-}
-
-void mem_event_mark_and_pause(struct vcpu *v)
-{
- set_bit(_VPF_mem_event, &v->pause_flags);
- vcpu_sleep_nosync(v);
-}
-
-void mem_event_put_req_producers(struct mem_event_domain *med)
+void mem_event_cancel_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ mem_event_release_slot(d, med);
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
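+/*
+ * Try, without blocking, to reserve a ring slot for the target domain
+ * (foreign == 0) or for a foreign domain. Returns -ENOSYS if the ring is
+ * not set up, -EBUSY if no slot is currently available, and 0 on success.
+ */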
+static int mem_event_grab_slot(struct mem_event_domain *med, int foreign)
{
- struct vcpu *curr = current;
- int free_requests;
- int ring_full = 1;
+ unsigned int avail_req;
if ( !med->ring_page )
- return -1;
+ return -ENOSYS;
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ avail_req = mem_event_ring_available(med);
+ if ( avail_req == 0 )
{
- med->req_producers++;
- ring_full = 0;
+ mem_event_ring_unlock(med);
+ return -EBUSY;
}
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
+ if ( !foreign )
+ med->target_producers++;
+ else
+ med->foreign_producers++;
mem_event_ring_unlock(med);
- return ring_full;
+ return 0;
+}
+
+/* Simple try_grab wrapper for use in the wait_event() macro. */
+static int mem_event_wait_try_grab(struct mem_event_domain *med, int *rc)
+{
+ *rc = mem_event_grab_slot(med, 0);
+ return *rc;
+}
+
+/* Repeatedly call mem_event_grab_slot() until it no longer returns -EBUSY,
+ * i.e. until a slot is reserved or the ring is found not to exist. */
+static int mem_event_wait_slot(struct mem_event_domain *med)
+{
+ int rc = -EBUSY;
+ wait_event(med->wq, mem_event_wait_try_grab(med, &rc) != -EBUSY);
+ return rc;
+}
+
+/*
+ * Determines whether the current vCPU belongs to the target domain, and
+ * calls the appropriate reservation function. A target-domain vCPU uses
+ * mem_event_wait_slot() and so, as long as a ring exists, will eventually
+ * return 0. A foreign vCPU does not wait: it gets -EBUSY if the ring is
+ * currently full.
+ *
+ * Return codes: -ENOSYS: the ring is not yet configured
+ *               -EBUSY: the ring is busy (foreign vCPUs only)
+ *               0: a slot has been reserved
+ */
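+/*
+ * Typical caller pattern, shown only as a sketch mirroring the p2m paging
+ * callers further below ("event_not_needed" is an illustrative placeholder,
+ * not a real predicate):
+ *
+ *     if ( mem_event_claim_slot(d, med) < 0 )
+ *         return;                               (no ring, or foreign busy)
+ *     if ( event_not_needed )
+ *         mem_event_cancel_slot(d, med);        (give the reservation back)
+ *     else
+ *         mem_event_put_request(d, med, &req);  (guaranteed to succeed)
+ */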
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
+{
+ if ( current->domain == d )
+ return mem_event_wait_slot(med);
+ else
+ return mem_event_grab_slot(med, 1);
}
/* Registered with Xen-bound event channel for incoming notifications. */
if ( p2m->pod.entry_count )
break;
- rc = mem_event_enable(d, mec, med, mem_paging_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_paging, mem_paging_notification);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(med);
+ rc = mem_event_disable(d, med);
}
break;
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med, mem_access_notification);
+ rc = mem_event_enable(d, mec, med, _VPF_mem_access, mem_access_notification);
}
break;
case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
{
if ( med->ring_page )
- rc = mem_event_disable(med);
+ rc = mem_event_disable(d, med);
}
break;
*/
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
{
- struct vcpu *v = current;
mem_event_request_t req;
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* In this unique case it is acceptable for no ring to be present: guest
+  * execution remains correct at this point. If this is the only page that
+  * happens to be paged out, the guest will be okay, though it is likely
+  * to crash shortly anyway. */
+ int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+ if ( rc < 0 )
+ return;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ /* Send release notification to pager */
+ memset(&req, 0, sizeof(req));
+ req.type = MEM_EVENT_TYPE_PAGING;
+ req.gfn = gfn;
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
+
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
mfn_t mfn;
struct p2m_domain *p2m = p2m_get_hostp2m(d);
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ /* We're paging. There should be a ring */
+ int rc = mem_event_claim_slot(d, &d->mem_event->paging);
+ if ( rc == -ENOSYS )
+ {
+ gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
+ "in place\n", d->domain_id, gfn);
+ domain_crash(d);
+ return;
+ }
+ else if ( rc < 0 )
return;
memset(&req, 0, sizeof(req));
p2m_unlock(p2m);
/* Pause domain if request came from guest and gfn has paging type */
- if ( p2m_is_paging(p2mt) && v->domain == d )
+ if ( p2m_is_paging(p2mt) && v->domain == d )
{
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_cancel_slot(d, &d->mem_event->paging);
return;
}
mfn_t mfn;
/* Pull all responses off the ring */
- while( mem_event_get_response(&d->mem_event->paging, &rsp) )
+ while( mem_event_get_response(d, &d->mem_event->paging, &rsp) )
{
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
-
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
}
bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
unsigned long gfn = gpa >> PAGE_SHIFT;
struct domain *d = v->domain;
struct p2m_domain* p2m = p2m_get_hostp2m(d);
- int res;
mfn_t mfn;
p2m_type_t p2mt;
p2m_access_t p2ma;
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
- if ( res < 0 )
+ if ( mem_event_claim_slot(d, &d->mem_event->access) == -ENOSYS )
{
/* No listener */
if ( p2m->access_required )
{
- printk(XENLOG_INFO
- "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
- v->vcpu_id, d->domain_id);
-
- mem_event_mark_and_pause(v);
+ gdprintk(XENLOG_INFO, "Memory access permissions failure, "
+ "no mem_event listener VCPU %d, dom %d\n",
+ v->vcpu_id, d->domain_id);
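+ /* With access_required set and no listener present, this fault can
+  * never be serviced; crash the domain rather than leaving the faulting
+  * vCPU paused forever. */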
+ domain_crash(v->domain);
+ return 0;
}
else
{
}
return 1;
}
-
- return 0;
}
- else if ( res > 0 )
- return 0; /* No space in buffer; VCPU paused */
memset(&req, 0, sizeof(req));
req.type = MEM_EVENT_TYPE_ACCESS;
mem_event_response_t rsp;
/* Pull all responses off the ring */
- while( mem_event_get_response(&d->mem_event->access, &rsp) )
+ while( mem_event_get_response(d, &d->mem_event->access, &rsp) )
{
if ( rsp.flags & MEM_EVENT_FLAG_DUMMY )
continue;
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
}
-
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
}
-
/* Set access type for a region of pfns.
* If start_pfn == -1ul, sets the default access type */
int p2m_set_mem_access(struct domain *d, unsigned long start_pfn,