* A PCPU is feasible if the VCPU can run on this PCPU and (the PCPU is idle or
* has a lower-priority VCPU running on it.)
*
- * Each VCPU has a dedicated period and budget.
+ * Each VCPU has a dedicated period, budget and an extratime flag.
* The deadline of a VCPU is at the end of each period;
* A VCPU has its budget replenished at the beginning of each period;
* While scheduled, a VCPU burns its budget.
* The VCPU needs to finish its budget before its deadline in each period;
* The VCPU discards its unused budget at the end of each period.
- * If a VCPU runs out of budget in a period, it has to wait until next period.
+ * When a VCPU runs out of budget in a period, if its extratime flag is set,
+ * the VCPU increases its priority_level by 1 and refills its budget; otherwise,
+ * it has to wait until next period.
*
* Each VCPU is implemented as a deferable server.
* When a VCPU has a task running on it, its budget is continuously burned;
*
* Queue scheme:
* A global runqueue and a global depletedqueue for each CPU pool.
- * The runqueue holds all runnable VCPUs with budget, sorted by deadline;
+ * The runqueue holds all runnable VCPUs with budget,
+ * sorted by priority_level and deadline;
* The depletedqueue holds all VCPUs without budget, unsorted;
*
* Note: cpumask and cpupool is supported.
#define __RTDS_depleted 3
#define RTDS_depleted (1<<__RTDS_depleted)
+/*
+ * RTDS_extratime: Can the vcpu run in the time that is
+ * not part of any real-time reservation, and would therefore
+ * be otherwise left idle?
+ */
+#define __RTDS_extratime 4
+#define RTDS_extratime (1<<__RTDS_extratime)
+
/*
* rt tracing events ("only" 512 available!). Check
* include/public/trace.h for more details.
struct rt_dom *sdom;
struct vcpu *vcpu;
+ unsigned priority_level;
+
unsigned flags; /* mark __RTDS_scheduled, etc.. */
};
return &rt_priv(ops)->replq;
}
+/*
+ * Is this vcpu allowed to run in time not covered by any real-time
+ * reservation (i.e., is its RTDS_extratime flag set)?
+ */
+static inline bool has_extratime(const struct rt_vcpu *svc)
+{
+    return svc->flags & RTDS_extratime;
+}
+
/*
* Helper functions for manipulating the runqueue, the depleted queue,
* and the replenishment events queue.
return !list_empty(&svc->replq_elem);
}
+/*
+ * Compare the priority of two vcpus.
+ * Return value > 0 if v1 has strictly higher priority (lower
+ * priority_level, or equal priority_level and earlier deadline),
+ * value < 0 if v1 has strictly lower priority, and 0 on an exact tie
+ * (same priority_level and same deadline).
+ */
+static s_time_t
+compare_vcpu_priority(const struct rt_vcpu *v1, const struct rt_vcpu *v2)
+{
+    /*
+     * A lower priority_level means higher priority (level 0 is the
+     * in-reservation level; extratime work runs at level 1 and above).
+     * Compare explicitly rather than subtracting the two unsigned
+     * fields: the old "v2->priority_level - v1->priority_level" wraps
+     * when v1's level is larger, and converting that out-of-range
+     * unsigned value to int is implementation-defined (C11 6.3.1.3).
+     * Callers only examine the sign of the result, so +/-1 suffices.
+     */
+    if ( v1->priority_level != v2->priority_level )
+        return v1->priority_level < v2->priority_level ? 1 : -1;
+
+    /* Same level: Earliest Deadline First; equal deadlines tie at 0. */
+    return v2->cur_deadline - v1->cur_deadline;
+}
+
/*
* Debug related code, dump vcpu/cpu information
*/
cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), mask);
printk("[%5d.%-2u] cpu %u, (%"PRI_stime", %"PRI_stime"),"
" cur_b=%"PRI_stime" cur_d=%"PRI_stime" last_start=%"PRI_stime"\n"
+ " \t\t priority_level=%d has_extratime=%d\n"
" \t\t onQ=%d runnable=%d flags=%x effective hard_affinity=%s\n",
svc->vcpu->domain->domain_id,
svc->vcpu->vcpu_id,
svc->cur_budget,
svc->cur_deadline,
svc->last_start,
+ svc->priority_level,
+ has_extratime(svc),
vcpu_on_q(svc),
vcpu_runnable(svc->vcpu),
svc->flags,
*/
svc->last_start = now;
svc->cur_budget = svc->budget;
+ svc->priority_level = 0;
/* TRACE */
{
struct __packed {
unsigned vcpu:16, dom:16;
+ unsigned priority_level;
uint64_t cur_deadline, cur_budget;
} d;
d.dom = svc->vcpu->domain->domain_id;
d.vcpu = svc->vcpu->vcpu_id;
+ d.priority_level = svc->priority_level;
d.cur_deadline = (uint64_t) svc->cur_deadline;
d.cur_budget = (uint64_t) svc->cur_budget;
trace_var(TRC_RTDS_BUDGET_REPLENISH, 1,
* cases, if the vcpu with the earliest deadline is what we
* are dealing with).
*/
-static inline bool_t
+static inline bool
deadline_queue_remove(struct list_head *queue, struct list_head *elem)
{
int pos = 0;
return !pos;
}
-static inline bool_t
+static inline bool
deadline_queue_insert(struct rt_vcpu * (*qelem)(struct list_head *),
struct rt_vcpu *svc, struct list_head *elem,
struct list_head *queue)
list_for_each ( iter, queue )
{
struct rt_vcpu * iter_svc = (*qelem)(iter);
- if ( svc->cur_deadline <= iter_svc->cur_deadline )
+ if ( compare_vcpu_priority(svc, iter_svc) > 0 )
break;
pos++;
}
ASSERT( !vcpu_on_q(svc) );
ASSERT( vcpu_on_replq(svc) );
- /* add svc to runq if svc still has budget */
- if ( svc->cur_budget > 0 )
+ /* add svc to runq if svc still has budget or its extratime is set */
+ if ( svc->cur_budget > 0 ||
+ has_extratime(svc) )
deadline_runq_insert(svc, &svc->q_elem, runq);
else
list_add(&svc->q_elem, &prv->depletedq);
svc->vcpu = vc;
svc->last_start = 0;
+ __set_bit(__RTDS_extratime, &svc->flags);
+ svc->priority_level = 0;
svc->period = RTDS_DEFAULT_PERIOD;
if ( !is_idle_vcpu(vc) )
svc->budget = RTDS_DEFAULT_BUDGET;
if ( svc->cur_budget <= 0 )
{
- svc->cur_budget = 0;
- __set_bit(__RTDS_depleted, &svc->flags);
+ if ( has_extratime(svc) )
+ {
+ svc->priority_level++;
+ svc->cur_budget = svc->budget;
+ }
+ else
+ {
+ svc->cur_budget = 0;
+ __set_bit(__RTDS_depleted, &svc->flags);
+ }
}
/* TRACE */
unsigned vcpu:16, dom:16;
uint64_t cur_budget;
int delta;
+ unsigned priority_level;
+ bool has_extratime;
} d;
d.dom = svc->vcpu->domain->domain_id;
d.vcpu = svc->vcpu->vcpu_id;
d.cur_budget = (uint64_t) svc->cur_budget;
d.delta = delta;
+ d.priority_level = svc->priority_level;
+ d.has_extratime = svc->flags & RTDS_extratime;
trace_var(TRC_RTDS_BUDGET_BURN, 1,
sizeof(d),
(unsigned char *) &d);
vcpu_runnable(current) &&
scurr->cur_budget > 0 &&
( is_idle_vcpu(snext->vcpu) ||
- scurr->cur_deadline <= snext->cur_deadline ) )
+ compare_vcpu_priority(scurr, snext) > 0 ) )
snext = scurr;
}
}
iter_svc = rt_vcpu(iter_vc);
if ( latest_deadline_vcpu == NULL ||
- iter_svc->cur_deadline > latest_deadline_vcpu->cur_deadline )
+ compare_vcpu_priority(iter_svc, latest_deadline_vcpu) < 0 )
latest_deadline_vcpu = iter_svc;
cpumask_clear_cpu(cpu, ¬_tickled);
/* 2) candicate has higher priority, kick out lowest priority vcpu */
if ( latest_deadline_vcpu != NULL &&
- new->cur_deadline < latest_deadline_vcpu->cur_deadline )
+ compare_vcpu_priority(latest_deadline_vcpu, new) < 0 )
{
SCHED_STAT_CRANK(tickled_busy_cpu);
cpu_to_tickle = latest_deadline_vcpu->vcpu->processor;
svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);
local_sched.u.rtds.budget = svc->budget / MICROSECS(1);
local_sched.u.rtds.period = svc->period / MICROSECS(1);
+ if ( has_extratime(svc) )
+ local_sched.u.rtds.flags |= XEN_DOMCTL_SCHEDRT_extra;
+ else
+ local_sched.u.rtds.flags &= ~XEN_DOMCTL_SCHEDRT_extra;
spin_unlock_irqrestore(&prv->lock, flags);
if ( copy_to_guest_offset(op->u.v.vcpus, index,
svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);
svc->period = period;
svc->budget = budget;
+ if ( local_sched.u.rtds.flags & XEN_DOMCTL_SCHEDRT_extra )
+ __set_bit(__RTDS_extratime, &svc->flags);
+ else
+ __clear_bit(__RTDS_extratime, &svc->flags);
spin_unlock_irqrestore(&prv->lock, flags);
}
/* Process a most 64 vCPUs without checking for preemptions. */
{
struct rt_vcpu *next_on_runq = q_elem(runq->next);
- if ( svc->cur_deadline > next_on_runq->cur_deadline )
+ if ( compare_vcpu_priority(svc, next_on_runq) < 0 )
runq_tickle(ops, next_on_runq);
}
else if ( __test_and_clear_bit(__RTDS_depleted, &svc->flags) &&