return cpu;
}
+/**
+ * Xen scheduler callback to change the scheduler of a cpu
+ *
+ * @param new_ops Pointer to this instance of the scheduler structure
+ * @param cpu The cpu that is switching to this scheduler
+ * @param pdata scheduler specific PCPU data (we don't have any)
+ * @param vdata scheduler specific VCPU data of the idle vcpu
+ */
+static void
+a653_switch_sched(struct scheduler *new_ops, unsigned int cpu,
+ void *pdata, void *vdata)
+{
+ struct schedule_data *sd = &per_cpu(schedule_data, cpu);
+ arinc653_vcpu_t *svc = vdata;
+
+ ASSERT(!pdata && svc && is_idle_vcpu(svc->vc));
+
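+ /* Attach the vCPU data we were handed to this pCPU's idle vCPU. */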
+ idle_vcpu[cpu]->sched_priv = vdata;
+
+ per_cpu(scheduler, cpu) = new_ops;
+ per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */
+
+ /*
+ * (Re?)route the lock to its default location. We do not actually use
+ * it, but if we left it where it currently points (i.e., at the runqueue
+ * lock of this PCPU in the default scheduler), we would cause unnecessary
+ * contention on that lock (in cases where it is shared among multiple
+ * PCPUs, as in Credit2 and RTDS).
+ */
+ sd->schedule_lock = &sd->_lock;
+}
+
/**
* Xen scheduler callback function to perform a global (not domain-specific)
* adjustment. It is used by the ARINC 653 scheduler to put in place a new
.pick_cpu = a653sched_pick_cpu,
+ .switch_sched = a653_switch_sched,
+
.adjust = NULL,
.adjust_global = a653sched_adjust_global,
{
unsigned long flags;
struct csched_private *prv = CSCHED_PRIV(ops);
+ struct schedule_data *sd = &per_cpu(schedule_data, cpu);
+
+ /*
+ * This is called either during during boot, resume or hotplug, in
+ * case Credit1 is the scheduler chosen at boot. In such cases, the
+ * scheduler lock for cpu is already pointing to the default per-cpu
+ * spinlock, as Credit1 needs it, so there is no remapping to be done.
+ */
+ ASSERT(sd->schedule_lock == &sd->_lock && !spin_is_locked(&sd->_lock));
spin_lock_irqsave(&prv->lock, flags);
init_pdata(prv, pdata, cpu);
spin_unlock_irqrestore(&prv->lock, flags);
}
+/* Change the scheduler of cpu to us (Credit). */
+static void
+csched_switch_sched(struct scheduler *new_ops, unsigned int cpu,
+ void *pdata, void *vdata)
+{
+ struct schedule_data *sd = &per_cpu(schedule_data, cpu);
+ struct csched_private *prv = CSCHED_PRIV(new_ops);
+ struct csched_vcpu *svc = vdata;
+
+ ASSERT(svc && is_idle_vcpu(svc->vcpu));
+
+ idle_vcpu[cpu]->sched_priv = vdata;
+
+ /*
+ * We already hold the runqueue lock (it was taken in
+ * schedule_cpu_switch()). It may or may not be the lock this cpu
+ * will end up using, but holding it is enough to prevent races.
+ */
+ spin_lock(&prv->lock);
+ init_pdata(prv, pdata, cpu);
+ spin_unlock(&prv->lock);
+
+ per_cpu(scheduler, cpu) = new_ops;
+ per_cpu(schedule_data, cpu).sched_priv = pdata;
+
+ /*
+ * (Re?)route the lock to the per-pCPU lock as the /last/ thing. In fact,
+ * if it is free (and it can be), we want anyone who manages to take it
+ * to find all the initializations we have done above already in place.
+ */
+ smp_mb();
+ sd->schedule_lock = &sd->_lock;
+}
+
#ifndef NDEBUG
static inline void
__csched_vcpu_check(struct vcpu *vc)
.alloc_pdata = csched_alloc_pdata,
.init_pdata = csched_init_pdata,
.free_pdata = csched_free_pdata,
+ .switch_sched = csched_switch_sched,
.alloc_domdata = csched_alloc_domdata,
.free_domdata = csched_free_domdata,
cpumask_clear_cpu(rqi, &prv->active_queues);
}
-static void
+/* Returns the ID of the runqueue the cpu is assigned to. */
+static unsigned
init_pdata(struct csched2_private *prv, unsigned int cpu)
{
unsigned rqi;
struct csched2_runqueue_data *rqd;
- spinlock_t *old_lock;
ASSERT(spin_is_locked(&prv->lock));
ASSERT(!cpumask_test_cpu(cpu, &prv->initialized));
activate_runqueue(prv, rqi);
}
- /* IRQs already disabled */
- old_lock = pcpu_schedule_lock(cpu);
-
- /* Move spinlock to new runq lock. */
- per_cpu(schedule_data, cpu).schedule_lock = &rqd->lock;
-
/* Set the runqueue map */
prv->runq_map[cpu] = rqi;
cpumask_set_cpu(cpu, &rqd->idle);
cpumask_set_cpu(cpu, &rqd->active);
-
- /* _Not_ pcpu_schedule_unlock(): per_cpu().schedule_lock changed! */
- spin_unlock(old_lock);
-
cpumask_set_cpu(cpu, &prv->initialized);
- return;
+ return rqi;
}
static void
csched2_init_pdata(const struct scheduler *ops, void *pdata, int cpu)
{
struct csched2_private *prv = CSCHED2_PRIV(ops);
+ spinlock_t *old_lock;
unsigned long flags;
+ unsigned rqi;
spin_lock_irqsave(&prv->lock, flags);
- init_pdata(prv, cpu);
+ old_lock = pcpu_schedule_lock(cpu);
+
+ rqi = init_pdata(prv, cpu);
+ /* Move the scheduler lock to the new runq lock. */
+ per_cpu(schedule_data, cpu).schedule_lock = &prv->rqd[rqi].lock;
+
+ /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
+ spin_unlock(old_lock);
spin_unlock_irqrestore(&prv->lock, flags);
}
+/* Change the scheduler of cpu to us (Credit2). */
+static void
+csched2_switch_sched(struct scheduler *new_ops, unsigned int cpu,
+ void *pdata, void *vdata)
+{
+ struct csched2_private *prv = CSCHED2_PRIV(new_ops);
+ struct csched2_vcpu *svc = vdata;
+ unsigned rqi;
+
+ ASSERT(!pdata && svc && is_idle_vcpu(svc->vcpu));
+
+ /*
+ * We already own one runqueue lock (taken in schedule_cpu_switch()).
+ * This looks like it violates this scheduler's locking rules, but it
+ * does not, as what we own is the lock of another scheduler, which hence
+ * has no particular (ordering) relationship with our private global
+ * lock. And owning exactly that lock (the lock of this cpu's old
+ * scheduler) is what is necessary to prevent races.
+ */
+ spin_lock_irq(&prv->lock);
+
+ idle_vcpu[cpu]->sched_priv = vdata;
+
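+ /* Initialize the per-pCPU data and learn which runqueue this cpu joins. */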
+ rqi = init_pdata(prv, cpu);
+
+ /*
+ * Now that we know which runqueue we will be in, double-check what was
+ * said above: the lock we already hold is not this scheduler's lock for
+ * that runqueue, so it was safe to take it /before/ our private global
+ * lock.
+ */
+ ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &prv->rqd[rqi].lock);
+
+ per_cpu(scheduler, cpu) = new_ops;
+ per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */
+
+ /*
+ * (Re?)route the lock to the runqueue lock as the /last/ thing. In fact,
+ * if it is free (and it can be), we want anyone who manages to take it
+ * to find all the initializations we have done above already in place.
+ */
+ smp_mb();
+ per_cpu(schedule_data, cpu).schedule_lock = &prv->rqd[rqi].lock;
+
+ spin_unlock_irq(&prv->lock);
+}
+
static void
csched2_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
{
unsigned long flags;
struct csched2_private *prv = CSCHED2_PRIV(ops);
struct csched2_runqueue_data *rqd;
- struct schedule_data *sd = &per_cpu(schedule_data, cpu);
int rqi;
spin_lock_irqsave(&prv->lock, flags);
deactivate_runqueue(prv, rqi);
}
- /* Move spinlock to the original lock. */
- ASSERT(sd->schedule_lock == &rqd->lock);
- ASSERT(!spin_is_locked(&sd->_lock));
- sd->schedule_lock = &sd->_lock;
-
spin_unlock(&rqd->lock);
cpumask_clear_cpu(cpu, &prv->initialized);
.free_vdata = csched2_free_vdata,
.init_pdata = csched2_init_pdata,
.free_pdata = csched2_free_pdata,
+ .switch_sched = csched2_switch_sched,
.alloc_domdata = csched2_alloc_domdata,
.free_domdata = csched2_free_domdata,
};
spin_unlock_irqrestore(old_lock, flags);
}
+/* Change the scheduler of cpu to us (RTDS). */
+static void
+rt_switch_sched(struct scheduler *new_ops, unsigned int cpu,
+ void *pdata, void *vdata)
+{
+ struct rt_private *prv = rt_priv(new_ops);
+ struct rt_vcpu *svc = vdata;
+
+ ASSERT(!pdata && svc && is_idle_vcpu(svc->vcpu));
+
+ /*
+ * We already hold the runqueue lock (it was taken in
+ * schedule_cpu_switch()). It is actually the runqueue lock of
+ * another scheduler, but that is exactly what is needed to
+ * prevent races.
+ */
+ ASSERT(per_cpu(schedule_data, cpu).schedule_lock != &prv->lock);
+
+ idle_vcpu[cpu]->sched_priv = vdata;
+ per_cpu(scheduler, cpu) = new_ops;
+ per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */
+
+ /*
+ * (Re?)route the lock to the scheduler's global lock as the /last/ thing.
+ * In fact, if it is free (and it can be), we want anyone who manages to
+ * take it to find all the initializations we have done above already in
+ * place.
+ */
+ smp_mb();
+ per_cpu(schedule_data, cpu).schedule_lock = &prv->lock;
+}
+
static void *
rt_alloc_pdata(const struct scheduler *ops, int cpu)
{
static void
rt_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
{
- struct rt_private *prv = rt_priv(ops);
- struct schedule_data *sd = &per_cpu(schedule_data, cpu);
- unsigned long flags;
-
- spin_lock_irqsave(&prv->lock, flags);
-
- /* Move spinlock back to the default lock */
- ASSERT(sd->schedule_lock == &prv->lock);
- ASSERT(!spin_is_locked(&sd->_lock));
- sd->schedule_lock = &sd->_lock;
-
- spin_unlock_irqrestore(&prv->lock, flags);
-
free_cpumask_var(_cpumask_scratch[cpu]);
}
.alloc_pdata = rt_alloc_pdata,
.free_pdata = rt_free_pdata,
.init_pdata = rt_init_pdata,
+ .switch_sched = rt_switch_sched,
.alloc_domdata = rt_alloc_domdata,
.free_domdata = rt_free_domdata,
.init_domain = rt_dom_init,
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
{
struct vcpu *idle;
- spinlock_t *lock;
void *ppriv, *ppriv_old, *vpriv, *vpriv_old;
struct scheduler *old_ops = per_cpu(scheduler, cpu);
struct scheduler *new_ops = (c == NULL) ? &ops : c->sched;
struct cpupool *old_pool = per_cpu(cpupool, cpu);
+ spinlock_t *old_lock;
/*
* pCPUs only move from a valid cpupool to free (i.e., out of any pool),
if ( old_ops == new_ops )
goto out;
+ /*
+ * To set up the cpu for the new scheduler we need:
+ * - a valid instance of per-CPU scheduler specific data, as it is
+ * allocated by SCHED_OP(alloc_pdata). Note that we do not want to
+ * initialize it yet (i.e., we are not calling SCHED_OP(init_pdata)).
+ * That will be done by the target scheduler, in SCHED_OP(switch_sched),
+ * in proper ordering and with locking.
+ * - a valid instance of per-vCPU scheduler specific data, for the idle
+ * vCPU of cpu. That is what the target scheduler will use for the
+ * sched_priv field of the per-vCPU info of the idle domain.
+ */
idle = idle_vcpu[cpu];
ppriv = SCHED_OP(new_ops, alloc_pdata, cpu);
if ( IS_ERR(ppriv) )
return PTR_ERR(ppriv);
- SCHED_OP(new_ops, init_pdata, ppriv, cpu);
vpriv = SCHED_OP(new_ops, alloc_vdata, idle, idle->domain->sched_priv);
if ( vpriv == NULL )
{
return -ENOMEM;
}
- lock = pcpu_schedule_lock_irq(cpu);
-
SCHED_OP(old_ops, tick_suspend, cpu);
+
+ /*
+ * The actual switch, including (if necessary) the rerouting of the
+ * scheduler lock to whatever new_ops prefers, needs to happen in one
+ * critical section, protected by old_ops' lock, or races are possible.
+ * It is, in fact, the lock of another scheduler that we are taking (the
+ * lock of the scheduler of the cpupool that cpu still belongs to). But
+ * that is ok, as anyone trying to schedule on this cpu will spin until
+ * we release that lock (at the bottom of this function). When they get
+ * the lock (thanks to the loop inside the *_schedule_lock() functions),
+ * they will notice that the lock itself has changed, and will retry
+ * acquiring the new one (which, at that point, will be the correct,
+ * remapped one).
+ */
+ old_lock = pcpu_schedule_lock(cpu);
+
vpriv_old = idle->sched_priv;
- idle->sched_priv = vpriv;
- per_cpu(scheduler, cpu) = new_ops;
ppriv_old = per_cpu(schedule_data, cpu).sched_priv;
- per_cpu(schedule_data, cpu).sched_priv = ppriv;
- SCHED_OP(new_ops, tick_resume, cpu);
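+ /* new_ops takes over from here: it installs ppriv/vpriv and reroutes the lock. */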
+ SCHED_OP(new_ops, switch_sched, cpu, ppriv, vpriv);
- pcpu_schedule_unlock_irq(lock, cpu);
+ /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
+ spin_unlock_irq(old_lock);
+
+ SCHED_OP(new_ops, tick_resume, cpu);
SCHED_OP(old_ops, free_vdata, vpriv_old);
SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
void (*free_domdata) (const struct scheduler *, void *);
void * (*alloc_domdata) (const struct scheduler *, struct domain *);
+ void (*switch_sched) (struct scheduler *, unsigned int,
+ void *, void *);
+
int (*init_domain) (const struct scheduler *, struct domain *);
void (*destroy_domain) (const struct scheduler *, struct domain *);