#endif
+static always_inline spinlock_tickets_t observe_lock(spinlock_tickets_t *t)
+{
+    spinlock_tickets_t v;
+
+    smp_rmb();
+    v.head_tail = read_atomic(&t->head_tail);
+    return v;
+}
+
+static always_inline u16 observe_head(spinlock_tickets_t *t)
+{
+    smp_rmb();
+    return read_atomic(&t->head);
+}
+
void _spin_lock(spinlock_t *lock)
{
+    spinlock_tickets_t tickets = { .tail = 1, };
    LOCK_PROFILE_VAR;

    check_lock(&lock->debug);
-    while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
+    tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
+                                           tickets.head_tail);
+    while ( tickets.tail != observe_head(&lock->tickets) )
    {
        LOCK_PROFILE_BLOCK;
-        while ( likely(_raw_spin_is_locked(&lock->raw)) )
-            cpu_relax();
+        cpu_relax();
    }
    LOCK_PROFILE_GOT;
    preempt_disable();
+    arch_lock_acquire_barrier();
}
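
For reference, the ticket discipline implemented above can be summarised in a
few lines of plain C11. This is an illustrative model only, not Xen code:
standard atomics stand in for arch_fetch_and_add()/add_sized()/cpu_relax(),
and the two counters are shown as separate fields rather than packed into
head_tail.

    #include <stdatomic.h>
    #include <stdint.h>

    struct ticket_lock {
        _Atomic uint16_t head;   /* ticket currently being served */
        _Atomic uint16_t tail;   /* next ticket to be handed out  */
    };

    static void ticket_lock(struct ticket_lock *l)
    {
        /* Take a ticket: the previous tail value is our place in the queue. */
        uint16_t me = atomic_fetch_add_explicit(&l->tail, 1,
                                                memory_order_relaxed);

        /* Spin until the ticket being served reaches ours. */
        while ( atomic_load_explicit(&l->head, memory_order_acquire) != me )
            ;   /* cpu_relax() would go here */
    }

    static void ticket_unlock(struct ticket_lock *l)
    {
        /* Serve the next waiter; only the lock holder ever writes head. */
        atomic_fetch_add_explicit(&l->head, 1, memory_order_release);
    }

Waiters are thus served strictly in the order of their fetch-and-add, which is
the fairness property the old trylock-based spin loop could not guarantee.
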
void _spin_lock_irq(spinlock_t *lock)
{
-    LOCK_PROFILE_VAR;
-
    ASSERT(local_irq_is_enabled());
    local_irq_disable();
-    check_lock(&lock->debug);
-    while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
-    {
-        LOCK_PROFILE_BLOCK;
-        local_irq_enable();
-        while ( likely(_raw_spin_is_locked(&lock->raw)) )
-            cpu_relax();
-        local_irq_disable();
-    }
-    LOCK_PROFILE_GOT;
-    preempt_disable();
+    _spin_lock(lock);
}
unsigned long _spin_lock_irqsave(spinlock_t *lock)
{
    unsigned long flags;
-    LOCK_PROFILE_VAR;

    local_irq_save(flags);
-    check_lock(&lock->debug);
-    while ( unlikely(!_raw_spin_trylock(&lock->raw)) )
-    {
-        LOCK_PROFILE_BLOCK;
-        local_irq_restore(flags);
-        while ( likely(_raw_spin_is_locked(&lock->raw)) )
-            cpu_relax();
-        local_irq_disable();
-    }
-    LOCK_PROFILE_GOT;
-    preempt_disable();
+    _spin_lock(lock);
    return flags;
}
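
A typical call pattern for the IRQ-safe variants above, as a sketch only
(some_lock and the guarded state are placeholders, not names from this patch):

    DEFINE_SPINLOCK(some_lock);

    void touch_shared_state(void)
    {
        unsigned long flags;

        flags = _spin_lock_irqsave(&some_lock);   /* IRQs off, lock held */
        /* ... manipulate state also used from interrupt context ... */
        _spin_unlock_irqrestore(&some_lock, flags);
    }

Note that, unlike the code removed above, the IRQ variants no longer re-enable
interrupts while waiting: once a ticket has been taken the CPU has to wait for
its turn, so the whole spin now happens with interrupts disabled.
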
void _spin_unlock(spinlock_t *lock)
{
+    arch_lock_release_barrier();
    preempt_enable();
    LOCK_PROFILE_REL;
-    _raw_spin_unlock(&lock->raw);
+    add_sized(&lock->tickets.head, 1);
}
void _spin_unlock_irq(spinlock_t *lock)
{
-    preempt_enable();
-    LOCK_PROFILE_REL;
-    _raw_spin_unlock(&lock->raw);
+    _spin_unlock(lock);
    local_irq_enable();
}
void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
-    preempt_enable();
-    LOCK_PROFILE_REL;
-    _raw_spin_unlock(&lock->raw);
+    _spin_unlock(lock);
    local_irq_restore(flags);
}
int _spin_is_locked(spinlock_t *lock)
{
    check_lock(&lock->debug);
-    return _raw_spin_is_locked(&lock->raw);
+    return lock->tickets.head != lock->tickets.tail;
}
int _spin_trylock(spinlock_t *lock)
{
+    spinlock_tickets_t old, new;
+
    check_lock(&lock->debug);
-    if ( !_raw_spin_trylock(&lock->raw) )
+    old = observe_lock(&lock->tickets);
+    if ( old.head != old.tail )
+        return 0;
+    new = old;
+    new.tail++;
+    if ( cmpxchg(&lock->tickets.head_tail,
+                 old.head_tail, new.head_tail) != old.head_tail )
        return 0;
#ifdef LOCK_PROFILE
    if (lock->profile)
        lock->profile->time_locked = NOW();
#endif
    preempt_disable();
+    /*
+     * cmpxchg() is a full barrier so no need for an
+     * arch_lock_acquire_barrier().
+     */
    return 1;
}
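
The reason head and tail share a single 32-bit head_tail word shows up here:
trylock has to look at both halves and claim the next ticket in one atomic
step, which the cmpxchg() above provides. A stand-alone sketch with C11
atomics, assuming head sits in the low 16 bits and tail in the high 16 bits
(as spinlock_tickets_t lays them out on a little-endian build):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool ticket_trylock(_Atomic uint32_t *head_tail)
    {
        uint32_t old = atomic_load_explicit(head_tail, memory_order_relaxed);

        /* The lock is held whenever the two halves differ. */
        if ( (uint16_t)old != (uint16_t)(old >> 16) )
            return false;

        /* Take the next ticket (bump tail) only if nobody raced with us. */
        return atomic_compare_exchange_strong_explicit(
                   head_tail, &old, old + 0x10000u,
                   memory_order_acquire, memory_order_relaxed);
    }
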
void _spin_barrier(spinlock_t *lock)
{
+    spinlock_tickets_t sample;
#ifdef LOCK_PROFILE
    s_time_t block = NOW();
-    u64 loop = 0;
+#endif

    check_barrier(&lock->debug);
-    do { smp_mb(); loop++;} while ( _raw_spin_is_locked(&lock->raw) );
-    if ((loop > 1) && lock->profile)
+    smp_mb();
+    sample = observe_lock(&lock->tickets);
+    if ( sample.head != sample.tail )
    {
-        lock->profile->time_block += NOW() - block;
-        lock->profile->block_cnt++;
-    }
-#else
-    check_barrier(&lock->debug);
-    do { smp_mb(); } while ( _raw_spin_is_locked(&lock->raw) );
+        while ( observe_head(&lock->tickets) == sample.head )
+            cpu_relax();
+#ifdef LOCK_PROFILE
+        if ( lock->profile )
+        {
+            lock->profile->time_block += NOW() - block;
+            lock->profile->block_cnt++;
+        }
#endif
+    }
    smp_mb();
}
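
_spin_barrier() above does not wait for the lock to become free; it only waits
until whoever held the lock at the time of the initial sample has released it,
i.e. until head has advanced past the sampled value. Roughly, in terms of the
packed-word sketch used earlier (illustrative only):

    #include <stdatomic.h>
    #include <stdint.h>

    static void ticket_barrier(_Atomic uint32_t *head_tail)
    {
        uint32_t sample = atomic_load_explicit(head_tail, memory_order_acquire);

        /*
         * Held when sampled?  head only ever moves forward, so any change
         * to it means that holder has since dropped the lock.
         */
        if ( (uint16_t)sample != (uint16_t)(sample >> 16) )
            while ( (uint16_t)atomic_load_explicit(head_tail,
                                                   memory_order_acquire)
                    == (uint16_t)sample )
                ;   /* cpu_relax() would go here */
    }
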
int _spin_trylock_recursive(spinlock_t *lock)
{
-    int cpu = smp_processor_id();
+    unsigned int cpu = smp_processor_id();

    /* Don't allow overflow of recurse_cpu field. */
    BUILD_BUG_ON(NR_CPUS > 0xfffu);
void _spin_lock_recursive(spinlock_t *lock)
{
-    while ( !spin_trylock_recursive(lock) )
-        cpu_relax();
+    unsigned int cpu = smp_processor_id();
+
+    if ( likely(lock->recurse_cpu != cpu) )
+    {
+        _spin_lock(lock);
+        lock->recurse_cpu = cpu;
+    }
+
+    /* We support only fairly shallow recursion, else the counter overflows. */
+    ASSERT(lock->recurse_cnt < 0xfu);
+    lock->recurse_cnt++;
}
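
A usage sketch for the recursive variant (state_lock and update_state() are
placeholders): the outer call takes the ticket lock and records the owning
CPU, while the inner call on the same CPU only increments recurse_cnt, so the
nesting below cannot deadlock.

    spin_lock_recursive(&state_lock);      /* takes the ticket lock       */
    update_state();                        /* may re-enter on this CPU... */
    spin_lock_recursive(&state_lock);      /* ...same CPU: count only     */
    spin_unlock_recursive(&state_lock);    /* drops recurse_cnt           */
    spin_unlock_recursive(&state_lock);    /* releases the ticket lock    */
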
void _spin_unlock_recursive(spinlock_t *lock)
#define set_mb(var, value) do { xchg(&var, value); } while (0)
#define set_wmb(var, value) do { var = value; wmb(); } while (0)
+/*
+ * On x86 the only reordering is of reads with older writes. In the
+ * lock case, the read in observe_head() can only be reordered with
+ * writes that precede it, and moving a write _into_ a locked section
+ * is OK. In the release case, the write in add_sized() can only be
+ * reordered with reads that follow it, and hoisting a read _into_ a
+ * locked region is OK.
+ */
+#define arch_lock_acquire_barrier() barrier()
+#define arch_lock_release_barrier() barrier()
+
#define local_irq_disable() asm volatile ( "cli" : : : "memory" )
#define local_irq_enable() asm volatile ( "sti" : : : "memory" )
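
The comment above relies on x86's ordering model, where plain compiler
barriers are enough on both the acquire and the release side. A weakly
ordered architecture could not make that argument; a hypothetical definition
for such a port (not part of this hunk) would use real memory barriers:

    /* Hypothetical: no strong hardware ordering to lean on, use full barriers. */
    #define arch_lock_acquire_barrier() smp_mb()
    #define arch_lock_release_barrier() smp_mb()
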
    static struct lock_profile *__lock_profile_##name \
    __used_section(".lockprofile.data") = \
    &__lock_profile_data_##name
-#define _SPIN_LOCK_UNLOCKED(x) { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, \
-                                 _LOCK_DEBUG, x }
+#define _SPIN_LOCK_UNLOCKED(x) { { 0 }, 0xfffu, 0, _LOCK_DEBUG, x }
#define SPIN_LOCK_UNLOCKED _SPIN_LOCK_UNLOCKED(NULL)
#define DEFINE_SPINLOCK(l) \
    spinlock_t l = _SPIN_LOCK_UNLOCKED(NULL); \
struct lock_profile_qhead { };
-#define SPIN_LOCK_UNLOCKED \
-    { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, _LOCK_DEBUG }
+#define SPIN_LOCK_UNLOCKED { { 0 }, 0xfffu, 0, _LOCK_DEBUG }
#define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
#define spin_lock_init_prof(s, l) spin_lock_init(&((s)->l))
#endif
+typedef union {
+    u32 head_tail;
+    struct {
+        u16 head;
+        u16 tail;
+    };
+} spinlock_tickets_t;
+
typedef struct spinlock {
-    raw_spinlock_t raw;
+    spinlock_tickets_t tickets;
    u16 recurse_cpu:12;
    u16 recurse_cnt:4;
    struct lock_debug debug;