From: David Vrabel Date: Mon, 3 Aug 2015 11:29:19 +0000 (+0100) Subject: arm: reduce power use by contended spin locks with WFE/SEV X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~2536 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=e625faf45ee1349be7da2b27e38567e01ce4b453;p=xen.git arm: reduce power use by contended spin locks with WFE/SEV Instead of cpu_relax() while spinning and observing the ticket head, introduce arch_lock_relax() which executes a WFE instruction. After the ticket head is changed, call arch_lock_signal() to execute an SEV instruction (with the required DSB first) to wake any spinners. This should improve power consumption when locks are contended and spinning. For consistency also move arch_lock_(acquire|release)_barrier to asm/spinlock.h. Booted the result on arm32 (Midway) and arm64 (Mustang). Build test only on amd64. Signed-off-by: David Vrabel [ijc: add barrier, rename as arch_lock_*, move arch_lock_*_barrier, test] Signed-off-by: Ian Campbell Cc: Stefano Stabellini Cc: Jan Beulich Cc: Andrew Cooper Reviewed-by: Andrew Cooper Acked-by: Stefano Stabellini --- diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c index 29149d1537..7f89694d49 100644 --- a/xen/common/spinlock.c +++ b/xen/common/spinlock.c @@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock) while ( tickets.tail != observe_head(&lock->tickets) ) { LOCK_PROFILE_BLOCK; - cpu_relax(); + arch_lock_relax(); } LOCK_PROFILE_GOT; preempt_disable(); @@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock) preempt_enable(); LOCK_PROFILE_REL; add_sized(&lock->tickets.head, 1); + arch_lock_signal(); } void _spin_unlock_irq(spinlock_t *lock) @@ -228,7 +229,7 @@ void _spin_barrier(spinlock_t *lock) if ( sample.head != sample.tail ) { while ( observe_head(&lock->tickets) == sample.head ) - cpu_relax(); + arch_lock_relax(); #ifdef LOCK_PROFILE if ( lock->profile ) { diff --git a/xen/include/asm-arm/spinlock.h b/xen/include/asm-arm/spinlock.h index 81955d1697..8cdf9e18ce 100644 --- 
a/xen/include/asm-arm/spinlock.h +++ b/xen/include/asm-arm/spinlock.h @@ -1,6 +1,13 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H -/* Nothing ARM specific. */ +#define arch_lock_acquire_barrier() smp_mb() +#define arch_lock_release_barrier() smp_mb() + +#define arch_lock_relax() wfe() +#define arch_lock_signal() do { \ + dsb(ishst); \ + sev(); \ +} while(0) #endif /* __ASM_SPINLOCK_H */ diff --git a/xen/include/asm-arm/system.h b/xen/include/asm-arm/system.h index f0e222f413..2eb96e8769 100644 --- a/xen/include/asm-arm/system.h +++ b/xen/include/asm-arm/system.h @@ -53,9 +53,6 @@ #define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v) -#define arch_lock_acquire_barrier() smp_mb() -#define arch_lock_release_barrier() smp_mb() - extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next); #endif diff --git a/xen/include/asm-x86/spinlock.h b/xen/include/asm-x86/spinlock.h index 7d69e75a93..70a85af228 100644 --- a/xen/include/asm-x86/spinlock.h +++ b/xen/include/asm-x86/spinlock.h @@ -4,4 +4,18 @@ #define _raw_read_unlock(l) \ asm volatile ( "lock; dec%z0 %0" : "+m" ((l)->lock) :: "memory" ) +/* + * On x86 the only reordering is of reads with older writes. In the + * lock case, the read in observe_head() can only be reordered with + * writes that precede it, and moving a write _into_ a locked section + * is OK. In the release case, the write in add_sized() can only be + * reordered with reads that follow it, and hoisting a read _into_ a + * locked region is OK. 
+ */ +#define arch_lock_acquire_barrier() barrier() +#define arch_lock_release_barrier() barrier() + +#define arch_lock_relax() cpu_relax() +#define arch_lock_signal() + #endif /* __ASM_SPINLOCK_H */ diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h index 25a6a2a32d..9fb70f5704 100644 --- a/xen/include/asm-x86/system.h +++ b/xen/include/asm-x86/system.h @@ -185,17 +185,6 @@ static always_inline unsigned long __xadd( #define set_mb(var, value) do { xchg(&var, value); } while (0) #define set_wmb(var, value) do { var = value; wmb(); } while (0) -/* - * On x86 the only reordering is of reads with older writes. In the - * lock case, the read in observe_head() can only be reordered with - * writes that precede it, and moving a write _into_ a locked section - * is OK. In the release case, the write in add_sized() can only be - * reordered with reads that follow it, and hoisting a read _into_ a - * locked region is OK. - */ -#define arch_lock_acquire_barrier() barrier() -#define arch_lock_release_barrier() barrier() - #define local_irq_disable() asm volatile ( "cli" : : : "memory" ) #define local_irq_enable() asm volatile ( "sti" : : : "memory" )