Cpufreq: prevent negative px resident time, add spinlock to avoid race
authorKeir Fraser <keir.fraser@citrix.com>
Mon, 5 Jan 2009 11:16:12 +0000 (11:16 +0000)
committerKeir Fraser <keir.fraser@citrix.com>
Mon, 5 Jan 2009 11:16:12 +0000 (11:16 +0000)
Because the NOW() value may drift between different CPUs, add
protection to prevent the Px residency time from going negative.
Because the cpufreq logic and xenpm may both concurrently access
cpufreq_statistic_data, add a per-CPU spinlock to avoid the race.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
xen/drivers/acpi/pmstat.c
xen/drivers/cpufreq/utility.c
xen/include/acpi/cpufreq/cpufreq.h
xen/include/acpi/cpufreq/processor_perf.h

index 952ffc639144852c56113da5c0b514fe7437b877..e1faae3b33dfd925e82df73da79410312ef753f3 100644 (file)
@@ -87,33 +87,34 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
 
     case PMSTAT_get_pxstat:
     {
-        uint64_t now, ct;
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
+        uint32_t ct;
         struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid];
+        spinlock_t *cpufreq_statistic_lock = 
+                   &per_cpu(cpufreq_statistic_lock, op->cpuid);
+
+        spin_lock_irq(cpufreq_statistic_lock);
 
         if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt )
+        {
+            spin_unlock_irq(cpufreq_statistic_lock);
             return -ENODATA;
+        }
 
-        total_idle_ns = get_cpu_idle_time(op->cpuid);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        now = NOW();
         pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
-        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
-        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = total_idle_ns;
+
+        cpufreq_residency_update(op->cpuid, pxpt->u.cur);
 
         ct = pmpt->perf.state_count;
         if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
         {
+            spin_unlock_irq(cpufreq_statistic_lock);
             ret = -EFAULT;
             break;
         }
 
         if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
         {
+            spin_unlock_irq(cpufreq_statistic_lock);
             ret = -EFAULT;
             break;
         }
@@ -123,6 +124,8 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
         op->u.getpx.last = pxpt->u.last;
         op->u.getpx.cur = pxpt->u.cur;
 
+        spin_unlock_irq(cpufreq_statistic_lock);
+
         break;
     }
 
index 4ac38339234a755298e7e8721871521f3554e86f..b45fe3d750c18cb3aa8eaeb950c6ba5e859532a4 100644 (file)
@@ -36,35 +36,54 @@ struct cpufreq_driver   *cpufreq_driver;
 struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
 struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
 
+DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock);
+
 /*********************************************************************
  *                    Px STATISTIC INFO                              *
  *********************************************************************/
 
+void cpufreq_residency_update(unsigned int cpu, uint8_t state)
+{
+    uint64_t now, total_idle_ns;
+    int64_t delta;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpu];
+
+    total_idle_ns = get_cpu_idle_time(cpu);
+    now = NOW();
+
+    delta = (now - pxpt->prev_state_wall) - 
+            (total_idle_ns - pxpt->prev_idle_wall);
+
+    if ( likely(delta >= 0) )
+        pxpt->u.pt[state].residency += delta;
+
+    pxpt->prev_state_wall = now;
+    pxpt->prev_idle_wall = total_idle_ns;
+}
+
 void cpufreq_statistic_update(unsigned int cpu, uint8_t from, uint8_t to)
 {
-    uint64_t now;
     struct pm_px *pxpt = cpufreq_statistic_data[cpu];
     struct processor_pminfo *pmpt = processor_pminfo[cpu];
-    uint64_t total_idle_ns;
-    uint64_t tmp_idle_ns;
+    spinlock_t *cpufreq_statistic_lock = 
+               &per_cpu(cpufreq_statistic_lock, cpu);
 
-    if ( !pxpt || !pmpt )
-        return;
+    spin_lock_irq(cpufreq_statistic_lock);
 
-    now = NOW();
-    total_idle_ns = get_cpu_idle_time(cpu);
-    tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+    if ( !pxpt || !pmpt ) {
+        spin_unlock_irq(cpufreq_statistic_lock);
+        return;
+    }
 
     pxpt->u.last = from;
     pxpt->u.cur = to;
     pxpt->u.pt[to].count++;
-    pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
-    pxpt->u.pt[from].residency -= tmp_idle_ns;
+
+    cpufreq_residency_update(cpu, from);
 
     (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
 
-    pxpt->prev_state_wall = now;
-    pxpt->prev_idle_wall = total_idle_ns;
+    spin_unlock_irq(cpufreq_statistic_lock);
 }
 
 int cpufreq_statistic_init(unsigned int cpuid)
@@ -72,24 +91,33 @@ int cpufreq_statistic_init(unsigned int cpuid)
     uint32_t i, count;
     struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
     const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+    spinlock_t *cpufreq_statistic_lock = 
+                          &per_cpu(cpufreq_statistic_lock, cpuid);
 
     if ( !pmpt )
         return -EINVAL;
 
-    if ( pxpt )
+    spin_lock_irq(cpufreq_statistic_lock);
+
+    if ( pxpt ) {
+        spin_unlock_irq(cpufreq_statistic_lock);
         return 0;
+    }
 
     count = pmpt->perf.state_count;
 
     pxpt = xmalloc(struct pm_px);
-    if ( !pxpt )
+    if ( !pxpt ) {
+        spin_unlock_irq(cpufreq_statistic_lock);
         return -ENOMEM;
+    }
     memset(pxpt, 0, sizeof(*pxpt));
     cpufreq_statistic_data[cpuid] = pxpt;
 
     pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
     if (!pxpt->u.trans_pt) {
         xfree(pxpt);
+        spin_unlock_irq(cpufreq_statistic_lock);
         return -ENOMEM;
     }
 
@@ -97,6 +125,7 @@ int cpufreq_statistic_init(unsigned int cpuid)
     if (!pxpt->u.pt) {
         xfree(pxpt->u.trans_pt);
         xfree(pxpt);
+        spin_unlock_irq(cpufreq_statistic_lock);
         return -ENOMEM;
     }
 
@@ -112,19 +141,30 @@ int cpufreq_statistic_init(unsigned int cpuid)
     pxpt->prev_state_wall = NOW();
     pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
 
+    spin_unlock_irq(cpufreq_statistic_lock);
+
     return 0;
 }
 
 void cpufreq_statistic_exit(unsigned int cpuid)
 {
     struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    spinlock_t *cpufreq_statistic_lock = 
+               &per_cpu(cpufreq_statistic_lock, cpuid);
 
-    if (!pxpt)
+    spin_lock_irq(cpufreq_statistic_lock);
+
+    if (!pxpt) {
+        spin_unlock_irq(cpufreq_statistic_lock);
         return;
+    }
+
     xfree(pxpt->u.trans_pt);
     xfree(pxpt->u.pt);
     xfree(pxpt);
     cpufreq_statistic_data[cpuid] = NULL;
+
+    spin_unlock_irq(cpufreq_statistic_lock);
 }
 
 void cpufreq_statistic_reset(unsigned int cpuid)
@@ -132,9 +172,15 @@ void cpufreq_statistic_reset(unsigned int cpuid)
     uint32_t i, j, count;
     struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
     const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+    spinlock_t *cpufreq_statistic_lock = 
+               &per_cpu(cpufreq_statistic_lock, cpuid);
+
+    spin_lock_irq(cpufreq_statistic_lock);
 
-    if ( !pmpt || !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt )
+    if ( !pmpt || !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt ) {
+        spin_unlock_irq(cpufreq_statistic_lock);
         return;
+    }
 
     count = pmpt->perf.state_count;
 
@@ -148,7 +194,25 @@ void cpufreq_statistic_reset(unsigned int cpuid)
 
     pxpt->prev_state_wall = NOW();
     pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+
+    spin_unlock_irq(cpufreq_statistic_lock);
+}
+
+static int __init cpufreq_statistic_lock_init(void)
+{
+    unsigned int cpu;
+    spinlock_t *cpufreq_statistic_lock; 
+
+    for (cpu=0; cpu<NR_CPUS; cpu++) {
+        cpufreq_statistic_lock = 
+                &per_cpu(cpufreq_statistic_lock, cpu);
+
+        spin_lock_init(cpufreq_statistic_lock);
+    }
+
+    return 0;
 }
+__initcall(cpufreq_statistic_lock_init);
 
 
 /*********************************************************************
index 4314414031e0eea5db76c6cb79e2558bea13fa24..eba0d7c6a4cf0afc519f6d47f40dcdf1599eea5c 100644 (file)
@@ -20,6 +20,8 @@
 
 #include "processor_perf.h"
 
+DECLARE_PER_CPU(spinlock_t, cpufreq_statistic_lock);
+
 struct cpufreq_governor;
 
 struct acpi_cpufreq_data {
index 6d5d64173c912cbaee679bc217af0a3fb9e4cc4d..cc6be7a9133672c5e087a4704ca5e167c03e7f4d 100644 (file)
@@ -9,6 +9,7 @@
 int get_cpu_id(u8);
 int powernow_cpufreq_init(void);
 
+void cpufreq_residency_update(unsigned int, uint8_t);
 void cpufreq_statistic_update(unsigned int, uint8_t, uint8_t);
 int  cpufreq_statistic_init(unsigned int);
 void cpufreq_statistic_exit(unsigned int);