xen: Add global irq_vector_map option, set if using AMD global intremap tables

author George Dunlap <george.dunlap@eu.citrix.com>

Mon, 5 Sep 2011 14:00:15 +0000 (15:00 +0100)

committer George Dunlap <george.dunlap@eu.citrix.com>

Mon, 5 Sep 2011 14:00:15 +0000 (15:00 +0100)
author George Dunlap <george.dunlap@eu.citrix.com>
Mon, 5 Sep 2011 14:00:15 +0000 (15:00 +0100)
committer George Dunlap <george.dunlap@eu.citrix.com>
Mon, 5 Sep 2011 14:00:15 +0000 (15:00 +0100)
diff --git a/docs/src/user.tex b/docs/src/user.tex

index dc6ace9eb2af12e2fca7e923da85fe6fe75888bf..f743840d6f4fb5b548a66b69adf6aea37e4b9464 100644 (file)
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -2280,6 +2280,10 @@ writing to the VGA console after domain 0 starts booting (e.g., `vga=text-80x50,
  \item [ vcpu\_migration\_delay=$<$minimum\_time$>$] Set minimum time of 
    vcpu migration in microseconds (default 0). This parameter avoids agressive
    vcpu migration. For example, the linux kernel uses 0.5ms by default.
+\item [ irq\_vector\_map=xxx ] Enable irq vector non-sharing maps.  Setting 'global'
+  will ensure that no IRQs will share vectors.  Setting 'per-device' will ensure
+  that no IRQs from the same device will share vectors.  Setting to 'none' will
+  disable it entirely, overriding any defaults the IOMMU code may set.
  \end{description}
  
  In addition, the following options may be specified on the Xen command
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c

index 9518d6008701525ca849df2066c4f9c24fdc0617..f4893d8d67a9c34f3f6fd5ba94614a5ff7685cfe 100644 (file)
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -24,6 +24,8 @@
  #include <asm/mach-generic/mach_apic.h>
  #include <public/physdev.h>
  
+static void parse_irq_vector_map_param(char *s);
+
  /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
  bool_t __read_mostly opt_noirqbalance = 0;
  boolean_param("noirqbalance", opt_noirqbalance);
@@ -33,8 +35,10 @@ unsigned int __read_mostly nr_irqs;
  integer_param("nr_irqs", nr_irqs);
  
  /* This default may be changed by the AMD IOMMU code */
-bool_t __read_mostly opt_irq_perdev_vector_map = 0;
-boolean_param("irq-perdev-vector-map", opt_irq_perdev_vector_map);
+int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
+custom_param("irq_vector_map", parse_irq_vector_map_param);
+
+vmask_t global_used_vector_map;
  
  u8 __read_mostly *irq_vector;
  struct irq_desc __read_mostly *irq_desc = NULL;
@@ -64,6 +68,26 @@ static struct timer irq_ratelimit_timer;
  static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
  integer_param("irq_ratelimit", irq_ratelimit_threshold);
  
+/*
+ * Parse the value of the "irq_vector_map" command-line option.
+ *
+ * @s: option value, a comma-separated list of tokens.  The buffer is
+ *     modified in place: each ',' is overwritten with '\0' as it is consumed.
+ *
+ * The tokens "none", "global" and "per-device" set opt_irq_vector_map to the
+ * matching OPT_IRQ_VECTOR_MAP_* value; when several are given, the last
+ * recognised one wins.  Any other token is ignored and leaves the current
+ * setting untouched.
+ */
+static void __init parse_irq_vector_map_param(char *s)
+{
+    char *ss;
+
+    do {
+        ss = strchr(s, ',');
+        if ( ss )
+            *ss = '\0';
+
+        if ( !strcmp(s, "none") )
+            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_NONE;
+        else if ( !strcmp(s, "global") )
+            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
+        else if ( !strcmp(s, "per-device") )
+            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
+
+        /* Only step past a separator that exists: NULL + 1 is undefined. */
+        if ( ss )
+            s = ss + 1;
+    } while ( ss );
+}
+
  /* Must be called when irq disabled */
  void lock_vector_lock(void)
  {
@@ -365,6 +389,41 @@ hw_irq_controller no_irq_type = {
      end_none
  };
  
+/*
+ * Pick the vector non-sharing mask for an irq that is not yet marked in use.
+ *
+ * Returns a pointer to global_used_vector_map when opt_irq_vector_map is
+ * OPT_IRQ_VECTOR_MAP_GLOBAL, and NULL for every other setting.
+ *
+ * In the global case two unexpected states are logged:
+ *  - the irq's chip_data already has a used_vectors mask;
+ *  - otherwise, the irq already has a vector (> 0), which is then recorded
+ *    in the global map.
+ */
+static vmask_t *irq_get_used_vector_mask(int irq)
+{
+    vmask_t *mask = &global_used_vector_map;
+    struct irq_desc *desc;
+    int vector;
+
+    if ( opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_GLOBAL )
+        return NULL;
+
+    desc = irq_to_desc(irq);
+
+    if ( desc->chip_data->used_vectors )
+    {
+        printk(XENLOG_INFO "%s: Strange, unassigned irq %d already has used_vectors!\n",
+               __func__, irq);
+        return mask;
+    }
+
+    vector = irq_to_vector(irq);
+    if ( vector <= 0 )
+        return mask;
+
+    printk(XENLOG_INFO "%s: Strange, irq %d already assigned vector %d!\n",
+           __func__, irq, vector);
+
+    /* The vector must not already be claimed in the global map. */
+    ASSERT(!test_bit(vector, mask));
+    set_bit(vector, mask);
+
+    return mask;
+}
+
  int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask)
  {
      /*
@@ -383,6 +442,7 @@ int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask)
      int cpu, err;
      unsigned long flags;
      cpumask_t tmp_mask;
+    vmask_t *irq_used_vectors = NULL;
  
      old_vector = irq_to_vector(irq);
      if (old_vector) {
@@ -397,6 +457,17 @@ int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask)
          return -EAGAIN;
  
      err = -ENOSPC;
+
+    /* This is the only place normal IRQs are ever marked
+     * as "in use".  If they're not in use yet, check to see
+     * if we need to assign a global vector mask. */
+    if ( irq_status[irq] == IRQ_USED )
+    {
+        irq_used_vectors = cfg->used_vectors;
+    }
+    else
+        irq_used_vectors = irq_get_used_vector_mask(irq);
+
      for_each_cpu_mask(cpu, *mask) {
          int new_cpu;
          int vector, offset;
@@ -422,8 +493,8 @@ next:
          if (test_bit(vector, used_vectors))
              goto next;
  
-        if (cfg->used_vectors
-            && test_bit(vector, cfg->used_vectors) )
+        if (irq_used_vectors
+            && test_bit(vector, irq_used_vectors) )
              goto next;
  
          for_each_cpu_mask(new_cpu, tmp_mask)
@@ -442,15 +513,22 @@ next:
              per_cpu(vector_irq, new_cpu)[vector] = irq;
          cfg->vector = vector;
          cpus_copy(cfg->cpu_mask, tmp_mask);
+
+        irq_status[irq] = IRQ_USED;
+        ASSERT((cfg->used_vectors == NULL)
+               || (cfg->used_vectors == irq_used_vectors));
+        cfg->used_vectors = irq_used_vectors;
+
+        if (IO_APIC_IRQ(irq))
+            irq_vector[irq] = vector;
+
          if ( cfg->used_vectors )
          {
              ASSERT(!test_bit(vector, cfg->used_vectors));
+
              set_bit(vector, cfg->used_vectors);
          }
  
-        irq_status[irq] = IRQ_USED;
-            if (IO_APIC_IRQ(irq))
-                    irq_vector[irq] = vector;
          err = 0;
          local_irq_restore(flags);
          break;
@@ -1621,7 +1699,7 @@ int map_domain_pirq(
  
      if ( !IS_PRIV(current->domain) &&
           !(IS_PRIV_FOR(current->domain, d) &&
-          irq_access_permitted(current->domain, pirq)))
+           irq_access_permitted(current->domain, pirq)))
          return -EPERM;
  
      if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs )
@@ -1673,11 +1751,22 @@ int map_domain_pirq(
  
          if ( desc->handler != &no_irq_type )
              dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
-              d->domain_id, irq);
+                    d->domain_id, irq);
          desc->handler = &pci_msi_type;
-        if ( opt_irq_perdev_vector_map
+
+        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
               && !desc->chip_data->used_vectors )
+        {
              desc->chip_data->used_vectors = &pdev->info.used_vectors;
+            if ( desc->chip_data->vector != IRQ_VECTOR_UNASSIGNED )
+            {
+                int vector = desc->chip_data->vector;
+                ASSERT(!test_bit(vector, desc->chip_data->used_vectors));
+
+                set_bit(vector, desc->chip_data->used_vectors);
+            }
+        }
+
          set_domain_irq_pirq(d, irq, info);
          setup_msi_irq(msi_desc, irq);
          spin_unlock_irqrestore(&desc->lock, flags);
@@ -1687,9 +1776,12 @@ int map_domain_pirq(
          spin_lock_irqsave(&desc->lock, flags);
          set_domain_irq_pirq(d, irq, info);
          spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV )
+            printk(XENLOG_INFO "Per-device vector maps for GSIs not implemented yet.\n");
      }
  
- done:
+done:
      if ( ret )
          cleanup_domain_irq_pirq(d, irq, info);
      return ret;
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c

index 555ee0506d1d16d6ffe366efc0725fb7a9d209cd..0ceafb4d481454404a3293714a8af0977b9a46f0 100644 (file)
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -167,18 +167,35 @@ int __init amd_iov_detect(void)
          return -ENODEV;
      }
  
-    /* Enable use of per-device vector map unless otherwise
-     * specified */
-    if ( iommu_amd_perdev_vector_map )
+    /*
+     * AMD IOMMUs don't distinguish between vectors destined for
+     * different cpus when doing interrupt remapping.  This means
+     * that interrupts going through the same intremap table
+     * can't share the same vector.
+     *
+     * If irq_vector_map isn't specified, choose a sensible default:
+     * - If we're using per-device intremap tables, per-device
+     *   vector non-sharing maps
+     * - If we're using a global intremap table, global vector
+     *   non-sharing map
+     */
+    if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
      {
-        printk("AMD-Vi: Enabling per-device vector maps\n");
-        opt_irq_perdev_vector_map=1;
+        if ( amd_iommu_perdev_intremap )
+        {
+            printk("AMD-Vi: Enabling per-device vector maps\n");
+            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
+        }
+        else
+        {
+            printk("AMD-Vi: Enabling global vector map\n");
+            opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
+        }
      }
      else
      {
-        printk("AMD-Vi: WARNING - not enabling per-device vector maps\n");
+        printk("AMD-Vi: Not overriding irq_vector_map setting\n");
      }
-
      return scan_pci_devices();
  }
  
diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h

index da0b2b99d51fd674ea10f4abf91e55513c735298..1a37c4d4c2a2a3394ddcf8d21dc5c929e6483318 100644 (file)
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -46,6 +46,13 @@ extern u8 *irq_vector;
  
  extern bool_t opt_noirqbalance;
  
+/* Values for opt_irq_vector_map (the "irq_vector_map" command-line option). */
+#define OPT_IRQ_VECTOR_MAP_DEFAULT 0 /* Not set explicitly; the AMD IOMMU code may pick a mode */
+#define OPT_IRQ_VECTOR_MAP_NONE    1 /* No vector non-sharing map */
+#define OPT_IRQ_VECTOR_MAP_GLOBAL  2 /* One global vector map (no vector sharing) */
+#define OPT_IRQ_VECTOR_MAP_PERDEV  3 /* Per-device vector map (no vector sharing w/in a device) */
+
+/* Current vector-map mode; holds one of the OPT_IRQ_VECTOR_MAP_* values. */
+extern int opt_irq_vector_map;
+
  /*
   * Per-cpu current frame pointer - the location of the last exception frame on
   * the stack
author	George Dunlap <george.dunlap@eu.citrix.com>
	Mon, 5 Sep 2011 14:00:15 +0000 (15:00 +0100)
committer	George Dunlap <george.dunlap@eu.citrix.com>
	Mon, 5 Sep 2011 14:00:15 +0000 (15:00 +0100)
docs/src/user.tex		patch \| blob \| history
xen/arch/x86/irq.c		patch \| blob \| history
xen/drivers/passthrough/amd/pci_amd_iommu.c		patch \| blob \| history
xen/include/asm-x86/irq.h		patch \| blob \| history