#include <xen/config.h>
#include <xen/sched.h>
#include <xen/lib.h>
+#include <xen/stdbool.h>
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <xen/bitops.h>
{
return p2m_valid(pte) && pte.p2m.table;
}
-#if 0
static bool_t p2m_mapping(lpae_t pte)
{
return p2m_valid(pte) && !pte.p2m.table;
}
-#endif
+
+void p2m_dump_info(struct domain *d)
+{
+ struct p2m_domain *p2m = &d->arch.p2m;
+
+ spin_lock(&p2m->lock);
+ printk("p2m mappings for domain %d (vmid %d):\n",
+ d->domain_id, p2m->vmid);
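+ /* Level 0 is never used for mappings, so its counters must stay zero. */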
+ BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]);
+ printk(" 1G mappings: %ld (shattered %ld)\n",
+ p2m->stats.mappings[1], p2m->stats.shattered[1]);
+ printk(" 2M mappings: %ld (shattered %ld)\n",
+ p2m->stats.mappings[2], p2m->stats.shattered[2]);
+ printk(" 4K mappings: %ld\n", p2m->stats.mappings[3]);
+ spin_unlock(&p2m->lock);
+}
void dump_p2m_lookup(struct domain *d, paddr_t addr)
{
clean_xen_dcache(*p);
}
-/* Allocate a new page table page and hook it in via the given entry */
-static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t flush_cache)
+/*
+ * Allocate a new page table page and hook it in via the given entry.
+ * apply_one_level relies on this returning 0 on success
+ * and -ve on failure.
+ *
+ * If the existing entry is present then it must be a mapping and not
+ * a table and it will be shattered into the next level down.
+ *
+ * level_shift is the number of bits at the level we want to create.
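+ *
+ * For example, shattering a first level 1G mapping passes
+ * level_shift = FIRST_SHIFT - PAGE_SHIFT, so each of the LPAE_ENTRIES
+ * new second level entries maps base_pfn + (i << LPAE_SHIFT), i.e. one
+ * 2M block, and has p2m.table cleared to mark it as a superpage.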
+ */
+static int p2m_create_table(struct domain *d, lpae_t *entry,
+ int level_shift, bool_t flush_cache)
{
struct p2m_domain *p2m = &d->arch.p2m;
struct page_info *page;
- void *p;
+ lpae_t *p;
lpae_t pte;
+ int splitting = p2m_valid(*entry);
- BUG_ON(entry->p2m.valid);
+ BUG_ON(p2m_table(*entry));
page = alloc_domheap_page(NULL, 0);
if ( page == NULL )
return -ENOMEM;
page_list_add(page, &p2m->pages);
p = __map_domain_page(page);
- clear_page(p);
+ if ( splitting )
+ {
+ p2m_type_t t = entry->p2m.type;
+ unsigned long base_pfn = entry->p2m.base;
+ int i;
+
+ /*
+ * We are either splitting a first level 1G page into 512 second level
+ * 2M pages, or a second level 2M page into 512 third level 4K pages.
+ */
+ for ( i = 0; i < LPAE_ENTRIES; i++ )
+ {
+ pte = mfn_to_p2m_entry(base_pfn + (i<<(level_shift-LPAE_SHIFT)),
+ MATTR_MEM, t);
+
+ /*
+ * First and second level super pages set p2m.table = 0, but
+ * third level entries set table = 1.
+ */
+ if ( level_shift - LPAE_SHIFT )
+ pte.p2m.table = 0;
+
+ write_pte(&p[i], pte);
+ }
+ }
+ else
+ clear_page(p);
+
if ( flush_cache )
clean_xen_dcache_va_range(p, PAGE_SIZE);
+
unmap_domain_page(p);
pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
CACHEFLUSH,
};
-static void p2m_put_page(const lpae_t pte)
+/* Put any references on the single 4K page referenced by pte. TODO:
+ * Handle superpages; for now we only take special references for leaf
+ * pages (specifically foreign ones, which can't be super mapped today).
+ */
+static void p2m_put_l3_page(const lpae_t pte)
{
+ ASSERT(p2m_valid(pte));
+
/* TODO: Handle other p2m types
*
* It's safe to do the put_page here because page_alloc will
}
}
+/*
+ * Returns true if start_gpaddr..end_gpaddr contains at least one
+ * suitably aligned level_size mapping of maddr.
+ *
+ * So long as the range is large enough the end_gpaddr need not be
+ * aligned (callers should create one superpage mapping based on this
+ * result and then call this again on the remaining range; eventually
+ * the slop at the end will cause this function to return false).
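+ *
+ * For example, at the 2M level this returns true when start_gpaddr and
+ * maddr are both 2M aligned and the range covers at least 2M, and
+ * false if either address is offset by, say, 4K.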
+ */
+static bool_t is_mapping_aligned(const paddr_t start_gpaddr,
+ const paddr_t end_gpaddr,
+ const paddr_t maddr,
+ const paddr_t level_size)
+{
+ const paddr_t level_mask = level_size - 1;
+
+ /* No hardware superpages at level 0 */
+ if ( level_size == ZEROETH_SIZE )
+ return false;
+
+ /*
+ * A range smaller than the size of a superpage at this level
+ * cannot be superpage aligned.
+ */
+ if ( ( end_gpaddr - start_gpaddr ) < level_size - 1 )
+ return false;
+
+ /* Both the gpaddr and maddr must be aligned */
+ if ( start_gpaddr & level_mask )
+ return false;
+ if ( maddr & level_mask )
+ return false;
+ return true;
+}
+
+#define P2M_ONE_DESCEND 0
+#define P2M_ONE_PROGRESS_NOP 0x1
+#define P2M_ONE_PROGRESS 0x10
+
+/*
+ * 0 == (P2M_ONE_DESCEND) continue to descend the tree
+ * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush,
+ * entry, addr and maddr updated. Return value is an
+ * indication of the amount of work done (for preemption).
+ * -ve == (-Exxx) error.
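+ *
+ * Callers add the +ve return values to their preemption counter and
+ * "continue" their walk instead of descending another level.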
+ */
+static int apply_one_level(struct domain *d,
+ lpae_t *entry,
+ unsigned int level,
+ bool_t flush_cache,
+ enum p2m_operation op,
+ paddr_t start_gpaddr,
+ paddr_t end_gpaddr,
+ paddr_t *addr,
+ paddr_t *maddr,
+ bool_t *flush,
+ int mattr,
+ p2m_type_t t)
+{
+ /* Helpers to lookup the properties of each level */
+ const paddr_t level_sizes[] =
+ { ZEROETH_SIZE, FIRST_SIZE, SECOND_SIZE, THIRD_SIZE };
+ const paddr_t level_masks[] =
+ { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
+ const paddr_t level_shifts[] =
+ { ZEROETH_SHIFT, FIRST_SHIFT, SECOND_SHIFT, THIRD_SHIFT };
+ const paddr_t level_size = level_sizes[level];
+ const paddr_t level_mask = level_masks[level];
+ const paddr_t level_shift = level_shifts[level];
+
+ struct p2m_domain *p2m = &d->arch.p2m;
+ lpae_t pte;
+ const lpae_t orig_pte = *entry;
+ int rc;
+
+ BUG_ON(level > 3);
+
+ switch ( op )
+ {
+ case ALLOCATE:
+ ASSERT(level < 3 || !p2m_valid(orig_pte));
+ ASSERT(*maddr == 0);
+
+ if ( p2m_valid(orig_pte) )
+ return P2M_ONE_DESCEND;
+
+ if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) )
+ {
+ struct page_info *page;
+
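+ /* Try to allocate one superpage's worth of contiguous memory for
+ * this level, i.e. 2^(level_shift - PAGE_SHIFT) pages. */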
+ page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
+ if ( page )
+ {
+ pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
+ if ( level < 3 )
+ pte.p2m.table = 0;
+ p2m_write_pte(entry, pte, flush_cache);
+ p2m->stats.mappings[level]++;
+
+ *addr += level_size;
+ *maddr += level_size;
+
+ return P2M_ONE_PROGRESS;
+ }
+ else if ( level == 3 )
+ return -ENOMEM;
+ }
+
+ /* L3 is always suitably aligned for mapping (handled, above) */
+ BUG_ON(level == 3);
+
+ /*
+ * If we get here then we failed to allocate a sufficiently
+ * large contiguous region for this level (which can't be
+ * L3). Create a page table and continue to descend so we try
+ * smaller allocations.
+ */
+ rc = p2m_create_table(d, entry, 0, flush_cache);
+ if ( rc < 0 )
+ return rc;
+
+ return P2M_ONE_DESCEND;
+
+ case INSERT:
+ if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
+ /* We do not handle replacing an existing table with a superpage */
+ (level == 3 || !p2m_table(orig_pte)) )
+ {
+ /* New mapping is superpage aligned, make it */
+ pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t);
+ if ( level < 3 )
+ pte.p2m.table = 0; /* Superpage entry */
+
+ p2m_write_pte(entry, pte, flush_cache);
+
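+ /* Replacing a previously valid entry means the TLB must be flushed. */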
+ *flush |= p2m_valid(orig_pte);
+
+ *addr += level_size;
+ *maddr += level_size;
+
+ if ( p2m_valid(orig_pte) )
+ {
+ /*
+ * We can't currently get here for an existing table
+ * mapping, since we don't handle replacing an
+ * existing table with a superpage. If we did we would
+ * need to handle freeing (and accounting) for the bit
+ * of the p2m tree which we would be about to lop off.
+ */
+ BUG_ON(level < 3 && p2m_table(orig_pte));
+ if ( level == 3 )
+ p2m_put_l3_page(orig_pte);
+ }
+ else /* New mapping */
+ p2m->stats.mappings[level]++;
+
+ return P2M_ONE_PROGRESS;
+ }
+ else
+ {
+ /* New mapping is not superpage aligned, create a new table entry */
+
+ /* L3 is always suitably aligned for mapping (handled, above) */
+ BUG_ON(level == 3);
+
+ /* Not present -> create table entry and descend */
+ if ( !p2m_valid(orig_pte) )
+ {
+ rc = p2m_create_table(d, entry, 0, flush_cache);
+ if ( rc < 0 )
+ return rc;
+ return P2M_ONE_DESCEND;
+ }
+
+ /* Existing superpage mapping -> shatter and descend */
+ if ( p2m_mapping(orig_pte) )
+ {
+ *flush = true;
+ rc = p2m_create_table(d, entry,
+ level_shift - PAGE_SHIFT, flush_cache);
+ if ( rc < 0 )
+ return rc;
+
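+ /* Account for the shatter: one mapping at this level becomes
+ * LPAE_ENTRIES mappings at the level below. */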
+ p2m->stats.shattered[level]++;
+ p2m->stats.mappings[level]--;
+ p2m->stats.mappings[level+1] += LPAE_ENTRIES;
+ } /* else: an existing table mapping -> descend */
+
+ BUG_ON(!p2m_table(*entry));
+
+ return P2M_ONE_DESCEND;
+ }
+
+ break;
+
+ case RELINQUISH:
+ case REMOVE:
+ if ( !p2m_valid(orig_pte) )
+ {
+ /* Progress up to next boundary */
+ *addr = (*addr + level_size) & level_mask;
+ return P2M_ONE_PROGRESS_NOP;
+ }
+
+ if ( level < 3 && p2m_table(orig_pte) )
+ return P2M_ONE_DESCEND;
+
+ *flush = true;
+
+ memset(&pte, 0x00, sizeof(pte));
+ p2m_write_pte(entry, pte, flush_cache);
+
+ *addr += level_size;
+
+ p2m->stats.mappings[level]--;
+
+ if ( level == 3 )
+ p2m_put_l3_page(orig_pte);
+
+ /*
+ * This is still a single pte write, no matter the level, so no need to
+ * scale.
+ */
+ return P2M_ONE_PROGRESS;
+
+ case CACHEFLUSH:
+ if ( !p2m_valid(orig_pte) )
+ {
+ *addr = (*addr + level_size) & level_mask;
+ return P2M_ONE_PROGRESS_NOP;
+ }
+
+ if ( level < 3 && p2m_table(orig_pte) )
+ return P2M_ONE_DESCEND;
+
+ /*
+ * We could flush up to the next superpage boundary, but would
+ * need to be careful about preemption, so just do one 4K page
+ * now and return P2M_ONE_PROGRESS{,_NOP} so that the caller will
+ * continue to loop over the rest of the range.
+ */
+ if ( p2m_is_ram(orig_pte.p2m.type) )
+ {
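+ /* *addr & ~level_mask is the offset of *addr within this mapping;
+ * convert it to a page count and add it to the mapping's base. */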
+ unsigned long offset = paddr_to_pfn(*addr & ~level_mask);
+ flush_page_to_ram(orig_pte.p2m.base + offset);
+
+ *addr += PAGE_SIZE;
+ return P2M_ONE_PROGRESS;
+ }
+ else
+ {
+ *addr += PAGE_SIZE;
+ return P2M_ONE_PROGRESS_NOP;
+ }
+ }
+
+ BUG(); /* Should never get here */
+}
+
static int apply_p2m_changes(struct domain *d,
enum p2m_operation op,
paddr_t start_gpaddr,
int mattr,
p2m_type_t t)
{
- int rc;
+ int rc, ret;
struct p2m_domain *p2m = &d->arch.p2m;
lpae_t *first = NULL, *second = NULL, *third = NULL;
paddr_t addr;
cur_first_offset = ~0,
cur_second_offset = ~0;
unsigned long count = 0;
- unsigned int flush = 0;
- bool_t populate = (op == INSERT || op == ALLOCATE);
- lpae_t pte;
+ bool_t flush = false;
bool_t flush_pt;
/* Some IOMMU don't support coherent PT walk. When the p2m is
addr = start_gpaddr;
while ( addr < end_gpaddr )
{
+ /*
+ * Arbitrarily, preempt every 512 operations or 8192 nops.
+ * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
+ *
+ * count is initialised to 0 above, so we are guaranteed to
+ * always make at least one pass.
+ */
+
+ if ( op == RELINQUISH && count >= 0x2000 )
+ {
+ if ( hypercall_preempt_check() )
+ {
+ p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
+ rc = -ERESTART;
+ goto out;
+ }
+ count = 0;
+ }
+
if ( cur_first_page != p2m_first_level_index(addr) )
{
if ( first ) unmap_domain_page(first);
cur_first_page = p2m_first_level_index(addr);
}
- if ( !p2m_valid(first[first_table_offset(addr)]) )
- {
- if ( !populate )
- {
- addr = (addr + FIRST_SIZE) & FIRST_MASK;
- continue;
- }
+ /* We only use a 3 level p2m at the moment, so there is no level 0;
+ * current hardware doesn't support superpage mappings at level 0
+ * anyway. */
- rc = p2m_create_table(d, &first[first_table_offset(addr)],
- flush_pt);
- if ( rc < 0 )
- {
- printk("p2m_populate_ram: L1 failed\n");
- goto out;
- }
- }
+ ret = apply_one_level(d, &first[first_table_offset(addr)],
+ 1, flush_pt, op,
+ start_gpaddr, end_gpaddr,
+ &addr, &maddr, &flush,
+ mattr, t);
+ if ( ret < 0 ) { rc = ret; goto out; }
+ count += ret;
+ if ( ret != P2M_ONE_DESCEND ) continue;
BUG_ON(!p2m_valid(first[first_table_offset(addr)]));
}
/* else: second already valid */
- if ( !p2m_valid(second[second_table_offset(addr)]) )
- {
- if ( !populate )
- {
- addr = (addr + SECOND_SIZE) & SECOND_MASK;
- continue;
- }
-
- rc = p2m_create_table(d, &second[second_table_offset(addr)],
- flush_pt);
- if ( rc < 0 ) {
- printk("p2m_populate_ram: L2 failed\n");
- goto out;
- }
- }
+ ret = apply_one_level(d, &second[second_table_offset(addr)],
+ 2, flush_pt, op,
+ start_gpaddr, end_gpaddr,
+ &addr, &maddr, &flush,
+ mattr, t);
+ if ( ret < 0 ) { rc = ret; goto out; }
+ count += ret;
+ if ( ret != P2M_ONE_DESCEND ) continue;
- BUG_ON(!second[second_table_offset(addr)].p2m.valid);
+ BUG_ON(!p2m_valid(second[second_table_offset(addr)]));
if ( cur_second_offset != second_table_offset(addr) )
{
cur_second_offset = second_table_offset(addr);
}
- pte = third[third_table_offset(addr)];
-
- flush |= pte.p2m.valid;
-
- switch (op) {
- case ALLOCATE:
- {
- /* Allocate a new RAM page and attach */
- struct page_info *page;
-
- ASSERT(!pte.p2m.valid);
- rc = -ENOMEM;
- page = alloc_domheap_page(d, 0);
- if ( page == NULL ) {
- printk("p2m_populate_ram: failed to allocate page\n");
- goto out;
- }
-
- pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
-
- p2m_write_pte(&third[third_table_offset(addr)],
- pte, flush_pt);
- }
- break;
- case INSERT:
- {
- if ( pte.p2m.valid )
- p2m_put_page(pte);
- pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
- p2m_write_pte(&third[third_table_offset(addr)],
- pte, flush_pt);
- maddr += PAGE_SIZE;
- }
- break;
- case RELINQUISH:
- case REMOVE:
- {
- if ( !pte.p2m.valid )
- {
- count++;
- break;
- }
-
- p2m_put_page(pte);
-
- count += 0x10;
-
- memset(&pte, 0x00, sizeof(pte));
- p2m_write_pte(&third[third_table_offset(addr)],
- pte, flush_pt);
- count++;
- }
- break;
-
- case CACHEFLUSH:
- {
- if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
- break;
-
- flush_page_to_ram(pte.p2m.base);
- }
- break;
- }
-
- /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
- if ( op == RELINQUISH && count >= 0x2000 )
- {
- if ( hypercall_preempt_check() )
- {
- p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
- rc = -ERESTART;
- goto out;
- }
- count = 0;
- }
-
- /* Got the next page */
- addr += PAGE_SIZE;
+ ret = apply_one_level(d, &third[third_table_offset(addr)],
+ 3, flush_pt, op,
+ start_gpaddr, end_gpaddr,
+ &addr, &maddr, &flush,
+ mattr, t);
+ if ( ret < 0 ) { rc = ret; goto out; }
+ /* L3 had better have done something! We cannot descend any further */
+ BUG_ON(ret == P2M_ONE_DESCEND);
+ count += ret;
}
if ( flush )