#include <xen/config.h>
#include <xen/sched.h>
#include <xen/lib.h>
+#include <xen/stdbool.h>
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <xen/bitops.h>
{
return p2m_valid(pte) && pte.p2m.table;
}
-#if 0
static bool_t p2m_mapping(lpae_t pte)
{
return p2m_valid(pte) && !pte.p2m.table;
}
-#endif
+
+void p2m_dump_info(struct domain *d)
+{
+ struct p2m_domain *p2m = &d->arch.p2m;
+
+ spin_lock(&p2m->lock);
+ printk("p2m mappings for domain %d (vmid %d):\n",
+ d->domain_id, p2m->vmid);
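+ /* Level 0 is never used for mappings, so its counters must stay zero. */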
+ BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]);
+ printk(" 1G mappings: %ld (shattered %ld)\n",
+ p2m->stats.mappings[1], p2m->stats.shattered[1]);
+ printk(" 2M mappings: %ld (shattered %ld)\n",
+ p2m->stats.mappings[2], p2m->stats.shattered[2]);
+ printk(" 4K mappings: %ld\n", p2m->stats.mappings[3]);
+ spin_unlock(&p2m->lock);
+}
void dump_p2m_lookup(struct domain *d, paddr_t addr)
{
clean_xen_dcache(*p);
}
-/* Allocate a new page table page and hook it in via the given entry */
-static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t flush_cache)
+/*
+ * Allocate a new page table page and hook it in via the given entry.
+ * apply_one_level relies on this returning 0 on success
+ * and -ve on failure.
+ *
+ * If the existing entry is present then it must be a mapping and not
+ * a table and it will be shattered into the next level down.
+ *
+ * level_shift is the number of bits at the level we want to create.
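+ *
+ * For example, shattering a first level 1G mapping passes
+ * level_shift = FIRST_SHIFT - PAGE_SHIFT, so each of the LPAE_ENTRIES
+ * new second level entries maps base_pfn + (i << LPAE_SHIFT), i.e. one
+ * 2M block, and has p2m.table cleared to mark it as a superpage.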
+ */
+static int p2m_create_table(struct domain *d, lpae_t *entry,
+ int level_shift, bool_t flush_cache)
{
struct p2m_domain *p2m = &d->arch.p2m;
struct page_info *page;
- void *p;
+ lpae_t *p;
lpae_t pte;
+ int splitting = p2m_valid(*entry);
- BUG_ON(entry->p2m.valid);
+ BUG_ON(p2m_table(*entry));
page = alloc_domheap_page(NULL, 0);
if ( page == NULL )
return -ENOMEM;
page_list_add(page, &p2m->pages);
p = __map_domain_page(page);
- clear_page(p);
+ if ( splitting )
+ {
+ p2m_type_t t = entry->p2m.type;
+ unsigned long base_pfn = entry->p2m.base;
+ int i;
+
+ /*
+ * We are either splitting a first level 1G page into 512 second level
+ * 2M pages, or a second level 2M page into 512 third level 4K pages.
+ */
+ for ( i = 0; i < LPAE_ENTRIES; i++ )
+ {
+ pte = mfn_to_p2m_entry(base_pfn + (i<<(level_shift-LPAE_SHIFT)),
+ MATTR_MEM, t);
+
+ /*
+ * First and second level super pages set p2m.table = 0, but
+ * third level entries set table = 1.
+ */
+ if ( level_shift - LPAE_SHIFT )
+ pte.p2m.table = 0;
+
+ write_pte(&p[i], pte);
+ }
+ }
+ else
+ clear_page(p);
+
if ( flush_cache )
clean_xen_dcache_va_range(p, PAGE_SIZE);
+
unmap_domain_page(p);
pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
CACHEFLUSH,
};
-static void p2m_put_page(const lpae_t pte)
+/* Put any references on the single 4K page referenced by pte. TODO:
+ * Handle superpages; for now we only take special references for leaf
+ * pages (specifically foreign ones, which can't be super mapped today).
+ */
+static void p2m_put_l3_page(const lpae_t pte)
{
+ ASSERT(p2m_valid(pte));
+
/* TODO: Handle other p2m types
*
* It's safe to do the put_page here because page_alloc will
}
}
+/*
+ * Returns true if start_gpaddr..end_gpaddr contains at least one
+ * suitably aligned level_size mapping of maddr.
+ *
+ * So long as the range is large enough the end_gpaddr need not be
+ * aligned (callers should create one superpage mapping based on this
+ * result and then call this again on the remaining range; eventually
+ * the slop at the end will cause this function to return false).
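+ *
+ * For example, at the 2M level this returns true when start_gpaddr and
+ * maddr are both 2M aligned and the range covers at least 2M, and
+ * false if either address is offset by, say, 4K.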
+ */
+static bool_t is_mapping_aligned(const paddr_t start_gpaddr,
+ const paddr_t end_gpaddr,
+ const paddr_t maddr,
+ const paddr_t level_size)
+{
+ const paddr_t level_mask = level_size - 1;
+
+ /* No hardware superpages at level 0 */
+ if ( level_size == ZEROETH_SIZE )
+ return false;
+
+ /*
+ * A range smaller than the size of a superpage at this level
+ * cannot be superpage aligned.
+ */
+ if ( ( end_gpaddr - start_gpaddr ) < level_size - 1 )
+ return false;
+
+ /* Both the gpaddr and maddr must be aligned */
+ if ( start_gpaddr & level_mask )
+ return false;
+ if ( maddr & level_mask )
+ return false;
+ return true;
+}
+
+#define P2M_ONE_DESCEND 0
+#define P2M_ONE_PROGRESS_NOP 0x1
+#define P2M_ONE_PROGRESS 0x10
+
+/*
+ * 0 == (P2M_ONE_DESCEND) continue to descend the tree
+ * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush,
+ * entry, addr and maddr updated. Return value is an
+ * indication of the amount of work done (for preemption).
+ * -ve == (-Exxx) error.
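+ *
+ * Callers add the +ve return values to their preemption counter and
+ * "continue" their walk instead of descending another level.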
+ */
+static int apply_one_level(struct domain *d,
+ lpae_t *entry,
+ unsigned int level,
+ bool_t flush_cache,
+ enum p2m_operation op,
+ paddr_t start_gpaddr,
+ paddr_t end_gpaddr,
+ paddr_t *addr,
+ paddr_t *maddr,
+ bool_t *flush,
+ int mattr,
+ p2m_type_t t)
+{
+ /* Helpers to lookup the properties of each level */
+ const paddr_t level_sizes[] =
+ { ZEROETH_SIZE, FIRST_SIZE, SECOND_SIZE, THIRD_SIZE };
+ const paddr_t level_masks[] =
+ { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
+ const paddr_t level_shifts[] =
+ { ZEROETH_SHIFT, FIRST_SHIFT, SECOND_SHIFT, THIRD_SHIFT };
+ const paddr_t level_size = level_sizes[level];
+ const paddr_t level_mask = level_masks[level];
+ const paddr_t level_shift = level_shifts[level];
+
+ struct p2m_domain *p2m = &d->arch.p2m;
+ lpae_t pte;
+ const lpae_t orig_pte = *entry;
+ int rc;
+
+ BUG_ON(level > 3);
+
+ switch ( op )
+ {
+ case ALLOCATE:
+ ASSERT(level < 3 || !p2m_valid(orig_pte));
+ ASSERT(*maddr == 0);
+
+ if ( p2m_valid(orig_pte) )
+ return P2M_ONE_DESCEND;
+
+ if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) )
+ {
+ struct page_info *page;
+
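+ /* Try to allocate one superpage's worth of contiguous memory for
+ * this level, i.e. 2^(level_shift - PAGE_SHIFT) pages. */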
+ page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
+ if ( page )
+ {
+ pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
+ if ( level < 3 )
+ pte.p2m.table = 0;
+ p2m_write_pte(entry, pte, flush_cache);
+ p2m->stats.mappings[level]++;
+
+ *addr += level_size;
+ *maddr += level_size;
+
+ return P2M_ONE_PROGRESS;
+ }
+ else if ( level == 3 )
+ return -ENOMEM;
+ }
+
+ /* L3 is always suitably aligned for mapping (handled, above) */
+ BUG_ON(level == 3);
+
+ /*
+ * If we get here then we failed to allocate a sufficiently
+ * large contiguous region for this level (which can't be
+ * L3). Create a page table and continue to descend so we try
+ * smaller allocations.
+ */
+ rc = p2m_create_table(d, entry, 0, flush_cache);
+ if ( rc < 0 )
+ return rc;
+
+ return P2M_ONE_DESCEND;
+
+ case INSERT:
+ if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
+ /* We do not handle replacing an existing table with a superpage */
+ (level == 3 || !p2m_table(orig_pte)) )
+ {
+ /* New mapping is superpage aligned, make it */
+ pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t);
+ if ( level < 3 )
+ pte.p2m.table = 0; /* Superpage entry */
+
+ p2m_write_pte(entry, pte, flush_cache);
+
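+ /* Replacing a previously valid entry means the TLB must be flushed. */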
+ *flush |= p2m_valid(orig_pte);
+
+ *addr += level_size;
+ *maddr += level_size;
+
+ if ( p2m_valid(orig_pte) )
+ {
+ /*
+ * We can't currently get here for an existing table
+ * mapping, since we don't handle replacing an
+ * existing table with a superpage. If we did we would
+ * need to handle freeing (and accounting) for the bit
+ * of the p2m tree which we would be about to lop off.
+ */
+ BUG_ON(level < 3 && p2m_table(orig_pte));
+ if ( level == 3 )
+ p2m_put_l3_page(orig_pte);
+ }
+ else /* New mapping */
+ p2m->stats.mappings[level]++;
+
+ return P2M_ONE_PROGRESS;
+ }
+ else
+ {
+ /* New mapping is not superpage aligned, create a new table entry */
+
+ /* L3 is always suitably aligned for mapping (handled, above) */
+ BUG_ON(level == 3);
+
+ /* Not present -> create table entry and descend */
+ if ( !p2m_valid(orig_pte) )
+ {
+ rc = p2m_create_table(d, entry, 0, flush_cache);
+ if ( rc < 0 )
+ return rc;
+ return P2M_ONE_DESCEND;
+ }
+
+ /* Existing superpage mapping -> shatter and descend */
+ if ( p2m_mapping(orig_pte) )
+ {
+ *flush = true;
+ rc = p2m_create_table(d, entry,
+ level_shift - PAGE_SHIFT, flush_cache);
+ if ( rc < 0 )
+ return rc;
+
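+ /* Account for the shatter: one mapping at this level becomes
+ * LPAE_ENTRIES mappings at the level below. */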
+ p2m->stats.shattered[level]++;
+ p2m->stats.mappings[level]--;
+ p2m->stats.mappings[level+1] += LPAE_ENTRIES;
+ } /* else: an existing table mapping -> descend */
+
+ BUG_ON(!p2m_table(*entry));
+
+ return P2M_ONE_DESCEND;
+ }
+
+ break;
+
+ case RELINQUISH:
+ case REMOVE:
+ if ( !p2m_valid(orig_pte) )
+ {
+ /* Progress up to next boundary */
+ *addr = (*addr + level_size) & level_mask;
+ return P2M_ONE_PROGRESS_NOP;
+ }
+
+ if ( level < 3 && p2m_table(orig_pte) )
+ return P2M_ONE_DESCEND;
+
+ *flush = true;
+
+ memset(&pte, 0x00, sizeof(pte));
+ p2m_write_pte(entry, pte, flush_cache);
+
+ *addr += level_size;
+
+ p2m->stats.mappings[level]--;
+
+ if ( level == 3 )
+ p2m_put_l3_page(orig_pte);
+
+ /*
+ * This is still a single pte write, no matter the level, so no need to
+ * scale.
+ */
+ return P2M_ONE_PROGRESS;
+
+ case CACHEFLUSH:
+ if ( !p2m_valid(orig_pte) )
+ {
+ *addr = (*addr + level_size) & level_mask;
+ return P2M_ONE_PROGRESS_NOP;
+ }
+
+ if ( level < 3 && p2m_table(orig_pte) )
+ return P2M_ONE_DESCEND;
+
+ /*
+ * We could flush up to the next superpage boundary, but would
+ * need to be careful about preemption, so just do one 4K page
+ * now and return P2M_ONE_PROGRESS{,_NOP} so that the caller will
+ * continue to loop over the rest of the range.
+ */
+ if ( p2m_is_ram(orig_pte.p2m.type) )
+ {
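+ /* *addr & ~level_mask is the offset of *addr within this mapping;
+ * convert it to a page count and add it to the mapping's base. */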
+ unsigned long offset = paddr_to_pfn(*addr & ~level_mask);
+ flush_page_to_ram(orig_pte.p2m.base + offset);
+
+ *addr += PAGE_SIZE;
+ return P2M_ONE_PROGRESS;
+ }
+ else
+ {
+ *addr += PAGE_SIZE;
+ return P2M_ONE_PROGRESS_NOP;
+ }
+ }
+
+ BUG(); /* Should never get here */
+}
+
static int apply_p2m_changes(struct domain *d,
enum p2m_operation op,
paddr_t start_gpaddr,
int mattr,
p2m_type_t t)
{
- int rc;
+ int rc, ret;
struct p2m_domain *p2m = &d->arch.p2m;
lpae_t *first = NULL, *second = NULL, *third = NULL;
paddr_t addr;
cur_first_offset = ~0,
cur_second_offset = ~0;
unsigned long count = 0;
- unsigned int flush = 0;
- bool_t populate = (op == INSERT || op == ALLOCATE);
- lpae_t pte;
+ bool_t flush = false;
bool_t flush_pt;
/* Some IOMMU don't support coherent PT walk. When the p2m is
addr = start_gpaddr;
while ( addr < end_gpaddr )
{
+ /*
+ * Arbitrarily, preempt every 512 operations or 8192 nops.
+ * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
+ *
+ * count is initialised to 0 above, so we are guaranteed to
+ * always make at least one pass.
+ */
+
+ if ( op == RELINQUISH && count >= 0x2000 )
+ {
+ if ( hypercall_preempt_check() )
+ {
+ p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
+ rc = -ERESTART;
+ goto out;
+ }
+ count = 0;
+ }
+
if ( cur_first_page != p2m_first_level_index(addr) )
{
if ( first ) unmap_domain_page(first);
cur_first_page = p2m_first_level_index(addr);
}
- if ( !p2m_valid(first[first_table_offset(addr)]) )
- {
- if ( !populate )
- {
- addr = (addr + FIRST_SIZE) & FIRST_MASK;
- continue;
- }
+ /* We only use a 3 level p2m at the moment, so there is no level 0;
+ * current hardware doesn't support superpage mappings at level 0
+ * anyway. */
- rc = p2m_create_table(d, &first[first_table_offset(addr)],
- flush_pt);
- if ( rc < 0 )
- {
- printk("p2m_populate_ram: L1 failed\n");
- goto out;
- }
- }
+ ret = apply_one_level(d, &first[first_table_offset(addr)],
+ 1, flush_pt, op,
+ start_gpaddr, end_gpaddr,
+ &addr, &maddr, &flush,
+ mattr, t);
+ if ( ret < 0 ) { rc = ret; goto out; }
+ count += ret;
+ if ( ret != P2M_ONE_DESCEND ) continue;
BUG_ON(!p2m_valid(first[first_table_offset(addr)]));
}
/* else: second already valid */
- if ( !p2m_valid(second[second_table_offset(addr)]) )
- {
- if ( !populate )
- {
- addr = (addr + SECOND_SIZE) & SECOND_MASK;
- continue;
- }
-
- rc = p2m_create_table(d, &second[second_table_offset(addr)],
- flush_pt);
- if ( rc < 0 ) {
- printk("p2m_populate_ram: L2 failed\n");
- goto out;
- }
- }
+ ret = apply_one_level(d, &second[second_table_offset(addr)],
+ 2, flush_pt, op,
+ start_gpaddr, end_gpaddr,
+ &addr, &maddr, &flush,
+ mattr, t);
+ if ( ret < 0 ) { rc = ret; goto out; }
+ count += ret;
+ if ( ret != P2M_ONE_DESCEND ) continue;
- BUG_ON(!second[second_table_offset(addr)].p2m.valid);
+ BUG_ON(!p2m_valid(second[second_table_offset(addr)]));
if ( cur_second_offset != second_table_offset(addr) )
{
cur_second_offset = second_table_offset(addr);
}
- pte = third[third_table_offset(addr)];
-
- flush |= pte.p2m.valid;
-
- switch (op) {
- case ALLOCATE:
- {
- /* Allocate a new RAM page and attach */
- struct page_info *page;
-
- ASSERT(!pte.p2m.valid);
- rc = -ENOMEM;
- page = alloc_domheap_page(d, 0);
- if ( page == NULL ) {
- printk("p2m_populate_ram: failed to allocate page\n");
- goto out;
- }
-
- pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
-
- p2m_write_pte(&third[third_table_offset(addr)],
- pte, flush_pt);
- }
- break;
- case INSERT:
- {
- if ( pte.p2m.valid )
- p2m_put_page(pte);
- pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
- p2m_write_pte(&third[third_table_offset(addr)],
- pte, flush_pt);
- maddr += PAGE_SIZE;
- }
- break;
- case RELINQUISH:
- case REMOVE:
- {
- if ( !pte.p2m.valid )
- {
- count++;
- break;
- }
-
- p2m_put_page(pte);
-
- count += 0x10;
-
- memset(&pte, 0x00, sizeof(pte));
- p2m_write_pte(&third[third_table_offset(addr)],
- pte, flush_pt);
- count++;
- }
- break;
-
- case CACHEFLUSH:
- {
- if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
- break;
-
- flush_page_to_ram(pte.p2m.base);
- }
- break;
- }
-
- /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
- if ( op == RELINQUISH && count >= 0x2000 )
- {
- if ( hypercall_preempt_check() )
- {
- p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
- rc = -ERESTART;
- goto out;
- }
- count = 0;
- }
-
- /* Got the next page */
- addr += PAGE_SIZE;
+ ret = apply_one_level(d, &third[third_table_offset(addr)],
+ 3, flush_pt, op,
+ start_gpaddr, end_gpaddr,
+ &addr, &maddr, &flush,
+ mattr, t);
+ if ( ret < 0 ) { rc = ret; goto out; }
+ /* L3 had better have done something! We cannot descend any further */
+ BUG_ON(ret == P2M_ONE_DESCEND);
+ count += ret;
}
if ( flush )