epte->mfn += i * trunk;
epte->snp = (iommu_enabled && iommu_snoop);
ASSERT(!epte->rsvd1);
- ASSERT(!epte->avail1);
ASSERT(!epte->avail3);
ept_p2m_type_to_flags(epte, epte->sa_p2mt, epte->access);
return GUEST_TABLE_NORMAL_PAGE;
}
-static bool_t ept_invalidate_emt(mfn_t mfn)
+/*
+ * Invalidate (via setting the EMT field to an invalid value) all valid
+ * present entries in the given page table, optionally also marking the
+ * entries to indicate that their subtrees need P2M type re-calculation.
+ */
+static bool_t ept_invalidate_emt(mfn_t mfn, bool_t recalc)
{
ept_entry_t *epte = map_domain_page(mfn_x(mfn));
unsigned int i;
ept_entry_t e = atomic_read_ept_entry(&epte[i]);
if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
- e.emt == MTRR_NUM_TYPES )
+ (e.emt == MTRR_NUM_TYPES && (e.recalc || !recalc)) )
continue;
e.emt = MTRR_NUM_TYPES;
+ if ( recalc )
+ e.recalc = 1;
atomic_write_ept_entry(&epte[i], e);
changed = 1;
}
return changed;
}
-bool_t ept_handle_misconfig(uint64_t gpa)
+/*
+ * Resolve deliberately mis-configured (EMT field set to an invalid value)
+ * entries in the page table hierarchy for the given GFN:
+ * - calculate the correct value for the EMT field,
+ * - if so marked, re-calculate the P2M type,
+ * - propagate EMT and re-calculation flag down to the next page table level
+ * for entries not involved in the translation of the given GFN.
+ * Returns:
+ * - a negative errno value on error,
+ * - zero if no adjustment was done,
+ * - a positive value if at least one adjustment was done.
+ */
+static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
{
- struct vcpu *curr = current;
- struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
struct ept_data *ept = &p2m->ept;
unsigned int level = ept_get_wl(ept);
- unsigned long gfn = PFN_DOWN(gpa);
unsigned long mfn = ept_get_asr(ept);
ept_entry_t *epte;
- int okay;
+ int rc = 0;
if ( !mfn )
return 0;
- p2m_lock(p2m);
-
- okay = -curr->arch.hvm_vmx.ept_spurious_misconfig;
for ( ; ; --level )
{
ept_entry_t e;
_mfn(e.mfn), 0, &ipat,
e.sa_p2mt == p2m_mmio_direct);
e.ipat = ipat;
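+ /*
+ * Carry out a pending re-calculation right away for 4k entries: the
+ * new type of an eligible entry follows directly from the GFN's
+ * log-dirty status.
+ */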
+ if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
+ {
+ e.sa_p2mt = p2m_is_logdirty_range(p2m, gfn + i, gfn + i)
+ ? p2m_ram_logdirty : p2m_ram_rw;
+ ept_p2m_type_to_flags(&e, e.sa_p2mt, e.access);
+ }
+ e.recalc = 0;
atomic_write_ept_entry(&epte[i], e);
}
}
int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
level * EPT_TABLE_ORDER, &ipat,
e.sa_p2mt == p2m_mmio_direct);
+ bool_t recalc = e.recalc;
+
+ if ( recalc && p2m_is_changeable(e.sa_p2mt) )
+ {
+ unsigned long mask = ~0UL << (level * EPT_TABLE_ORDER);
+
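+ /*
+ * A superpage can only keep its re-calculated type if its whole GFN
+ * range has a uniform log-dirty status; a partial overlap forces a
+ * split (by requesting an invalid EMT) and re-evaluation at the next
+ * lower level.
+ */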
+ switch ( p2m_is_logdirty_range(p2m, gfn & mask,
+ gfn | ~mask) )
+ {
+ case 0:
+ e.sa_p2mt = p2m_ram_rw;
+ e.recalc = 0;
+ break;
+ case 1:
+ e.sa_p2mt = p2m_ram_logdirty;
+ e.recalc = 0;
+ break;
+ default: /* Force split. */
+ emt = -1;
+ break;
+ }
+ }
if ( unlikely(emt < 0) )
{
if ( ept_split_super_page(p2m, &e, level, level - 1) )
continue;
}
ept_free_entry(p2m, &e, level);
- okay = 0;
+ rc = -ENOMEM;
break;
}
e.emt = emt;
e.ipat = ipat;
+ e.recalc = 0;
+ if ( recalc && p2m_is_changeable(e.sa_p2mt) )
+ ept_p2m_type_to_flags(&e, e.sa_p2mt, e.access);
atomic_write_ept_entry(&epte[i], e);
}
- okay = 1;
+ rc = 1;
break;
}
if ( e.emt == MTRR_NUM_TYPES )
{
ASSERT(is_epte_present(&e));
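+ /*
+ * Propagate the invalidation (and a pending re-calculation, if any)
+ * to the next level before clearing the markers on this entry, so
+ * that entries off the translation path of the given GFN remain
+ * flagged while the walk continues downwards.
+ */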
- ept_invalidate_emt(_mfn(e.mfn));
+ ept_invalidate_emt(_mfn(e.mfn), e.recalc);
smp_wmb();
e.emt = 0;
+ e.recalc = 0;
atomic_write_ept_entry(&epte[i], e);
unmap_domain_page(epte);
- okay = 1;
+ rc = 1;
}
else if ( is_epte_present(&e) && !e.emt )
unmap_domain_page(epte);
}
unmap_domain_page(epte);
- if ( okay > 0 )
+ if ( rc )
{
struct vcpu *v;
- for_each_vcpu ( curr->domain, v )
+ for_each_vcpu ( p2m->domain, v )
v->arch.hvm_vmx.ept_spurious_misconfig = 1;
}
+
+ return rc;
+}
+
+bool_t ept_handle_misconfig(uint64_t gpa)
+{
+ struct vcpu *curr = current;
+ struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
+ bool_t spurious;
+ int rc;
+
+ p2m_lock(p2m);
+
+ spurious = curr->arch.hvm_vmx.ept_spurious_misconfig;
+ rc = resolve_misconfig(p2m, PFN_DOWN(gpa));
curr->arch.hvm_vmx.ept_spurious_misconfig = 0;
ept_sync_domain(p2m);
+
p2m_unlock(p2m);
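+ /*
+ * If the mis-configuration had already been dealt with (i.e. the exit
+ * was spurious), not having adjusted anything here is still success.
+ */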
- return !!okay;
+ return spurious ? (rc >= 0) : (rc > 0);
}
/*
ept_entry_t *table, *ept_entry = NULL;
unsigned long gfn_remainder = gfn;
int i, target = order / EPT_TABLE_ORDER;
- int rc = 0;
- int ret = 0;
+ int ret, rc = 0;
bool_t direct_mmio = (p2mt == p2m_mmio_direct);
uint8_t ipat = 0;
int need_modify_vtd_table = 1;
int vtd_pte_present = 0;
- int needs_sync = 1;
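+ /*
+ * sync_check defers the decision until it is known whether the old
+ * entry was present, sync_on forces a flush (e.g. after pending
+ * re-calculations got resolved), and sync_off suppresses it.
+ */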
+ enum { sync_off, sync_on, sync_check } needs_sync = sync_check;
ept_entry_t old_entry = { .epte = 0 };
ept_entry_t new_entry = { .epte = 0 };
struct ept_data *ept = &p2m->ept;
(order % EPT_TABLE_ORDER) )
return -EINVAL;
+ /* Carry out any possibly pending earlier changes first. */
+ ret = resolve_misconfig(p2m, gfn);
+ if ( ret < 0 )
+ {
+ ept_sync_domain(p2m);
+ return ret;
+ }
+ if ( ret > 0 )
+ needs_sync = sync_on;
+
ASSERT((target == 2 && hvm_hap_has_1gb()) ||
(target == 1 && hvm_hap_has_2mb()) ||
(target == 0));
table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
+ ret = GUEST_TABLE_MAP_FAILED;
for ( i = ept_get_wl(ept); i > target; i-- )
{
ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i);
/* We reached the target level. */
/* No need to flush if the old entry wasn't valid */
- if ( !is_epte_present(ept_entry) )
- needs_sync = 0;
+ if ( needs_sync == sync_check && !is_epte_present(ept_entry) )
+ needs_sync = sync_off;
/* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
* the intermediate tables will be freed below after the ept flush
out:
unmap_domain_page(table);
- if ( needs_sync )
+ if ( needs_sync != sync_off )
ept_sync_domain(p2m);
/* For non-nested p2m, may need to change VT-d page table.*/
u32 index;
int i;
int ret = 0;
+ bool_t recalc = 0;
mfn_t mfn = _mfn(INVALID_MFN);
struct ept_data *ept = &p2m->ept;
for ( i = ept_get_wl(ept); i > 0; i-- )
{
retry:
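+ /*
+ * A pending re-calculation anywhere along the translation path means
+ * the type stored in the leaf may be stale.
+ */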
+ if ( table[gfn_remainder >> (i * EPT_TABLE_ORDER)].recalc )
+ recalc = 1;
ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
if ( !ret )
goto out;
if ( is_epte_valid(ept_entry) )
{
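+ /*
+ * With a re-calculation pending, report the type the entry would be
+ * given by resolve_misconfig(), i.e. derive it from the GFN's current
+ * log-dirty status rather than from the stored, possibly stale, type.
+ */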
- *t = ept_entry->sa_p2mt;
+ if ( (recalc || ept_entry->recalc) &&
+ p2m_is_changeable(ept_entry->sa_p2mt) )
+ *t = p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
+ : p2m_ram_rw;
+ else
+ *t = ept_entry->sa_p2mt;
*a = ept_entry->access;
mfn = _mfn(ept_entry->mfn);
return;
}
-/*
- * Walk the whole p2m table, changing any entries of the old type
- * to the new type. This is used in hardware-assisted paging to
- * quickly enable or diable log-dirty tracking
- */
-static void ept_change_entry_type_page(mfn_t ept_page_mfn, int ept_page_level,
- p2m_type_t ot, p2m_type_t nt)
-{
- ept_entry_t e, *epte = map_domain_page(mfn_x(ept_page_mfn));
-
- for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
- {
- if ( !is_epte_valid(epte + i) )
- continue;
-
- if ( (ept_page_level > 0) && !is_epte_superpage(epte + i) )
- ept_change_entry_type_page(_mfn(epte[i].mfn),
- ept_page_level - 1, ot, nt);
- else
- {
- e = atomic_read_ept_entry(&epte[i]);
- if ( e.sa_p2mt != ot )
- continue;
-
- e.sa_p2mt = nt;
- ept_p2m_type_to_flags(&e, nt, e.access);
- atomic_write_ept_entry(&epte[i], e);
- }
- }
-
- unmap_domain_page(epte);
-}
-
static void ept_change_entry_type_global(struct p2m_domain *p2m,
p2m_type_t ot, p2m_type_t nt)
{
- struct ept_data *ept = &p2m->ept;
- if ( ept_get_asr(ept) == 0 )
- return;
+ unsigned long mfn = ept_get_asr(&p2m->ept);
- BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
- BUG_ON(p2m_is_mmio(ot) || p2m_is_mmio(nt));
+ if ( !mfn || ot == nt )
+ return;
- ept_change_entry_type_page(_mfn(ept_get_asr(ept)),
- ept_get_wl(ept), ot, nt);
+ BUG_ON(!p2m_is_changeable(ot) || !p2m_is_changeable(nt));
- ept_sync_domain(p2m);
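+ /*
+ * Rather than walking the entire tree, only mark the top level entries
+ * for re-calculation; lower levels get marked, and the actual type
+ * changes carried out, lazily via resolve_misconfig(). The new type is
+ * derived from the log-dirty state at that time, so ot/nt need not be
+ * recorded here.
+ */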
+ if ( ept_invalidate_emt(_mfn(mfn), 1) )
+ ept_sync_domain(p2m);
}
static void ept_memory_type_changed(struct p2m_domain *p2m)
if ( !mfn )
return;
- if ( ept_invalidate_emt(_mfn(mfn)) )
+ if ( ept_invalidate_emt(_mfn(mfn), 0) )
ept_sync_domain(p2m);
}
if ( p2m )
{
- d->arch.p2m = p2m;
- return 0;
+ p2m->logdirty_ranges = rangeset_new(d, "log-dirty",
+ RANGESETF_prettyprint_hex);
+ if ( p2m->logdirty_ranges )
+ {
+ d->arch.p2m = p2m;
+ return 0;
+ }
+ p2m_free_one(p2m);
}
return -ENOMEM;
}
if ( p2m )
{
+ rangeset_destroy(p2m->logdirty_ranges);
p2m_free_one(p2m);
d->arch.p2m = NULL;
}
return rc;
}
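+/*
+ * Returns 1 if the whole range is log-dirty, 0 if none of it is, and -1 if
+ * it is only partially covered (i.e. has no uniform log-dirty status).
+ */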
+int p2m_is_logdirty_range(struct p2m_domain *p2m, unsigned long start,
+ unsigned long end)
+{
+ ASSERT(!p2m_is_nestedp2m(p2m));
+ if ( p2m->global_logdirty ||
+ rangeset_contains_range(p2m->logdirty_ranges, start, end) )
+ return 1;
+ if ( rangeset_overlaps_range(p2m->logdirty_ranges, start, end) )
+ return -1;
+ return 0;
+}
+
void p2m_change_entry_type_global(struct domain *d,
p2m_type_t ot, p2m_type_t nt)
{
struct p2m_domain *p2m = p2m_get_hostp2m(d);
p2m_lock(p2m);
p2m->change_entry_type_global(p2m, ot, nt);
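+ /*
+ * Record whether the whole p2m is now log-dirty, so that
+ * p2m_is_logdirty_range() reports GFNs outside any individual range
+ * correctly.
+ */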
+ p2m->global_logdirty = (nt == p2m_ram_logdirty);
p2m_unlock(p2m);
}
unsigned long gfn;
mfn_t mfn;
struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ int rc = 0;
BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, &order);
while ( order > PAGE_ORDER_4K )
{
- if ( pt != ot )
- break;
- if ( !(gfn & ((1UL << order) - 1)) &&
- end > (gfn | ((1UL << order) - 1)) )
- break;
+ unsigned long mask = ~0UL << order;
+
+ /*
+ * Log-dirty ranges starting/ending in the middle of a super page
+ * (with a page split still pending) can't have a consistent type
+ * reported for the full range and hence need the split to be
+ * enforced here.
+ */
+ if ( !p2m_is_changeable(pt) ||
+ p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) >= 0 )
+ {
+ if ( pt != ot )
+ break;
+ if ( !(gfn & ~mask) && end > (gfn | ~mask) )
+ break;
+ }
if ( order == PAGE_ORDER_1G )
order = PAGE_ORDER_2M;
else
break;
}
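+ /* Keep the log-dirty rangesets in sync with the type change just made. */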
+ switch ( nt )
+ {
+ case p2m_ram_rw:
+ if ( ot == p2m_ram_logdirty )
+ rc = rangeset_remove_range(p2m->logdirty_ranges, start, end - 1);
+ break;
+ case p2m_ram_logdirty:
+ if ( ot == p2m_ram_rw )
+ rc = rangeset_add_range(p2m->logdirty_ranges, start, end - 1);
+ break;
+ default:
+ break;
+ }
+ if ( rc )
+ {
+ printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty ranges\n",
+ rc, d->domain_id);
+ domain_crash(d);
+ }
+
p2m->defer_nested_flush = 0;
if ( nestedhvm_enabled(d) )
p2m_flush_nestedp2m(d);