xen: arm: force guest memory accesses to cacheable when MMU is disabled
author    Ian Campbell <ian.campbell@citrix.com>
          Wed, 8 Jan 2014 14:09:01 +0000 (14:09 +0000)
committer Ian Campbell <ian.campbell@citrix.com>
          Fri, 10 Jan 2014 16:56:37 +0000 (16:56 +0000)
On ARM, guest OSes are started with the MMU and caches disabled (as they are
on native hardware); however, caching is enabled in the domain running the
builder, and therefore we must ensure cache consistency.

The existing solution to this problem (a0035ecc0d82 "tools: libxc: flush data
cache after loading images into guest memory") is to flush the caches after
loading the various blobs into guest RAM. However, this approach has two
shortcomings:

 - The cache flush primitives available to userspace on arm32 are not
   sufficient for our needs.
 - There is a race between the cache flush and the unmap of the guest page
   where the processor might speculatively dirty the cache line again.

(Of these, the second is the more fundamental; the sketch below illustrates
it.)
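
For illustration, the old flow looked roughly like this (a hedged sketch with
generic helper names, not the actual libxc loader internals):

    /* Illustrative only: map_guest_page(), clean_dcache_range() and
     * unmap_guest_page() are placeholder names, not real libxc calls. */
    void *dst = map_guest_page(gfn);      /* cacheable mapping in builder */
    memcpy(dst, blob, PAGE_SIZE);         /* dirties data-cache lines     */
    clean_dcache_range(dst, PAGE_SIZE);   /* flush towards RAM...         */
    /*
     * ...but between the flush and the unmap the CPU may speculatively
     * re-allocate and dirty a line covering 'dst', so the guest's later
     * uncached reads can still observe stale RAM contents.
     */
    unmap_guest_page(dst);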

This patch makes use of the hardware functionality to force all accesses made
from guest mode to be cached (the HCR.DC, "default cacheable", bit). This
means that we don't need to worry about the domain builder's writes being
cached, because the guest's "uncached" accesses will actually be cached.

Unfortunately the use of HCR.DC is incompatible with the guest enabling its
MMU (SCTLR.M bit). Therefore we must trap accesses to the SCTLR so that we can
detect when this happens and disable HCR.DC. This is done with the HCR.TVM
(trap virtual memory controls) bit, which also causes various other registers
to be trapped, all of which can be passed straight through to the underlying
register. Once the guest has enabled its MMU we no longer need to trap, so
there is no ongoing overhead. In my tests Linux makes about half a dozen
accesses to these registers before the MMU is enabled; I would expect other
OSes to behave similarly, since the sequence of writes needed to set up the
MMU is pretty obvious (sketched below).
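
For reference, that guest-side sequence looks roughly like the following on
AArch64 (an illustrative sketch only; the values in mair/tcr/ttbr0/sctlr and
the exact ordering vary by OS). Each MSR here traps to Xen under HCR.TVM and
is passed through; the final write sets SCTLR.M, at which point Xen clears
HCR.DC/HCR.TVM and flushes the TLB for the guest's VMID:

    /* Illustrative guest boot code; mair, tcr, ttbr0 and sctlr are
     * assumed to hold values the guest has computed, with SCTLR.M set
     * in 'sctlr'. */
    asm volatile(
        "msr mair_el1, %0  \n\t"  /* memory attributes (trapped)       */
        "msr tcr_el1, %1   \n\t"  /* translation control (trapped)     */
        "msr ttbr0_el1, %2 \n\t"  /* page-table base (trapped)         */
        "isb               \n\t"
        "msr sctlr_el1, %3 \n\t"  /* M bit set: Xen drops HCR.DC/TVM   */
        "isb"                     /* and flushes the TLB for this VMID */
        : : "r" (mair), "r" (tcr), "r" (ttbr0), "r" (sctlr) : "memory");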

Apart from this unfortunate need to trap these accesses, this approach is
incompatible with guests which attempt to do DMA operations with their MMU
disabled. In practice this means guests with passthrough, which we do not yet
support. Since a typical guest (including dom0) does not access devices which
require DMA until it is fully up and running with paging enabled, the main
risk is to in-guest firmware which does DMA, i.e. running EFI in a guest,
with a disk passed through, and booting from that disk. Since we know that
dom0 is not using any such firmware, and we do not support device passthrough
to guests yet, we can live with this restriction. Once passthrough is
implemented this will need to be revisited.

The patch includes a couple of seemingly unrelated but necessary changes:

 - HSR_SYSREG_CRN_MASK was incorrectly defined. This happened to be benign
   with the existing set of system registers we handled, but broke with the
   new ones introduced here (see the decoding sketch after this list).
 - The defines used to decode the HSR system register fields were named the
   same as the registers themselves, which broke the accessor macros. This
   had gone unnoticed because the handling of the existing trapped registers
   did not require accessing the underlying hardware register. Rename those
   constants with an HSR_SYSREG prefix (in line with HSR_CP32/64 for 32-bit
   registers).
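
To see why the old mask was wrong: in the HSR's ISS encoding the CRn field
occupies bits [13:10], a four-bit field, so the mask must be 0x3c00. The old
value 0x3800 dropped bit 10, which happened to be harmless for the previously
handled CNTP_* registers (CRn = c14, whose low bit is clear) but mis-decodes,
for example, SCTLR_EL1 (CRn = c1). A standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    #define CRN_SHIFT    10
    #define CRN_MASK_OLD 0x00003800u  /* bits 13:11 only - wrong */
    #define CRN_MASK_NEW 0x00003c00u  /* bits 13:10 - correct    */

    int main(void)
    {
        /* SCTLR_EL1 is Op0=3, Op1=0, CRn=c1, CRm=c0, Op2=0, so CRn=1. */
        uint32_t iss = 1u << CRN_SHIFT;
        printf("old: CRn=%u\n", (iss & CRN_MASK_OLD) >> CRN_SHIFT); /* 0 */
        printf("new: CRn=%u\n", (iss & CRN_MASK_NEW) >> CRN_SHIFT); /* 1 */
        return 0;
    }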

This patch has survived thousands of boot loops on a Midway system.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Julien Grall <julien.grall@linaro.org>
xen/arch/arm/domain.c
xen/arch/arm/traps.c
xen/arch/arm/vtimer.c
xen/include/asm-arm/cpregs.h
xen/include/asm-arm/domain.h
xen/include/asm-arm/processor.h
xen/include/asm-arm/sysregs.h

xen/arch/arm/domain.c
index 124cccf292841d203bae281d8eef937b2a1c41e6..635a9a4602840e7a8d4cfb5ba9b583ae49b2ecf6 100644
@@ -19,6 +19,7 @@
 #include <xen/errno.h>
 #include <xen/bitops.h>
 #include <xen/grant_table.h>
+#include <xen/stdbool.h>
 
 #include <asm/current.h>
 #include <asm/event.h>
@@ -219,6 +220,11 @@ static void ctxt_switch_to(struct vcpu *n)
     else
         hcr |= HCR_RW;
 
+    if ( n->arch.default_cache )
+        hcr |= (HCR_TVM|HCR_DC);
+    else
+        hcr &= ~(HCR_TVM|HCR_DC);
+
     WRITE_SYSREG(hcr, HCR_EL2);
     isb();
 
@@ -469,6 +475,7 @@ int vcpu_initialise(struct vcpu *v)
         return rc;
 
     v->arch.sctlr = SCTLR_GUEST_INIT;
+    v->arch.default_cache = true;
 
     /*
      * By default exposes an SMP system with AFF0 set to the VCPU ID
xen/arch/arm/traps.c
index 7c5ab199a633b61b9e7c3e6202ca445766703c15..48a6fcc800517436e81926a455f9584e3bc41907 100644
 #include <xen/hypercall.h>
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
+#include <xen/stdbool.h>
 #include <public/sched.h>
 #include <public/xen.h>
 #include <asm/event.h>
 #include <asm/regs.h>
 #include <asm/cpregs.h>
 #include <asm/psci.h>
+#include <asm/flushtlb.h>
 
 #include "decode.h"
 #include "io.h"
@@ -1279,6 +1281,29 @@ static void advance_pc(struct cpu_user_regs *regs, union hsr hsr)
     regs->pc += hsr.len ? 4 : 2;
 }
 
+static void update_sctlr(struct vcpu *v, uint32_t val)
+{
+    /*
+     * If MMU (SCTLR_M) is now enabled then we must disable HCR.DC
+     * because they are incompatible.
+     *
+     * Once HCR.DC is disabled we no longer need HCR_TVM either,
+     * since its only purpose was to catch the MMU being enabled.
+     *
+     * Both are set appropriately on context switch but we need to
+     * clear them now since we may not context switch on return to
+     * guest.
+     */
+    if ( val & SCTLR_M )
+    {
+        WRITE_SYSREG(READ_SYSREG(HCR_EL2) & ~(HCR_DC|HCR_TVM), HCR_EL2);
+        /* ARM ARM 0406C.b B3.2.1: Disabling HCR.DC without changing
+         * VMID requires us to flush the TLB for that VMID. */
+        flush_tlb();
+        v->arch.default_cache = false;
+    }
+}
+
 static void do_cp15_32(struct cpu_user_regs *regs,
                        union hsr hsr)
 {
@@ -1338,6 +1363,89 @@ static void do_cp15_32(struct cpu_user_regs *regs,
         if ( cp32.read )
            *r = v->arch.actlr;
         break;
+
+/* Passthru a 32-bit AArch32 register which is also 32-bit under AArch64 */
+#define CP32_PASSTHRU32(R...) do {              \
+    if ( cp32.read )                            \
+        *r = READ_SYSREG32(R);                  \
+    else                                        \
+        WRITE_SYSREG32(*r, R);                  \
+} while(0)
+
+/*
+ * Passthru a 32-bit AArch32 register which is 64-bit under AArch64.
+ * Updates the lower 32-bits and clears the upper bits.
+ */
+#define CP32_PASSTHRU64(R...) do {              \
+    if ( cp32.read )                            \
+        *r = (uint32_t)READ_SYSREG64(R);        \
+    else                                        \
+        WRITE_SYSREG64((uint64_t)*r, R);        \
+} while(0)
+
+/*
+ * Passthru a 32-bit AArch32 register which is 64-bit under AArch64.
+ * Updates either the HI ([63:32]) or LO ([31:0]) 32-bits preserving
+ * the other half.
+ */
+#ifdef CONFIG_ARM_64
+#define CP32_PASSTHRU64_HI(R...) do {                   \
+    if ( cp32.read )                                    \
+        *r = (uint32_t)(READ_SYSREG64(R) >> 32);        \
+    else                                                \
+    {                                                   \
+        uint64_t t = READ_SYSREG64(R) & 0xffffffffUL;   \
+        t |= ((uint64_t)(*r)) << 32;                    \
+        WRITE_SYSREG64(t, R);                           \
+    }                                                   \
+} while(0)
+#define CP32_PASSTHRU64_LO(R...) do {                           \
+    if ( cp32.read )                                            \
+        *r = (uint32_t)(READ_SYSREG64(R) & 0xffffffff);         \
+    else                                                        \
+    {                                                           \
+        uint64_t t = READ_SYSREG64(R) & 0xffffffff00000000UL;   \
+        t |= *r;                                                \
+        WRITE_SYSREG64(t, R);                                   \
+    }                                                           \
+} while(0)
+#endif
+
+    /* HCR.TVM */
+    case HSR_CPREG32(SCTLR):
+        CP32_PASSTHRU32(SCTLR_EL1);
+        update_sctlr(v, *r);
+        break;
+    case HSR_CPREG32(TTBR0_32):   CP32_PASSTHRU64(TTBR0_EL1);      break;
+    case HSR_CPREG32(TTBR1_32):   CP32_PASSTHRU64(TTBR1_EL1);      break;
+    case HSR_CPREG32(TTBCR):      CP32_PASSTHRU32(TCR_EL1);        break;
+    case HSR_CPREG32(DACR):       CP32_PASSTHRU32(DACR32_EL2);     break;
+    case HSR_CPREG32(DFSR):       CP32_PASSTHRU32(ESR_EL1);        break;
+    case HSR_CPREG32(IFSR):       CP32_PASSTHRU32(IFSR32_EL2);     break;
+    case HSR_CPREG32(ADFSR):      CP32_PASSTHRU32(AFSR0_EL1);      break;
+    case HSR_CPREG32(AIFSR):      CP32_PASSTHRU32(AFSR1_EL1);      break;
+    case HSR_CPREG32(CONTEXTIDR): CP32_PASSTHRU32(CONTEXTIDR_EL1); break;
+
+#ifdef CONFIG_ARM_64
+    case HSR_CPREG32(DFAR):       CP32_PASSTHRU64_LO(FAR_EL1);     break;
+    case HSR_CPREG32(IFAR):       CP32_PASSTHRU64_HI(FAR_EL1);     break;
+    case HSR_CPREG32(MAIR0):      CP32_PASSTHRU64_LO(MAIR_EL1);    break;
+    case HSR_CPREG32(MAIR1):      CP32_PASSTHRU64_HI(MAIR_EL1);    break;
+    case HSR_CPREG32(AMAIR0):     CP32_PASSTHRU64_LO(AMAIR_EL1);   break;
+    case HSR_CPREG32(AMAIR1):     CP32_PASSTHRU64_HI(AMAIR_EL1);   break;
+#else
+    case HSR_CPREG32(DFAR):       CP32_PASSTHRU32(DFAR);           break;
+    case HSR_CPREG32(IFAR):       CP32_PASSTHRU32(IFAR);           break;
+    case HSR_CPREG32(MAIR0):      CP32_PASSTHRU32(MAIR0);          break;
+    case HSR_CPREG32(MAIR1):      CP32_PASSTHRU32(MAIR1);          break;
+    case HSR_CPREG32(AMAIR0):     CP32_PASSTHRU32(AMAIR0);         break;
+    case HSR_CPREG32(AMAIR1):     CP32_PASSTHRU32(AMAIR1);         break;
+#endif
+
+#undef CP32_PASSTHRU32
+#undef CP32_PASSTHRU64
+#undef CP32_PASSTHRU64_LO
+#undef CP32_PASSTHRU64_HI
     default:
         printk("%s p15, %d, r%d, cr%d, cr%d, %d @ 0x%"PRIregister"\n",
                cp32.read ? "mrc" : "mcr",
@@ -1351,6 +1459,9 @@ static void do_cp15_64(struct cpu_user_regs *regs,
                        union hsr hsr)
 {
     struct hsr_cp64 cp64 = hsr.cp64;
+    uint32_t *r1 = (uint32_t *)select_user_reg(regs, cp64.reg1);
+    uint32_t *r2 = (uint32_t *)select_user_reg(regs, cp64.reg2);
+    uint64_t r;
 
     if ( !check_conditional_instr(regs, hsr) )
     {
@@ -1368,6 +1479,26 @@ static void do_cp15_64(struct cpu_user_regs *regs,
             domain_crash_synchronous();
         }
         break;
+
+#define CP64_PASSTHRU(R...) do {                                  \
+    if ( cp64.read )                                            \
+    {                                                           \
+        r = READ_SYSREG64(R);                                   \
+        *r1 = r & 0xffffffffUL;                                 \
+        *r2 = r >> 32;                                          \
+    }                                                           \
+    else                                                        \
+    {                                                           \
+        r = (*r1) | (((uint64_t)(*r2))<<32);                    \
+        WRITE_SYSREG64(r, R);                                   \
+    }                                                           \
+} while(0)
+
+    case HSR_CPREG64(TTBR0): CP64_PASSTHRU(TTBR0_EL1); break;
+    case HSR_CPREG64(TTBR1): CP64_PASSTHRU(TTBR1_EL1); break;
+
+#undef CP64_PASSTHRU
+
     default:
         printk("%s p15, %d, r%d, r%d, cr%d @ 0x%"PRIregister"\n",
                cp64.read ? "mrrc" : "mcrr",
@@ -1382,11 +1513,13 @@ static void do_sysreg(struct cpu_user_regs *regs,
                       union hsr hsr)
 {
     struct hsr_sysreg sysreg = hsr.sysreg;
+    register_t *x = select_user_reg(regs, sysreg.reg);
+    struct vcpu *v = current;
 
     switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
     {
-    case CNTP_CTL_EL0:
-    case CNTP_TVAL_EL0:
+    case HSR_SYSREG_CNTP_CTL_EL0:
+    case HSR_SYSREG_CNTP_TVAL_EL0:
         if ( !vtimer_emulate(regs, hsr) )
         {
             dprintk(XENLOG_ERR,
@@ -1394,6 +1527,31 @@ static void do_sysreg(struct cpu_user_regs *regs,
             domain_crash_synchronous();
         }
         break;
+
+#define SYSREG_PASSTHRU(R...) do {              \
+    if ( sysreg.read )                          \
+        *x = READ_SYSREG(R);                    \
+    else                                        \
+        WRITE_SYSREG(*x, R);                    \
+} while(0)
+
+    case HSR_SYSREG_SCTLR_EL1:
+        SYSREG_PASSTHRU(SCTLR_EL1);
+        update_sctlr(v, *x);
+        break;
+    case HSR_SYSREG_TTBR0_EL1:      SYSREG_PASSTHRU(TTBR0_EL1);      break;
+    case HSR_SYSREG_TTBR1_EL1:      SYSREG_PASSTHRU(TTBR1_EL1);      break;
+    case HSR_SYSREG_TCR_EL1:        SYSREG_PASSTHRU(TCR_EL1);        break;
+    case HSR_SYSREG_ESR_EL1:        SYSREG_PASSTHRU(ESR_EL1);        break;
+    case HSR_SYSREG_FAR_EL1:        SYSREG_PASSTHRU(FAR_EL1);        break;
+    case HSR_SYSREG_AFSR0_EL1:      SYSREG_PASSTHRU(AFSR0_EL1);      break;
+    case HSR_SYSREG_AFSR1_EL1:      SYSREG_PASSTHRU(AFSR1_EL1);      break;
+    case HSR_SYSREG_MAIR_EL1:       SYSREG_PASSTHRU(MAIR_EL1);       break;
+    case HSR_SYSREG_AMAIR_EL1:      SYSREG_PASSTHRU(AMAIR_EL1);      break;
+    case HSR_SYSREG_CONTEXTIDR_EL1: SYSREG_PASSTHRU(CONTEXTIDR_EL1); break;
+
+#undef SYSREG_PASSTHRU
+
     default:
         printk("%s %d, %d, c%d, c%d, %d %s x%d @ 0x%"PRIregister"\n",
                sysreg.read ? "mrs" : "msr",
@@ -1466,7 +1624,6 @@ done:
     if (first) unmap_domain_page(first);
 }
 
-
 static void do_trap_instr_abort_guest(struct cpu_user_regs *regs,
                                       union hsr hsr)
 {
xen/arch/arm/vtimer.c
index 433ad5578e6733a8bfb072807bbb78b44ed638bf..e325f788c1bf7d2464a9df1515d615ce308e0a6f 100644
@@ -240,18 +240,18 @@ static int vtimer_emulate_sysreg(struct cpu_user_regs *regs, union hsr hsr)
 
     switch ( hsr.bits & HSR_SYSREG_REGS_MASK )
     {
-    case CNTP_CTL_EL0:
+    case HSR_SYSREG_CNTP_CTL_EL0:
         vtimer_cntp_ctl(regs, &r, sysreg.read);
         if ( sysreg.read )
             *x = r;
         return 1;
-    case CNTP_TVAL_EL0:
+    case HSR_SYSREG_CNTP_TVAL_EL0:
         vtimer_cntp_tval(regs, &r, sysreg.read);
         if ( sysreg.read )
             *x = r;
         return 1;
 
-    case HSR_CPREG64(CNTPCT):
+    case HSR_SYSREG_CNTPCT_EL0:
         return vtimer_cntpct(regs, x, sysreg.read);
 
     default:
xen/include/asm-arm/cpregs.h
index f0f1d53b67dd31fed9e083d17b29621d1da74f1d..508467a7b74f55f232fa3ebb1068348a3518f0d3 100644
 #define TTBR0           p15,0,c2        /* Translation Table Base Reg. 0 */
 #define TTBR1           p15,1,c2        /* Translation Table Base Reg. 1 */
 #define HTTBR           p15,4,c2        /* Hyp. Translation Table Base Register */
+#define TTBR0_32        p15,0,c2,c0,0   /* 32-bit access to TTBR0 */
+#define TTBR1_32        p15,0,c2,c0,1   /* 32-bit access to TTBR1 */
 #define HTCR            p15,4,c2,c0,2   /* Hyp. Translation Control Register */
 #define VTCR            p15,4,c2,c1,2   /* Virtualization Translation Control Register */
 #define VTTBR           p15,6,c2        /* Virtualization Translation Table Base Register */
 #define CPACR_EL1               CPACR
 #define CSSELR_EL1              CSSELR
 #define DACR32_EL2              DACR
+#define ESR_EL1                 DFSR
 #define ESR_EL2                 HSR
+#define FAR_EL1                 HIFAR
 #define FAR_EL2                 HIFAR
 #define HCR_EL2                 HCR
 #define HPFAR_EL2               HPFAR
xen/include/asm-arm/domain.h
index bc20a15ea7c5bbc7006c902ef370ab117ccc6d0b..af8c64b3d8e2e655b28d962f4dd719119f805d8f 100644
@@ -257,6 +257,8 @@ struct arch_vcpu
     uint64_t event_mask;
     uint64_t lr_mask;
 
+    bool_t default_cache;
+
     struct {
         /*
          * SGIs and PPIs are per-VCPU, SPIs are domain global and in
xen/include/asm-arm/processor.h
index dfe807dc576a741d96edc65c66dfe9ef27f98b96..06e638f6f5f02400571b05b0700ebc019aa76fe6 100644
@@ -342,7 +342,7 @@ union hsr {
 #define HSR_SYSREG_OP0_SHIFT (20)
 #define HSR_SYSREG_OP1_MASK (0x0001c000)
 #define HSR_SYSREG_OP1_SHIFT (14)
-#define HSR_SYSREG_CRN_MASK (0x00003800)
+#define HSR_SYSREG_CRN_MASK (0x00003c00)
 #define HSR_SYSREG_CRN_SHIFT (10)
 #define HSR_SYSREG_CRM_MASK (0x0000001e)
 #define HSR_SYSREG_CRM_SHIFT (1)
xen/include/asm-arm/sysregs.h
index 48ad07e39be9643f5a1307b851d2aba3c175ad32..0cee0e90c6457797ff4392e6128971903c8c8a8f 100644
     ((__HSR_SYSREG_##crm) << HSR_SYSREG_CRM_SHIFT) | \
     ((__HSR_SYSREG_##op2) << HSR_SYSREG_OP2_SHIFT)
 
-#define CNTP_CTL_EL0  HSR_SYSREG(3,3,c14,c2,1)
-#define CNTP_TVAL_EL0 HSR_SYSREG(3,3,c14,c2,0)
+#define HSR_SYSREG_SCTLR_EL1      HSR_SYSREG(3,0,c1, c0,0)
+#define HSR_SYSREG_TTBR0_EL1      HSR_SYSREG(3,0,c2, c0,0)
+#define HSR_SYSREG_TTBR1_EL1      HSR_SYSREG(3,0,c2, c0,1)
+#define HSR_SYSREG_TCR_EL1        HSR_SYSREG(3,0,c2, c0,2)
+#define HSR_SYSREG_AFSR0_EL1      HSR_SYSREG(3,0,c5, c1,0)
+#define HSR_SYSREG_AFSR1_EL1      HSR_SYSREG(3,0,c5, c1,1)
+#define HSR_SYSREG_ESR_EL1        HSR_SYSREG(3,0,c5, c2,0)
+#define HSR_SYSREG_FAR_EL1        HSR_SYSREG(3,0,c6, c0,0)
+#define HSR_SYSREG_MAIR_EL1       HSR_SYSREG(3,0,c10,c2,0)
+#define HSR_SYSREG_AMAIR_EL1      HSR_SYSREG(3,0,c10,c3,0)
+#define HSR_SYSREG_CONTEXTIDR_EL1 HSR_SYSREG(3,0,c13,c0,1)
+
+#define HSR_SYSREG_CNTPCT_EL0     HSR_SYSREG(3,3,c14,c0,0)
+#define HSR_SYSREG_CNTP_CTL_EL0   HSR_SYSREG(3,3,c14,c2,1)
+#define HSR_SYSREG_CNTP_TVAL_EL0  HSR_SYSREG(3,3,c14,c2,0)
+
+
 #endif
 
 #endif